我有一个搜索页面,其中包含两种搜索结果类型:摘要结果和具体结果。
要获得摘要页面,我使用请求:
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
.Query(q =>
q.MultiMatch(m => m
.Fields(fs => fs
.Field(f => f.Content1, 3)
.Field(f => f.Content2, 2)
.Field(f => f.Content3, 1))
.Fuzziness(Fuzziness.EditDistance(1))
.Query(query)
.Boost(1.1)
.Slop(2)
.PrefixLength(1)
.MaxExpansions(100)
.Operator(Operator.Or)
.MinimumShouldMatch(2)
.FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
.TieBreaker(1.0)
.CutoffFrequency(0.5)
.Lenient()
.ZeroTermsQuery(ZeroTermsQuery.All))
&& (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
.Aggregations(a => a
.Terms("category", tagd => tagd
.Field(f => f.Category)
.Size(10)
.Aggregations(aggs => aggs.TopHits("top_tag_hits", t => t.Size(3)))))
.FielddataFields(fs => fs
.Field(p => p.Content1, 3)
.Field(p => p.Content2, 2)
.Field(p => p.Content3, 1));
var elasticResult = _elasticClient.Search<ElasticType>(_ => searchDescriptor);例如,我得到了结果
{
"aggregations": {
"category": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "category1",
"doc_count": 40,
"top_tag_hits": {
"hits": {
"total": 40,
"max_score": 5.4,
"hits": [{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 5.4,
"_source": {
"id": 1
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 4.3,
"_source": {
"id": 3 // FAIL!
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 4.3,
"_source": {
"id": 2
}
}]
}
}
}]
}
}
}所以我在同一个_score上得到了很少的点击量。
要获得具体结果(按类别划分),请使用以下请求:
var searchDescriptor = new SearchDescriptor<ElasticType>();
searchDescriptor.Index("index_name")
.Size(perPage <= 0 ? 100 : perPage)
.From(page * perPage)
.Query(q => q
.MultiMatch(m => m
.Fields(fs => fs
.Field(f => f.Content1, 3)
.Field(f => f.Content2, 2)
.Field(f => f.Content3, 1)
.Field(f => f.Category))
.Fuzziness(Fuzziness.EditDistance(1))
.Query(searchRequest.Query)
.Boost(1.1)
.Slop(2)
.PrefixLength(1)
.MaxExpansions(100)
.Operator(Operator.Or)
.MinimumShouldMatch(2)
.FuzzyRewrite(RewriteMultiTerm.ConstantScoreBoolean)
.TieBreaker(1.0)
.CutoffFrequency(0.5)
.Lenient()
.ZeroTermsQuery(ZeroTermsQuery.All))
&& q.Term(t => t.Field(f => f.Category).Value(searchRequest.Category))
&& (q.Terms(t => t.Field(f => f.LanguageId).Terms(1)) || q.Terms(t => t.Field(f => f.LanguageId).Terms(0))))
.FielddataFields(fs => fs
.Field(p => p.Content1, 3)
.Field(p => p.Content2, 2)
.Field(p => p.Content3, 1))
.Aggregations(a => a
.Terms("category", tagd => tagd
.Field(f => f.Category)));结果是这样的:
{
"hits": {
"total": 40,
"max_score": 7.816723,
"hits": [{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 7.816723,
"_source": {
"id": 1
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 6.514713,
"_source": {
"id": 2
}
},
{
"_index": "...",
"_type": "...",
"_id": "...",
"_score": 6.514709,
"_source": {
"id": 3
}
}]
}
}所以在第二种情况下,对于一个特定的类别,我得到的_score具有很高的精度和弹性,可以很容易地对结果进行排序。但是在聚合的情况下,有相同的_score的结果,在这种情况下,排序不清楚它是如何工作的。
有人能指点我如何解决这个问题吗?或者如何在结果中实现相同的顺序?也许我能提高聚合结果的准确性?
我使用elasticsearch服务器版本"5.3.0“和NEST库版本"5.0.0”。
更新:聚合请求的本机查询:
{
"fielddata_fields": [
"content1^3",
"content2^2",
"content3^1"
],
"aggs": {
"category": {
"terms": {
"field": "category",
"size": 10
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"size": 3
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"multi_match": {
"boost": 1.1,
"query": "sparta",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1.0,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1"
],
"zero_terms_query": "all"
}
},
{
"bool": {
"should": [
{
"terms": {
"languageId": [
1
]
}
},
{
"terms": {
"languageId": [
0
]
}
}
]
}
}
]
}
}
}对具体请求的本机查询:
{
"from": 0,
"size": 100,
"fielddata_fields": [
"content1^3",
"content2^2",
"content3^1"
],
"aggs": {
"category": {
"terms": {
"field": "category"
}
}
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1.0,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
},
{
"term": {
"category": {
"value": "category1"
}
}
}
]
}
},
{
"bool": {
"should": [
{
"terms": {
"languageId": [
1
]
}
},
{
"terms": {
"languageId": [
0
]
}
}
]
}
}
]
}
}
}此外,我还使用下一个映射来创建索引:
var descriptor = new CreateIndexDescriptor(indexName)
.Mappings(ms => ms
.Map<ElasticType>(m => m
.Properties(ps => ps
.Keyword(s => s.Name(ecp => ecp.Title))
.Text(s => s.Name(ecp => ecp.Content1))
.Text(s => s.Name(ecp => ecp.Content2))
.Text(s => s.Name(ecp => ecp.Content3))
.Date(s => s.Name(ecp => ecp.Date))
.Number(s => s.Name(ecp => ecp.LanguageId).Type(NumberType.Integer))
.Keyword(s => s.Name(ecp => ecp.Category))
.Text(s => s.Name(ecp => ecp.PreviewImageUrl).Index(false))
.Text(s => s.Name(ecp => ecp.OptionalContent).Index(false))
.Text(s => s.Name(ecp => ecp.Url).Index(false)))));
_elasticClient.CreateIndex(indexName, _ => descriptor);发布于 2017-06-09 08:48:59
您的查询有问题。
must和should在must中的组合,作为bool查询的一部分。
因此,如果您在这个链接中读到更多内容,您可以看到must
子句(查询)必须出现在匹配的文档中,并将有助于得分。因此,它将与所有符合条件的文档等分。任何其他条件,如果不符合条件,甚至不会在那里的结果得分。
您应该使用should查询做什么,但是在must查询之外,所以Elasticsearch将能够正确评分您的文档。
您应该在查询中传递'explain': true。您可以阅读更多有关解释查询以及如何在此链接中解释结果的内容。
由于每个分数都是相同的,因此Elasticsearch可以以任何方式对结果进行排序,从而从其节点获取响应。
可能的解决方案:
您应该重新组织您的查询,以真正利用should查询及其增强功能。您可以阅读更多关于增强这里的内容。
我尝试了两个类似于您的查询,但是正确地使用了should,它们给了我与预期相同的订单。您的两个查询都应如下所示:
{
"from": 0,
"size": 10,
"_source": [
"content1^3",
"content2^2",
"content3^1"
],
"query": {
"bool": {
"should": [
{
"match": {
"languageId": 1
}
},
{
"match": {
"languageId": 0
}
}
],
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
}
]
}
}
}和第二个查询
{
"size": 0,
"query": {
"bool": {
"should": [
{
"match": {
"languageId": 1
}
},
{
"match": {
"languageId": 0
}
}
],
"must": [
{
"multi_match": {
"boost": 1.1,
"query": ".....",
"fuzzy_rewrite": "constant_score_boolean",
"fuzziness": 1,
"cutoff_frequency": 0.5,
"prefix_length": 1,
"max_expansions": 100,
"slop": 2,
"lenient": true,
"tie_breaker": 1,
"minimum_should_match": 2,
"operator": "or",
"fields": [
"content1^3",
"content2^2",
"content3^1",
"category"
],
"zero_terms_query": "all"
}
}
]
}
},
"aggs": {
"categories": {
"terms": {
"field": "category",
"size": 10
},
"aggs": {
"produdtcs": {
"top_hits": {
"_source": [
"content1^3",
"content2^2",
"content3^1"
],
"size": 3
}
}
}
}
}
}https://stackoverflow.com/questions/44327996
复制相似问题