我有一个类似下面的原始聚合查询(JSON),但不知道如何将其转换为 elasticsearch-dsl(Python)的写法。
我读了文档,其中提到可以使用 .bucket()、.metric() 和 .pipeline() 方法来嵌套聚合,但没有进一步说明如何将这三种方法用于更复杂的聚合,例如包含更多嵌套层级的聚合。
{
"aggs": {
  "statistics": {
    "terms": {
      "field": "id"
    },
    "aggs": {
      "date": {
        "date_histogram": {
          "min_doc_count": 0,
          "field": "date",
          "interval": "1d",
          "format": "yyyy-MM-dd"
        },
        "aggs": {
          "column_a": {
            "avg": {
              "field": "column_a"
            }
          },
          "column_b": {
            "avg": {
              "field": "column_b"
            }
          },
          "column_c": {
            "avg": {
              "field": "column_c"
            }
          },
          "a_gap": {
            "serial_diff": {
              "buckets_path": "column_a"
            }
          },
          "b_gap": {
            "serial_diff": {
              "buckets_path": "column_b"
            }
          },
          "c_gap": {
            "serial_diff": {
              "buckets_path": "column_c"
            }
          }
        }
      },
      "sum_a_gap": {
        "sum_bucket": {
          "buckets_path": "date>a_gap"
        }
      },
      "sum_b_gap": {
        "sum_bucket": {
          "buckets_path": "date>b_gap"
        }
      },
      "sum_c_gap": {
        "sum_bucket": {
          "buckets_path": "date>c_gap"
        }
      }
    }
  }
}
}
我的 elasticsearch-dsl 查询如下,但它使 'sum_a_gap' 与 'column_a' 和 'a_gap' 处于同一层级:
self._search.aggs
.bucket('statistics', 'terms', field='id')
.bucket('date', 'date_histogram', field='date',
interval='1d', min_doc_count=0, format='yyyy-MM-dd')
.metric('column_a', 'avg', field='column_a')
.metric('column_b', 'avg', field='column_b')
.metric('column_c', 'avg', field='column_c')
.pipeline('a_gap', 'serial_diff', buckets_path='column_a')
.pipeline('b_gap', 'serial_diff', buckets_path='column_b')
.pipeline('c_gap', 'serial_diff', buckets_path='column_c')
.pipeline('sum_a_gap', 'sum_bucket', buckets_path='date>a_gap')
.pipeline('sum_b_gap', 'sum_bucket', buckets_path='date>b_gap')
.pipeline('sum_c_gap', 'sum_bucket', buckets_path='date>c_gap')首先要感谢大家!
发布于 2019-12-03 10:02:21
最终,我想通了。我稍微调整了调用顺序,结果与预期一致:“date” 和 “sum_{}_gap” 位于同一层(都在按 “id” 分组的 “statistics” 之下),其余的度量和管道聚合则位于 “date” 之下。
self._search.aggs
.bucket('statistics', 'terms', field='id')
.pipeline('sum_a_gap', 'sum_bucket', buckets_path='date>a_gap')
.pipeline('sum_b_gap', 'sum_bucket', buckets_path='date>b_gap')
.pipeline('sum_c_gap', 'sum_bucket', buckets_path='date>c_gap')
.bucket('date', 'date_histogram', field='date',
interval='1d', min_doc_count=0, format='yyyy-MM-dd')
.metric('column_a', 'avg', field='column_a')
.metric('column_b', 'avg', field='column_b')
.metric('column_c', 'avg', field='column_c')
.pipeline('a_gap', 'serial_diff', buckets_path='column_a')
.pipeline('b_gap', 'serial_diff', buckets_path='column_b')
.pipeline('c_gap', 'serial_diff', buckets_path='column_c')https://stackoverflow.com/questions/59134250
复制相似问题