跳到主要内容

ES聚合查询

聚合分类

  • 分桶聚合(bucket aggregations)
  • 指标聚合(metric aggregations)
  • 管道聚合(pipeline aggregations)

基本语法:

GET <index_name>/_search
{
"aggs": {
"<aggs_name>": {
"<aggs_type>": {
"field": "<field_name>"
}
}
}
}

分桶聚合

示例:

// 统计不同标签的商品数量
GET product/_search
{
"size": 0, // 不显示hits
"aggs": {
"aggs_tag": {
"terms": {
"field": "tags.keyword",
"size": 10, // 桶的数量
"order": {
"_count": "desc"
}
}
}
}
}

指标聚合

示例:

// 最贵、最便宜和平均价格
GET product/_search
{
"size": 0,
"aggs": {
"max_price": {
"max": {
"field": "price"
}
},
"min_price": {
"min": {
"field": "price"
}
},
"avg_price": {
"avg": {
"field": "price"
}
}
}
}

stats 示例:

// 显示统计数据
GET product/_search
{
"size": 0,
"aggs": {
"price_stats": {
"stats": {
"field": "price"
}
}
}
}

// 返回结果(节选)
"aggregations": {
"price_stats": {
"count": 4,
"min": 3999,
"max": 7999,
"avg": 5749,
"sum": 22996
}
}

cardinality :去重数量

// 按 name 去重的数量
GET product/_search
{
"size": 0,
"aggs": {
"name_count": {
"cardinality": {
"field": "name.keyword"
}
}
}
}

管道聚合

示例:

// 统计平均价格最低的商品分类
GET product/_search
{
"size": 0,
"aggs": {
"type_bucket": {
"terms": {
"field": "type.keyword"
},
"aggs": {
"price_bucket": {
"avg": {
"field": "price"
}
}
}
},
"min_bucket": {
"min_bucket": {
"buckets_path": "type_bucket>price_bucket"
}
}
}
}

嵌套聚合

示例:

// 统计不同类型商品不同档次的价格信息与标签信息
GET product/_search
{
"size": 0,
"aggs": {
"type_agg": {
"terms": {
"field": "type.keyword"
},
"aggs": {
"lv_agg": {
"terms": {
"field": "lv.keyword"
},
"aggs": {
"price_stats": {
"stats": {
"field": "price"
}
},
"tags_agg": {
"terms": {
"field": "tags.keyword"
}
}
}
}
}
}
}
}

// 统计每个商品类型中,不同档次商品中平均价格最低的档次
GET product/_search
{
"size": 0,
"aggs": {
"type_agg": {
"terms": {
"field": "type.keyword"
},
"aggs": {
"lv_agg": {
"terms": {
"field": "lv.keyword"
},
"aggs": {
"price_avg": {
"avg": {
"field": "price"
}
}
}
},
"min_price_bucket": {
"min_bucket": {
"buckets_path": "lv_agg>price_avg"
}
}
}
}
}
}

基于查询结果的聚合

示例:

// 价格5000以上商品的标签
GET product/_search
{
"size": 0,
"query": {
"range": {
"price": {
"gte": 5000
}
}
},
"aggs": {
"tags_bucket": {
"terms": {
"field": "tags.keyword"
}
}
}
}

基于聚合结果的查询

GET product/_search
{
"aggs": {
"tags_bucket": {
"terms": {
"field": "tags.keyword"
}
}
},
"post_filter": {
"term": {
"tags.keyword": "小米"
}
}
}

// global 与 filter 聚合:对比查询范围内、全部商品、以及查询范围内价格不超过6000的商品的平均价格
GET product/_search
{
"size": 0,
"query": {
"range": {
"price": {
"gte": 5000
}
}
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
},
"all_avg_price": {
"global": {}, // 取消查询条件
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
},
"multi_avg_price": {
"filter": { // 与上面的条件取交集
"range": {
"price": {
"lte": 6000
}
}
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}

聚合排序

示例:

// 多级聚合
GET product/_search?size=0
{
"aggs": {
"first_sort": {
"terms": {
"field": "type.keyword",
"order": {
"_count": "desc" // 按数量排序
}
},
"aggs": {
"second_sort": {
"terms": {
"field": "lv.keyword",
"order": {
"_key": "asc" // 按值进行排序
}
}
}
}
}
}
}

// 按子聚合结果排序
GET product/_search?size=0
{
"query": {
"terms": {
"type.keyword": ["手机", "耳机"]
}
},
"aggs": {
"type_avg_price": {
"terms": {
"field": "type.keyword",
"order": {
"price_stats.min": "asc"
}
},
"aggs": {
"price_stats": {
"stats": {
"field": "price"
}
}
}
}
}
}

_key :按值排序,字符串按字典序、数字按大小、日期按时间戳等,字符串区分大小写。

若字段是数值类型但被映射为 keyword,_key 会按字符串排序(例如 "10" 会排在 "9" 之前)。

如果是多层聚合,使用> 连接子聚合层级。

常用的聚合函数

histogram

示例:

GET product/_search?size=0
{
"aggs": {
"price_hist": {
"histogram": {
"field": "price",
"interval": 1000
}
}
}
}

GET product/_search?size=0
{
"aggs": {
"date_hist": {
"date_histogram": {
"field": "create_time",
"calendar_interval": "1M",
"format": "yyyy-MM",
"extended_bounds": {
"min": "2024-01",
"max": "2025-03"
}
}
}
}
}

GET product/_search?size=0
{
"aggs": {
"date_hist": {
"auto_date_histogram": {
"field": "create_time",
"format": "yyyy-MM",
"buckets": 12 // 根据buckets自动确定interval
}
}
}
}

参数:

  • field :字段
  • interval :间隔
  • min_doc_count :数量少于此值就不返回该桶
  • missing :空值的默认值
  • calendar_interval :基于日历时间单位,受日历规则影响。支持1M (月)、1q (季度)、1y (一年)等日历单位
  • fixed_interval :固定时长,与日历无关,支持 ms(毫秒)、s(秒)、m(分钟)、h(小时)、d(天);不支持 w(周)等日历单位,一周需写作 7d
  • format :日期格式
  • extended_bounds :强制包含范围外的空桶(需结合min_doc_count=0)

cumulative_sum

GET product/_search?size=0
{
"aggs": {
"date_hist": {
"date_histogram": {
"field": "create_time",
"calendar_interval": "1M",
"format": "yyyy-MM"
},
"aggs": {
"sum_agg": {
"sum": {
"field": "price"
}
},
"my_cumulative_sum": {
"cumulative_sum": {
"buckets_path": "sum_agg"
}
}
}
}
}
}

percentiles

百分位数聚合,计算数值字段的多个百分位值,反映数据分布。

GET product/_search?size=0
{
"aggs": {
"price_percentiles": {
"percentiles": {
"field": "price",
"percents": [ // 百分位点,如p95、p99等
1,
5,
25,
50,
75,
95,
99
]
}
}
}
}

percentile_ranks

百分位排名聚合,计算指定数值在数据集中的百分位排名,返回值为近似值(依赖tdigest 算法)。

GET product/_search?size=0
{
"aggs": {
"price_percentiles": {
"percentile_ranks": {
"field": "price",
"values": [
3000,
5000,
7000,
9000
]
}
}
}
}