Elasticsearch Scripting Tutorial: Computing Date Differences to Generate Buckets
In this article we put Elasticsearch's Painless scripting into practice through hands-on examples.
1. Overview
The environment for this article is Elasticsearch 7.x, and the examples mainly use Painless scripts.
Requirements: we have log data with the fields name, start_date, and close_date. We need to compute the difference between the two dates and bucket the results.
Date difference in minutes:
* 0~30 minutes
* 31~60 minutes
* more than 1 hour
Date difference in hours:
* 0~2 hours
* 2~24 hours
* more than 1 day
Date difference in days:
* 0~3 days
* 3~7 days
* more than 1 week
2. Preparing the Data
First, create the index:
PUT logs
{
  "mappings": {
    "properties": {
      "name": {
        "type": "keyword"
      },
      "start_date": {
        "type": "date",
        "format": "date_optional_time"
      },
      "close_date": {
        "type": "date",
        "format": "date_optional_time"
      }
    }
  }
}
Then index some documents:
PUT logs/_bulk?refresh
{"index":{"_id":1}}
{"name":"Person AA","start_date":"2016-01-01T14:00:56","close_date":"2016-01-10T14:00:50"}
{"index":{"_id":2}}
{"name":"Person CC","start_date":"2016-01-09T15:01:52","close_date":"2016-01-19T15:45:41"}
{"index":{"_id":3}}
{"name":"Person BB","start_date":"2016-01-09T15:01:53","close_date":"2016-01-20T16:50:41"}
{"index":{"_id":4}}
{"name":"Person BB","start_date":"2016-01-09T15:01:53","close_date":"2016-01-19T16:00:41"}
{"index":{"_id":5}}
{"name":"Person CC","start_date":"2016-01-09T15:01:53","close_date":"2016-01-19T15:30:41"}
{"index":{"_id":6}}
{"name":"Person EE","start_date":"2016-01-09T15:01:54","close_date":"2016-01-09T17:00:41"}
{"index":{"_id":7}}
{"name":"Person EE","start_date":"2016-01-09T15:01:55","close_date":"2016-01-19T15:00:41"}
{"index":{"_id":8}}
{"name":"Person CC","start_date":"2016-01-09T15:01:55","close_date":"2016-01-10T17:00:41"}
{"index":{"_id":9}}
{"name":"Person AA","start_date":"2016-01-09T15:01:56","close_date":"2016-01-19T15:30:41"}
{"index":{"_id":10}}
{"name":"Person BB","start_date":"2016-01-09T15:01:56","close_date":"2016-01-19T15:00:41"}
{"index":{"_id":11}}
{"name":"Person BB","start_date":"2016-01-09T15:01:56","close_date":"2016-01-19T15:00:41"}
{"index":{"_id":12}}
{"name":"Person BB","start_date":"2016-01-09T15:01:57","close_date":"2016-01-19T15:00:41"}
{"index":{"_id":13}}
{"name":"Person AA","start_date":"2016-01-09T15:01:57","close_date":"2016-01-10T14:00:50"}
{"index":{"_id":14}}
{"name":"Person EE","start_date":"2016-01-09T15:01:58","close_date":"2016-01-19T15:00:41"}
{"index":{"_id":15}}
{"name":"Person CC","start_date":"2016-01-09T15:01:58","close_date":"2016-01-10T14:00:50"}
{"index":{"_id":16}}
{"name":"Person BB","start_date":"2016-01-09T15:01:59","close_date":"2016-01-19T15:00:41"}
{"index":{"_id":17}}
{"name":"Person AA","start_date":"2016-01-09T15:01:59","close_date":"2016-01-09T15:45:41"}
{"index":{"_id":18}}
{"name":"Person EE","start_date":"2016-01-09T15:02:00","close_date":"2016-01-19T15:00:41"}
{"index":{"_id":19}}
{"name":"Person BB","start_date":"2016-01-09T15:02:00","close_date":"2016-01-10T14:00:50"}
{"index":{"_id":20}}
{"name":"Person CC","start_date":"2016-01-09T15:02:00","close_date":"2016-01-20T15:00:41"}
{"index":{"_id":21}}
{"name":"Person AA","start_date":"2016-01-09T15:02:01","close_date":"2016-01-19T15:30:41"}
{"index":{"_id":22}}
{"name":"Person BB","start_date":"2016-01-09T15:02:01","close_date":"2016-01-10T14:00:50"}
{"index":{"_id":23}}
{"name":"Person CC","start_date":"2016-01-09T15:02:02","close_date":"2016-02-01T15:00:41"}
{"index":{"_id":24}}
{"name":"Person AA","start_date":"2016-01-09T15:02:02","close_date":"2016-01-19T15:00:41"}
{"index":{"_id":25}}
{"name":"Person BB","start_date":"2016-01-09T15:02:03","close_date":"2016-01-09T20:00:41"}
Once that is done, we can verify the data with the following request:
GET logs/_search
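If you only want to confirm that all 25 documents were indexed, the count API is a quicker sanity check:
GET logs/_count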
3. Generating Range Buckets
First, bucket by the difference in minutes:
GET logs/_search
{
  "size": 0,
  "aggs": {
    "groupby": {
      "range": {
        "script": {
          "source": "(doc['close_date'].value.toInstant().toEpochMilli() - doc['start_date'].value.toInstant().toEpochMilli()) / (3600000.0/60)"
        },
        "ranges": [
          {
            "from": 0.0,
            "to": 30.0,
            "key": "0~30 minutes"
          },
          {
            "from": 30.0,
            "to": 60.0,
            "key": "30~60 minutes"
          },
          {
            "from": 60.0,
            "key": "more than 1 hour"
          }
        ]
      }
    }
  }
}
The script, formatted more readably:
( doc['close_date'].value.toInstant().toEpochMilli() - doc['start_date'].value.toInstant().toEpochMilli() ) / (3600000.0/60)
doc['close_date'].value returns a JodaCompatibleZonedDateTime; calling toInstant().toEpochMilli() on it gives the time as epoch milliseconds. To convert the millisecond difference into minutes we divide by (3600000.0/60), i.e. 60,000 ms per minute.
For each document, Elasticsearch evaluates this script to get the duration in minutes and then assigns the value to one of the range buckets. One thing to keep in mind: a range bucket includes its from value but excludes its to value, so define the ranges accordingly.
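For example, take document 17 (Person AA, 2016-01-09T15:01:59 to 2016-01-09T15:45:41): the difference is 43 minutes 42 seconds, i.e. 2,622,000 ms, and 2,622,000 / 60,000 ≈ 43.7, so that document falls into the 30~60 minute bucket.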
To recap the approach:
- compute the date difference
- convert the result to the unit we need
- bucket the results
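Before moving on, note that Painless also exposes the java.time API, so the duration could be computed with ChronoUnit instead of a hand-written divisor. The query below is only a sketch of that variant; it should bucket the documents the same way as the minute query above, except that the script produces whole minutes rather than a fractional value:
GET logs/_search
{
  "size": 0,
  "aggs": {
    "groupby": {
      "range": {
        "script": {
          "lang": "painless",
          "source": "ChronoUnit.MINUTES.between(doc['start_date'].value.toInstant(), doc['close_date'].value.toInstant())"
        },
        "ranges": [
          { "from": 0.0, "to": 30.0, "key": "0~30 minutes" },
          { "from": 30.0, "to": 60.0, "key": "30~60 minutes" },
          { "from": 60.0, "key": "more than 1 hour" }
        ]
      }
    }
  }
}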
Next, bucket by hours:
GET logs/_search
{
  "size": 0,
  "aggs": {
    "groupbyHours": {
      "range": {
        "script": {
          "source": "(doc['close_date'].value.toInstant().toEpochMilli() - doc['start_date'].value.toInstant().toEpochMilli()) / 3600000.0"
        },
        "ranges": [
          {
            "from": 0.0,
            "to": 2.0,
            "key": "0~2 hours"
          },
          {
            "from": 2.0,
            "to": 24.0,
            "key": "2~24 hours"
          },
          {
            "from": 24.0,
            "key": "more than 1 day"
          }
        ]
      }
    }
  }
}
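As a side note, the range aggregation also accepts a keyed flag, which returns the buckets as an object indexed by the key names instead of an array; that can be easier to consume from client code. A sketch of the hours query with the flag enabled:
GET logs/_search
{
  "size": 0,
  "aggs": {
    "groupbyHours": {
      "range": {
        "keyed": true,
        "script": {
          "source": "(doc['close_date'].value.toInstant().toEpochMilli() - doc['start_date'].value.toInstant().toEpochMilli()) / 3600000.0"
        },
        "ranges": [
          { "from": 0.0, "to": 2.0, "key": "0~2 hours" },
          { "from": 2.0, "to": 24.0, "key": "2~24 hours" },
          { "from": 24.0, "key": "more than 1 day" }
        ]
      }
    }
  }
}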
Finally, bucket by days:
GET logs/_search
{
  "size": 0,
  "aggs": {
    "groupbyDays": {
      "range": {
        "script": {
          "source": "(doc['close_date'].value.toInstant().toEpochMilli() - doc['start_date'].value.toInstant().toEpochMilli()) / (3600000.0*24)"
        },
        "ranges": [
          {
            "from": 0.0,
            "to": 3.0,
            "key": "0~3 days"
          },
          {
            "from": 3.0,
            "to": 7.0,
            "key": "3~7 days"
          },
          {
            "from": 7.0,
            "key": "more than 1 week"
          }
        ]
      }
    }
  }
}
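All three aggregations repeat the same millisecond arithmetic inline. On Elasticsearch 7.11 or later (an assumption beyond the plain 7.x used here), another option is to declare the duration once as a runtime field in the search request and aggregate on it; the sketch below uses a made-up field name, duration_days:
GET logs/_search
{
  "size": 0,
  "runtime_mappings": {
    "duration_days": {
      "type": "double",
      "script": {
        "source": "emit((doc['close_date'].value.toInstant().toEpochMilli() - doc['start_date'].value.toInstant().toEpochMilli()) / (3600000.0*24))"
      }
    }
  },
  "aggs": {
    "groupbyDays": {
      "range": {
        "field": "duration_days",
        "ranges": [
          { "from": 0.0, "to": 3.0, "key": "0~3 days" },
          { "from": 3.0, "to": 7.0, "key": "3~7 days" },
          { "from": 7.0, "key": "more than 1 week" }
        ]
      }
    }
  }
}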
We can also use a script in a query condition. The following example returns the documents whose duration falls within 0~3 days:
GET logs/_search
{
  "query": {
    "bool": {
      "filter": {
        "script": {
          "script": {
            "lang": "painless",
            "source": "((doc['close_date'].value.toInstant().toEpochMilli() - doc['start_date'].value.toInstant().toEpochMilli()) / (3600000.0*24)) < 3.0"
          }
        }
      }
    }
  }
}
This example differs from the previous ones in two ways:
- it is not an aggregation; it simply returns the matching documents
- the Painless script is now an expression that returns a boolean, i.e. whether the date difference is less than 3 days (a way to avoid this per-search computation is sketched below)
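Also keep in mind that a script filter evaluates the script for every document the query considers, which can get expensive on large indices. One way around that is to compute the duration once at index time, for example with an ingest pipeline and a script processor; the sketch below is only an illustration, and the pipeline name log_duration and the field duration_minutes are made up:
PUT _ingest/pipeline/log_duration
{
  "processors": [
    {
      "script": {
        "lang": "painless",
        "source": "ctx.duration_minutes = ChronoUnit.MINUTES.between(LocalDateTime.parse(ctx.start_date), LocalDateTime.parse(ctx.close_date))"
      }
    }
  ]
}
Documents indexed through this pipeline (for example PUT logs/_doc/26?pipeline=log_duration) carry a precomputed duration_minutes field, so ordinary range queries and range aggregations can be used without any script at search time.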
4. Summary
This article used examples to show how Painless scripts can implement non-trivial bucketing and queries.
Reference: https://blog.csdn.net/neweastsun/article/details/105028355