Preface
I've actually written before about querying data from ES and doing grouped aggregation statistics:
Complex aggregation and grouping statistics implementation
I. Target scenario
- IoT devices in the server-room cabinets upload environmental data, which is stored in ES
- The temperature and humidity data stored in ES needs to be queried, grouped, and then aggregated to compute averages
II. Usage steps
1. Add the dependencies
Our ES server has already been upgraded to 8.0.0. For this query I need to group the data by a formatted time and then aggregate with avg, so the client-side dependencies stay on version 7.17.4.
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-client</artifactId>
    <version>7.17.4</version>
    <exclusions>
        <exclusion>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>elasticsearch-rest-high-level-client</artifactId>
    <version>7.17.4</version>
    <exclusions>
        <exclusion>
            <groupId>org.elasticsearch</groupId>
            <artifactId>elasticsearch</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.elasticsearch</groupId>
    <artifactId>elasticsearch</artifactId>
    <version>7.17.4</version>
</dependency>
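One thing worth mentioning for this 8.0.0-server / 7.17.4-client combination (it is not part of my configuration below): the 7.17 high-level client offers an API compatibility mode for talking to 8.x clusters. A minimal sketch of what enabling it looks like; the host, port and class name here are placeholders, not values from my setup:

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.RestHighLevelClientBuilder;

public class CompatClientDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder host/port; in a real setup these come from configuration.
        RestClient lowLevelClient = RestClient.builder(new HttpHost("127.0.0.1", 9200)).build();
        // Compatibility mode makes the 7.17.x client send compatibility headers,
        // which lets it talk to an 8.x cluster (builder available since client 7.16).
        RestHighLevelClient client = new RestHighLevelClientBuilder(lowLevelClient)
                .setApiCompatibilityMode(true)
                .build();
        client.close();
    }
}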
2. Configuration class
We currently run a single service, so this configuration class is enough. All it really does is create a RestHighLevelClient, register it as a bean, and hand it over to Spring to manage.
import lombok.Data;
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * ES configuration class
 * @author zwmac
 */
@Configuration
@Data
public class ElasticSearchConfig {

    @Value("${es.host}")
    private String host;
    @Value("${es.port}")
    private int port;
    @Value("${es.username}")
    private String loginName;
    @Value("${es.password}")
    private String password;

    private RestHighLevelClient client;

    @Bean
    public RestHighLevelClient client() {
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(loginName, password));
        HttpHost[] httpHostArray = new HttpHost[1];
        httpHostArray[0] = new HttpHost(host, port);
        RestClientBuilder restClientBuilder = RestClient.builder(httpHostArray)
                .setHttpClientConfigCallback(httpClientBuilder -> {
                    httpClientBuilder.disableAuthCaching();
                    return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider);
                });
        restClientBuilder.setRequestConfigCallback(requestConfigBuilder -> requestConfigBuilder
                .setConnectTimeout(60000)
                .setSocketTimeout(150000));
        client = new RestHighLevelClient(restClientBuilder);
        return client;
    }
}
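The @Value placeholders above assume that es.host, es.port, es.username and es.password exist in application.yml / application.properties. Once the bean is registered it can be injected anywhere. A minimal sketch to verify connectivity; this smoke-test service and its ping call are illustrative, not part of the original code:

import javax.annotation.Resource;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.stereotype.Service;

/**
 * Hypothetical smoke-test service: only checks that the injected client
 * can reach the cluster.
 */
@Service
public class EsPingService {

    @Resource
    private RestHighLevelClient restHighLevelClient;

    public boolean esIsUp() throws java.io.IOException {
        // ping() returns true if the cluster responds
        return restHighLevelClient.ping(RequestOptions.DEFAULT);
    }
}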
3. Usage
@Resource
private RestHighLevelClient restHighLevelClient;

/**
 * Query the 24-hour average of temperature and humidity
 * @param deviceCode device code
 * @param startTime  start time
 * @param endTime    end time
 * @param humName    humidity field name
 * @param tempName   temperature field name
 * @return 24-hour averages of temperature and humidity
 */
private TreeMap<String, Map<String, Double>> queryTempHumDayAvg(String deviceCode, Date startTime, Date endTime, String humName, String tempName) {
    TreeMap<String, Map<String, Double>> treeMap = new TreeMap<>();
    // ES query
    String index = EsCalendar.getDeviceFlowIndex(startTime, endTime);
    SearchRequest searchRequest = new SearchRequest(index);
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
    // Ignore unavailable indices, allow missing indices, expand wildcard expressions to open indices only
    searchRequest.indicesOptions(IndicesOptions.fromOptions(true, true, true, false));
    String timeFmt = "yyyy-MM-dd";

    // Assemble the ES request
    String startTimeStr = DateUtil.format(startTime, DatePattern.NORM_DATETIME_PATTERN);
    String endTimeStr = DateUtil.format(endTime, DatePattern.NORM_DATETIME_PATTERN);
    QueryBuilder rangeQuery = QueryBuilders.rangeQuery("createTime").lte(endTimeStr).gte(startTimeStr);
    BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
    // Must match the given deviceCode
    boolQueryBuilder.must(QueryBuilders.termQuery("deviceCode", deviceCode));
    rangeQuery = QueryBuilders.boolQuery().must(rangeQuery).must(boolQueryBuilder);
    QueryBuilder boolQuery = QueryBuilders.boolQuery().must(rangeQuery);
    searchSourceBuilder.query(boolQuery).size(0);

    // Average: temperature
    //String tempName = "temp_avg";
    String tempAvgName = tempName + "_avg";
    String tempFactorName = "data." + tempName;
    AvgAggregationBuilder tempAvgAggregationBuilder = AggregationBuilders.avg(tempAvgName).field(tempFactorName);

    // Average: humidity
    //String humName = "hygrometer_avg";
    String humAvgName = humName + "_avg";
    String humFactorName = "data." + humName;
    AvgAggregationBuilder humAvgAggregationBuilder = AggregationBuilders.avg(humAvgName).field(humFactorName);

    String createTimeGroup = "createTimeGroup";
    DateHistogramAggregationBuilder aggregation = AggregationBuilders.dateHistogram(createTimeGroup)
            .field("createTime")
            .fixedInterval(DateHistogramInterval.DAY)
            .format(timeFmt)
            // Drop buckets whose doc count is 0
            .minDocCount(1)
            .subAggregation(tempAvgAggregationBuilder)
            .subAggregation(humAvgAggregationBuilder);
    // Grouping condition
    searchSourceBuilder.aggregation(aggregation);
    searchRequest.source(searchSourceBuilder);

    SearchResponse searchResponse = null;
    Map<String, Map<String, Double>> mp = new HashMap<>();
    try {
        log.info("getCabinetTempHum24HourAvg ES request: " + searchRequest);
        searchResponse = restHighLevelClient.search(searchRequest, RequestOptions.DEFAULT);
        log.info("getCabinetTempHum24HourAvg ES response: " + searchResponse.toString());
        Aggregations aggregations = searchResponse.getAggregations();
        if (aggregations != null) {
            // Assemble the output
            aggregations.forEach(agg -> {
                ParsedDateHistogram parsedDateHistogram = (ParsedDateHistogram) agg;
                List buckets = parsedDateHistogram.getBuckets();
                if (CollectionUtil.isNotEmpty(buckets)) {
                    buckets.forEach(bucket -> {
                        ParsedDateHistogram.ParsedBucket timeGroupTerm = (ParsedDateHistogram.ParsedBucket) bucket;
                        String timeStr = timeGroupTerm.getKeyAsString();
                        Aggregations subAggregations = timeGroupTerm.getAggregations();
                        if (subAggregations != null) {
                            Map<String, Double> tempHumMap = new HashMap<>();
                            Map<String, Aggregation> subAggMap = subAggregations.asMap();
                            if (subAggMap != null) {
                                Aggregation tempAgg = subAggMap.get(tempAvgName);
                                if (tempAgg != null) {
                                    ParsedAvg tempAggPdh = (ParsedAvg) tempAgg;
                                    tempHumMap.put(tempName, tempAggPdh.getValue());
                                }
                                Aggregation humAgg = subAggMap.get(humAvgName);
                                if (humAgg != null) {
                                    ParsedAvg humAggPdh = (ParsedAvg) humAgg;
                                    tempHumMap.put(humName, humAggPdh.getValue());
                                }
                            }
                            mp.put(timeStr, tempHumMap);
                        }
                    });
                }
            });
        }

        // Fill in missing time slots
        List<DateTime> dateTimeList = DateUtil.rangeToList(startTime, DateUtil.offsetHour(endTime, -1), DateField.HOUR_OF_DAY);
        if (CollectionUtil.isNotEmpty(dateTimeList)) {
            String finTempName = "temp_avg";
            String finHumName = "hum_avg";
            dateTimeList.forEach(dateTime -> {
                String timeStr = DateUtil.format(dateTime, timeFmt);
                Map<String, Double> finTempHumMap = new HashMap<>();
                Map<String, Double> tempHumMap = mp.get(timeStr);
                if (tempHumMap == null) {
                    finTempHumMap.put(finTempName, 0.0);
                    finTempHumMap.put(finHumName, 0.0);
                } else {
                    Double tempAvg = tempHumMap.get(tempName);
                    Double humAvg = tempHumMap.get(humName);
                    finTempHumMap.put(finTempName, tempAvg);
                    finTempHumMap.put(finHumName, humAvg);
                }
                treeMap.put(timeStr, finTempHumMap);
            });
        }
    } catch (Exception e) {
        log.error("countByEs ES query error", e);
    }
    return treeMap;
}
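A quick sketch of how this method might be called from the same service class. The device code, the "temp" / "hygrometer" field names and the 24-hour window are illustrative assumptions, not values from the original code:

// Hypothetical call site: per-bucket averages for the last 24 hours of one device.
// "temp" and "hygrometer" are assumed field names under "data.*".
Date endTime = new Date();
Date startTime = DateUtil.offsetHour(endTime, -24);   // Hutool: endTime minus 24 hours
TreeMap<String, Map<String, Double>> dayAvg =
        queryTempHumDayAvg("CAB-001-TH", startTime, endTime, "hygrometer", "temp");
dayAvg.forEach((time, avgMap) ->
        log.info("{} -> temp_avg={}, hum_avg={}", time, avgMap.get("temp_avg"), avgMap.get("hum_avg")));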
Key points to note:
- The timestamps passed into QueryBuilders.rangeQuery need yyyy-MM-dd HH:mm:ss precision, otherwise the query throws an error.
- The time-formatted grouping uses DateHistogramAggregationBuilder. In the ES 7+ API the old generic interval setter is deprecated; you now choose between calendarInterval and fixedInterval, and fixedInterval is what I use here.
- Grouping followed by aggregation: mind the nesting, i.e. how subAggregation hangs the two avg aggregations under the date histogram; see the sketch after this list.
- After the data comes back, parse it iteratively: cast to ParsedDateHistogram, call parsedDateHistogram.getBuckets(), and walk the buckets and their sub-aggregations to pull out the values.
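To make the nesting concrete, here is a small self-contained sketch that rebuilds only the aggregation part with fixed example names ("temp", "hygrometer", a placeholder device code) and prints the JSON body the builders generate. It is just an illustration of the structure, not part of the original service code:

import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.elasticsearch.search.builder.SearchSourceBuilder;

public class AggNestingDemo {
    public static void main(String[] args) {
        // Rebuild the aggregation tree from the method above with fixed example names.
        SearchSourceBuilder source = new SearchSourceBuilder()
                .query(QueryBuilders.termQuery("deviceCode", "CAB-001-TH")) // placeholder device
                .size(0)
                .aggregation(
                        AggregationBuilders.dateHistogram("createTimeGroup")
                                .field("createTime")
                                .fixedInterval(DateHistogramInterval.DAY)
                                .format("yyyy-MM-dd")
                                .minDocCount(1)
                                // the two avg aggregations are nested INSIDE each date bucket
                                .subAggregation(AggregationBuilders.avg("temp_avg").field("data.temp"))
                                .subAggregation(AggregationBuilders.avg("hygrometer_avg").field("data.hygrometer")));
        // toString() renders the JSON body that would be sent to ES, which makes the
        // aggs -> date_histogram -> aggs -> avg nesting easy to inspect.
        System.out.println(source);
    }
}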
Summary
- gs had been running the old ES 6 forever; this time they were finally forced to upgrade, which is great. (I had kept suggesting and hoping for it before, but well...)
- I really wanted to bring in EasyEs and give it a try, but some colleagues never approved, so never mind.
- I also suggested installing the sql-package plugin for ES so that DBeaver could connect to it; we tried it for a while, but it was never installed on the new version, so never mind.
- Not much else to say. The only thing is that RestHighLevelClient itself is now marked deprecated in the later 7.x releases, so next time there's a chance I'll rework that as well.
- Hope this helps everyone, uping!