[ElasticStack系列]elasticsearch查询api

来源:互联网 发布:网络平台合作方案 编辑:程序博客网 时间:2024/06/05 13:25

1.查询所有index

GET http://10.1.1.xx:9200/_cat/indices?v

返回:

health status index                                  uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   .kibana                                SFIaLg2TRV2ZWXcq148rvA   1   1          2            0     10.3kb         10.3kb
yellow open   tracelog_all_2017-10-10                3v14EsMfSi2t1rmEy0sG-Q   3   1          5            0     38.2kb         38.2kb
yellow open   trace_service_reference_all_2017-10-10 w6bH3lbUTFypW7dwCwIN_Q   5   1          3            0     22.9kb         22.9kb
yellow open   applog_all_2017-10-10                  wHCyhfo3SyurY6GMWofc4Q   3   1          5            0       28kb           28kb
yellow open   trace_service_info                     XJ0iVnmeQ5C66nCVuBUsRQ   5   1          4            0     21.5kb         21.5kb

2.查询指定记录

POST /_search HTTP/1.1
Host: es.gy.mcc.com
Content-Type: application/json
Cache-Control: no-cache
Postman-Token: 9b13f5c0-1985-68cb-7399-1722b5fcfee9

{
    "query": {
        "bool": {
            "must_not": [
                {
                    "wildcard": {
                        "tags": "_jsonparsefailure"
                    }
                },
                {
                    "term": {
                        "type": "kafka"
                    }
                }
            ],
            "must": [
                {
                    "term": {
                        "message": "www"
                    }
                }
            ]
        }
    }
}

term

字段完全匹配
可以用wildcard实现部分匹配 但不属于模糊匹配


match

模糊匹配:先对查询内容分词,再按分词去匹配。例如输入 message: i am child,会得到 message 中包含 i、am、child 任意一个分词的所有结果(默认各分词之间是 OR 关系;前提是 message 字段做了分析)

原始数据如下:

{
  "took": 1,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.36464313,
    "hits": [
      {
        "_index": "tracelog_all_2017-10-11",
        "_type": "all",
        "_id": "AV8LiPjE2ztl8R45O9Qc",
        "_score": 0.36464313,
        "_source": {
          "ip": "172.16.90.236",
          "serviceName": "cache-server",
          "entryServiceName": "client",
          "frontServiceName": "cache-server",
          "version": "1.1",
          "traceId": "56qq-client.172.16.90.236.1507726348.1.1",
          "spanId": "5.1.1",
          "operationName": "select * from user where id= 1",
          "error": "true",
          "logs": "[{1507726348439000, {event=cs}}, {1507726348439000, {event=cr}}, {1507726348455000, {event=error, message=error!, error.kind=Exception, stack=java.lang.RuntimeException: error!\r\n\tat (TestTraceRequest.java:257)\r\n\tat $3.run(TestTraceRequest.java:216)\r\n\tat java.lang.Thread.run(Thread.java:745)\r\n}}]",
          "duration": 5170,
          "elapsed": 1462,
          "self": 5170,
          "datetime": 1507726348439,
          "component": "jdbc",
          "db_instance": "user",
          "http_status_code": 0
        }
      },
      {
        "_index": "tracelog_all_2017-10-11",
        "_type": "all",
        "_id": "AV8LiPjE2ztl8R45O9Qf",
        "_score": 0.36464313,
        "_source": {
          "ip": "172.16.90.236",
          "serviceName": "cache-server",
          "entryServiceName": "client",
          "frontServiceName": "user-server",
          "version": "1.1",
          "traceId": "56qq-client.172.16.90.236.1507726348.1.1",
          "spanId": "4.1.1",
          "operationName": "/api/getUserCache",
          "error": "true",
          "logs": "[{1507726348439000, {event=sr}}, {1507726348510000, {event=ss}}, {1507726348510000, {event=error, message=error!, error.kind=Exception, stack=java.lang.RuntimeException: error!\r\n\tat$3.run(TestTraceRequest.java:218)\r\n\tat java.lang.Thread.run(Thread.java:745)\r\n}}]",
          "duration": 69624,
          "elapsed": 0,
          "self": 69624,
          "datetime": 1507726348439,
          "component": "spring",
          "http_status_code": 500,
          "span_kind": "server"
        }
      }
    ]
  }
}

查询语句如下:

{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {"serviceName":"cache-server"}
                },
                {
                    "term": {"error":"true"}
                },
                {
                    "match": {"logs": "event=sr"}
                }
            ]
        }
    },
    "size": 100
}

说明:match 会把 event=sr 分词为 event 和 sr,logs 中只要匹配上 event 或 sr 任意一个分词的记录都会返回。

结果: 2条都匹配

{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 1.3388524,
    "hits": [
      {
        "_index": "tracelog_all_2017-10-11",
        "_type": "all",
        "_id": "AV8LiPjE2ztl8R45O9Qf",
        "_score": 1.3388524,
        "_source": {
          "ip": "172.16.90.236",
          "serviceName": "cache-server",
          "entryServiceName": "56qq-client",
          "frontServiceName": "user-server",
          "version": "1.1",
          "traceId": "56qq-client.172.16.90.236.1507726348.1.1",
          "spanId": "4.1.1",
          "operationName": "/api/getUserCache",
          "error": "true",
          "logs": "[{1507726348439000, {event=sr}}, {1507726348510000, {event=ss}}, {1507726348510000, {event=error, message=error!, error.kind=Exception, stack=java.lang.RuntimeException: error!\r\n\tat com.wlqq.mon.galileo.logger.TestTraceRequest$3.run(TestTraceRequest.java:218)\r\n\tat java.lang.Thread.run(Thread.java:745)\r\n}}]",
          "duration": 69624,
          "elapsed": 0,
          "self": 69624,
          "datetime": 1507726348439,
          "component": "spring",
          "http_status_code": 500,
          "span_kind": "server"
        }
      },
      {
        "_index": "tracelog_all_2017-10-11",
        "_type": "all",
        "_id": "AV8LiPjE2ztl8R45O9Qc",
        "_score": 0.625296,
        "_source": {
          "ip": "172.16.90.236",
          "serviceName": "cache-server",
          "entryServiceName": "56qq-client",
          "frontServiceName": "cache-server",
          "version": "1.1",
          "traceId": "56qq-client.172.16.90.236.1507726348.1.1",
          "spanId": "5.1.1",
          "operationName": "select * from user where id= 1",
          "error": "true",
          "logs": "[{1507726348439000, {event=cs}}, {1507726348439000, {event=cr}}, {1507726348455000, {event=error, message=error!, error.kind=Exception, stack=java.lang.RuntimeException: error!\r\n\tat com.wlqq.mon.galileo.logger.TestTraceRequest.mysql(TestTraceRequest.java:257)\r\n\tat com.wlqq.mon.galileo.logger.TestTraceRequest$3.run(TestTraceRequest.java:216)\r\n\tat java.lang.Thread.run(Thread.java:745)\r\n}}]",
          "duration": 5170,
          "elapsed": 1462,
          "self": 5170,
          "datetime": 1507726348439,
          "component": "jdbc",
          "db_instance": "user",
          "http_status_code": 0
        }
      }
    ]
  }
}

现在的场景是logs需要同时匹配多个条件,例如event=sr和message=error。
如果用match肯定不对(match只要命中任意一个分词就算匹配),所以需要用到match_phrase:它会分析分词的位置,即event和sr必须连在一起、message和error必须连在一起才算匹配,这样就OK了

match_phrase

{
    "query": {
        "bool": {
            "must": [
                {
                    "term": {"serviceName":"cache-server"}
                },
                {
                    "term": {"error":"true"}
                },
                {
                    "match_phrase": {"logs": "event=sr"}
                },
                {
                    "match_phrase": {"logs": "event=ss"}
                }
            ]
        }
    },
    "size": 100
}

结果: 只有event=sr且event=ss的才匹配

{  "took": 315,  "timed_out": false,  "_shards": {    "total": 3,    "successful": 3,    "skipped": 0,    "failed": 0  },  "hits": {    "total": 1,    "max_score": 1.2344636,    "hits": [      {        "_index": "tracelog_all_2017-10-11",        "_type": "all",        "_id": "AV8LiPjE2ztl8R45O9Qf",        "_score": 1.2344636,        "_source": {          "ip": "172.16.90.236",          "serviceName": "cache-server",          "entryServiceName": "56qq-client",          "frontServiceName": "user-server",          "version": "1.1",          "traceId": "56qq-client.172.16.90.236.1507726348.1.1",          "spanId": "4.1.1",          "operationName": "/api/getUserCache",          "error": "true",          "logs": "[{1507726348439000, {event=sr}}, {1507726348510000, {event=ss}}, {1507726348510000, {event=error, message=error!, error.kind=Exception, stack=java.lang.RuntimeException: error!\r\n\tat com.wlqq.mon.galileo.logger.TestTraceRequest$3.run(TestTraceRequest.java:218)\r\n\tat java.lang.Thread.run(Thread.java:745)\r\n}}]",          "duration": 69624,          "elapsed": 0,          "self": 69624,          "datetime": 1507726348439,          "component": "spring",          "http_status_code": 500,          "span_kind": "server"        }      }    ]  }}

FreeMarker查询es的模板

1.term和match一起使用
2.wildcard对应的是没有分词的字段的部分匹配(有分词的字段的部分匹配则用match_phrase)
3.match和match_phrase对应的是有分词的字段的模糊匹配
模糊匹配和部分匹配是有区别的!主要在于模糊匹配两个分词可以不是连在一起的
4.用filter而不是query(无需打分,优化查询)

<#--
  Elasticsearch trace query body.

  Every clause sits in the bool "filter" context, so no relevance scoring is done.
  Optional clauses are emitted only when the corresponding model value is present;
  each of them ends with a comma, which is safe because the mandatory "datetime"
  range clause is always the last element of the "must" array.

  String values are rendered with ?json_string so that quotes, backslashes and
  control characters in user input cannot break (or inject into) the JSON body.
  Numeric values are rendered with ?c so that no locale-specific grouping
  (e.g. "1,000") ends up in the JSON.
  NOTE(review): ?c on "size" assumes size is numeric like duration/startTimestamp -
  confirm against TraceQueryFilter.
-->
{
    "query": {
        "bool": {
            "must": {
                "match_all": {}
            },
            "filter": {
                "bool": {
                    "must": [
                        {
                            "term": {"serviceName": "${serviceName?json_string}"}
                        },
                        <#if traceId??>
                        {
                            "term": {"traceId": "${traceId?json_string}"}
                        },
                        </#if>
                        <#if operationName??>
                        {
                            "match_phrase": {"operationName": "${operationName?json_string}"}
                        },
                        </#if>
                        <#if error??>
                        {
                            "term": {"error": "${error?json_string}"}
                        },
                        </#if>
                        <#if logsFilterList??>
                            <#list logsFilterList as logsFilter>
                            {
                                "match_phrase": {"logs": "${logsFilter?json_string}"}
                            },
                            </#list>
                        </#if>
                        <#if duration??>
                        {
                            "range": {
                                "duration": {
                                    "gte": ${duration?c}
                                }
                            }
                        },
                        </#if>
                        {
                            "range": {
                                "datetime": {
                                    "gte": ${startTimestamp?c},
                                    "lte": ${endTimestamp?c}
                                }
                            }
                        }
                    ]
                }
            }
        }
    },
    "sort": {
        "${sortKey?json_string}": {
            "order": "${sortOrder?json_string}"
        }
    },
    "size": ${size?c}
}

FreemarkUtil.java

package com.wlqq.mon.galileo.api.util;

import freemarker.template.Configuration;
import freemarker.template.Template;
import freemarker.template.TemplateException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.net.URL;

/**
 * Generates JSON request bodies from FreeMarker templates.
 *
 * @author Mingchenchen
 */
public class FreemarkUtil {

    private static final Logger logger = LoggerFactory.getLogger(FreemarkUtil.class);

    /** File name of the trace query template, resolved inside the {@code trace} resource directory. */
    private static final String traceTemplateFileName = "ESTraceQueryTemplate.ftl";

    /** Parsed trace query template; stays {@code null} when initialization failed. */
    private static Template traceQueryTemplate = null;

    /*
     * Load the template once when the class is initialized. The template directory
     * is the "trace" resource directory, so the template file must be placed there.
     */
    static {
        traceTemplateInit();
    }

    /**
     * Locates the {@code trace} resource directory and loads the trace query template from it.
     * Failures are logged and leave {@link #traceQueryTemplate} unset instead of aborting
     * class initialization.
     */
    private static void traceTemplateInit() {
        Configuration cfg = new Configuration();
        try {
            // The context class loader may be null; fall back to the loader of this class.
            ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
            if (classLoader == null) {
                classLoader = FreemarkUtil.class.getClassLoader();
            }
            URL traceBaseUrl = classLoader.getResource("trace");
            if (traceBaseUrl == null) {
                // Without this guard a missing directory raised a NullPointerException inside
                // the static initializer, which is not caught by the IOException handler below.
                logger.error("freemark init error! resource directory 'trace' not found");
                return;
            }
            File templateFilePath = new File(traceBaseUrl.getPath());
            cfg.setDirectoryForTemplateLoading(templateFilePath);
            traceQueryTemplate = cfg.getTemplate(traceTemplateFileName);
        } catch (IOException e) {
            logger.error("freemark init error!", e);
        }
    }

    /**
     * Renders the trace query template with the given filter as data model.
     *
     * @param filter the data model whose properties are referenced by the template
     * @return the rendered query body
     * @throws IllegalStateException    if the template could not be loaded during class initialization
     * @throws IllegalArgumentException if rendering fails, e.g. because a value required by the
     *                                  template is missing from {@code filter}
     */
    public static String getTraceQueryBody(TraceQueryFilter filter) {
        if (traceQueryTemplate == null) {
            throw new IllegalStateException(
                    "freemark template not initialized: " + traceTemplateFileName);
        }
        // try-with-resources replaces the former finally block that swallowed close() errors.
        try (StringWriter writer = new StringWriter()) {
            // Render the data model into the writer.
            traceQueryTemplate.process(filter, writer);
            return writer.toString();
        } catch (IOException | TemplateException e) {
            logger.error("freemark process error!", e);
            // Keep the original exception as cause so the stack trace is not lost.
            throw new IllegalArgumentException("param missing, " + e.getMessage(), e);
        }
    }
}

优化点

match_phrase和wildcard区别
同事说wildcard(通配符匹配,并不是正则匹配)性能差一些?官方文档也提示wildcard查询可能比较慢,尤其是以*或?开头的模式。

如果不需要打分 则用filter而不是query
http://blog.csdn.net/oryjk/article/details/50750850


参考:
http://www.cnblogs.com/huanxiyun/articles/5890897.html
http://www.cnblogs.com/yjf512/p/4897294.html


最新更新

1.一般情况下不用打分的则用filter而不是query
filtered查询在2.0中已经弃用、在5.0中已被移除,直接用bool查询的filter子句即可

Filtered query:

The filtered query is replaced by the bool query. Instead of the following:

## INCORRECT - DEPRECATED SYNTAX, DO NOT USE
GET _search
{
  "query": {
    "filtered": {
      "query": {
        "match": {
          "text": "quick brown fox"
        }
      },
      "filter": {
        "term": {
          "status": "published"
        }
      }
    }
  }
}

move the query and filter to the must and filter parameters in the bool query:

GET _search
{
  "query": {
    "bool": {
      "must": {
        "match": {
          "text": "quick brown fox"
        }
      },
      "filter": {
        "term": {
          "status": "published"
        }
      }
    }
  }
}

现在最新5.6的查询bool query用法如下:

https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-bool-query.html

POST _search
{
  "query": {
    "bool" : {
      "must" : {
        "term" : { "user" : "kimchy" }
      },
      "filter": {
        "term" : { "tag" : "tech" }
      },
      "must_not" : {
        "range" : {
          "age" : { "gte" : 10, "lte" : 20 }
        }
      },
      "should" : [
        { "term" : { "tag" : "wow" } },
        { "term" : { "tag" : "elasticsearch" } }
      ],
      "minimum_should_match" : 1,
      "boost" : 1.0
    }
  }
}