Elasticsearch 的一些示例：增删改查、映射、结构化查询、聚合

来源：互联网 | 发布：python 高斯曲线拟合 | 编辑：程序博客网 | 时间：2024/06/03 12:52

es版本 5.1.1

基本增删改操作

查询删除

# Delete every document of type user_event in the user_analysys index
# (match_all selects all documents).
POST /user_analysys/user_event/_delete_by_query
{
  "query": {
    "match_all": {}
  }
}

# Index three employee documents under explicit ids.
PUT /megacorp/employee/1
{
  "first_name": "John",
  "last_name": "Smith",
  "age": 25,
  "about": "I love to go rock climbing",
  "interests": [ "sports", "music" ]
}

PUT /megacorp/employee/2
{
  "first_name": "Jane",
  "last_name": "Smith",
  "age": 32,
  "about": "I like to collect rock albums",
  "interests": [ "music" ]
}

PUT /megacorp/employee/3
{
  "first_name": "Douglas",
  "last_name": "Fir",
  "age": 35,
  "about": "I like to build cabinets",
  "interests": [ "forestry" ]
}

# Fetch a document, search the type, fetch only _source, and run a query-string search.
GET /megacorp/employee/1
GET /megacorp/employee/_search
GET /megacorp/employee/1/_source
GET /megacorp/employee/_search?q=first_name:Jane

# Partial update
POST /megacorp/employee/1/_update
{
  "doc": {
    "about": "I love to go rock climbing xxx"
  }
}

# Update; insert the "upsert" document if the id does not exist yet
POST /megacorp/employee/4/_update
{
  "doc": {
    "about": "I love to go rock climbing xxx"
  },
  "upsert": {
    "first_name": "xiang",
    "last_name": "kevin",
    "age": 25,
    "about": "I love to go rock climbing zzz",
    "interests": [ "sports", "music" ]
  }
}

POST /megacorp/employee/_search
{
  "query": {
    "match": { "first_name": "John" }
  }
}

POST /megacorp/employee/_search
{
  "query": {
    "match": { "about": "rock climbing" }
  }
}

# match_phrase: match the given words exactly as a phrase
POST /megacorp/employee/_search
{
  "query": {
    "match_phrase": { "about": "rock climbing" }
  },
  "highlight": {
    "fields": {
      "about": {}
    }
  }
}

# Full-text match on last_name combined with a non-scoring age range filter
POST /megacorp/employee/_search
{
  "query": {
    "bool": {
      "must": {
        "match": { "last_name": "Smith" }
      },
      "filter": {
        "range": {
          "age": { "gte": 10, "lte": 30 }
        }
      }
    }
  }
}

# https://www.elastic.co/guide/en/elasticsearch/reference/current/fielddata.html
GET /megacorp/employee/_mapping

# Enable fielddata on the text field "interests" so it can be used in the terms aggregation below
PUT /megacorp/_mapping/employee
{
  "properties": {
    "interests": {
      "type": "text",
      "fielddata": true
    }
  }
}

# Aggregations can be nested for hierarchical rollups.
# Example: the average age of employees for each interest.
POST /megacorp/employee/_search
{
  "query": {
    "match": { "last_name": "Smith" }
  },
  "aggs": {
    "all_interests": {
      "terms": { "field": "interests" },
      "aggs": {
        "avg_age": {
          "avg": { "field": "age" }
        }
      }
    }
  }
}

分词和映射

GET /megacorp/employee/_search
GET /megacorp/employee/_mapping

# Analyzer test.
# The text is passed in a JSON body: it contains spaces, and the
# analyzer/field/text URL parameters are deprecated as of Elasticsearch 5.1.
GET /_analyze
{
  "analyzer": "standard",
  "text": "Text to analyze"
}

GET /megacorp/_analyze
{
  "field": "about",
  "text": "Black-cats"
}

GET /gb/_mapping
DELETE /gb

# Create an index with an explicit mapping.
# Elasticsearch 5.x splits the old "string" type into "text" (analyzed)
# and "keyword" (not analyzed).
PUT /gb
{
  "mappings": {
    "type_tweet": {
      "properties": {
        "tweet": {
          "type": "text",
          "analyzer": "english"
        },
        "date": {
          "type": "date"
        },
        "name": {
          "type": "text"
        },
        "user_id": {
          "type": "long"
        }
      }
    }
  }
}

# Adding a new field to an existing mapping is allowed
PUT /gb/_mapping/type_tweet
{
  "properties": {
    "tag": {
      "type": "keyword"
    }
  }
}

# Changing the type of an existing field is NOT allowed; this request is expected to fail
PUT /gb/_mapping/type_tweet
{
  "properties": {
    "tag": {
      "type": "long"
    }
  }
}

结构化查询和结构化过滤

# Structured queries and structured filters
# Rule of thumb: use query clauses for full-text search or anything that needs
# relevance scoring, and use filter clauses for everything else.
# A filter asks of every document: does the field contain this value? (yes/no,
# no relevance analysis and no scoring)
# A query asks of every document: how well does the field match this value?
# The following request can be used to validate a query and explain how it is executed:
# GET /megacorp/employee/_validate/query?explain
# The general structure looks like this:
GET /megacorp/employee/_search?explain
{
  "query": {
    "bool": {
      "must": [
        { "match": { "about": "rock" } },
        { "match": { "last_name": "Smith" } }
      ],
      "filter": {
        "range": {
          "age": { "gte": 10, "lte": 25 }
        }
      }
    }
  }
}

GET /megacorp/employee/1
GET /megacorp/employee/_search

GET /megacorp/employee/_search
{
  "query": {
    "match": { "about": "rock climbing" }
  }
}

# multi_match runs a match query against several fields at once
GET /megacorp/employee/_search
{
  "query": {
    "multi_match": {
      "query": "Smith",
      "fields": [ "first_name", "last_name" ]
    }
  }
}

# match_phrase: match documents that contain all the words together, as an exact phrase
GET /megacorp/employee/_search
{
  "query": {
    "match_phrase": { "about": "rock climbing" }
  }
}

# Show how the "about" field tokenizes a value.
# The text goes in a JSON body because it contains spaces and the
# field/text URL parameters are deprecated as of Elasticsearch 5.1.
GET /megacorp/_analyze
{
  "field": "about",
  "text": "I love to go rock climbing xxx"
}

# term means exact match: the search text is not analyzed, so the index must
# contain the whole search string as one single term. The "about" field was
# tokenized at index time, so no term "rock climbing" exists and the query
# below returns no hits.
GET /megacorp/employee/_search
{
  "query": {
    "term": { "about": "rock climbing" }
  }
}

# bool query: must, must_not and should
# must:     the document must match these clauses
# should:   one or more optional clauses. If the bool query has no must/filter
#           clause, at least one should clause has to match; when a must or
#           filter clause is present (as below), should clauses are optional
#           and only raise the score of documents that match them
# must_not: the document must not match these clauses
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "age": { "gte": 10, "lte": 25 }
          }
        }
      ],
      "should": [
        { "match": { "last_name": "Smith" } }
      ],
      "must_not": [
        { "match": { "first_name": "xiang" } }
      ]
    }
  }
}

结构化搜索

## Structured search
GET /megacorp/employee/_mapping

# Using a bool inside the filter clause
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "about": "rock climbing" } }
      ],
      "filter": {
        "bool": {
          "must": [
            { "range": { "age": { "gte": 25 } } }
          ]
        }
      }
    }
  }
}

# -- SQL equivalents --
# The fragments below are illustrative only (they are not complete requests),
# so they are kept as comments.
#
# SELECT product
# FROM   products
# WHERE  (price = 20 OR productID = "XHDK-A-1293-#fJ3")
#   AND  (price != 30)
#
# "filter" : {
#   "bool" : {
#     "should" : [
#       { "term" : {"price" : 20}},
#       { "term" : {"productID" : "XHDK-A-1293-#fJ3"}}
#     ],
#     "must_not" : {
#       "term" : {"price" : 30}
#     }
#   }
# }
#
# SELECT document
# FROM   products
# WHERE  productID      = "KDKE-B-9947-#kL5"
#   OR (     productID = "JODL-X-1937-#pV7"
#        AND price     = 30 )
#
# "filter" : {
#   "bool" : {
#     "should" : [
#       { "term" : {"productID" : "KDKE-B-9947-#kL5"}},
#       { "bool" : {
#         "must" : [
#           { "term" : {"productID" : "JODL-X-1937-#pV7"}},
#           { "term" : {"price" : 30}}
#         ]
#       }}
#     ]
#   }
# }

# Does a document have a value for a field?  e.g. WHERE tags IS NOT NULL
# exists query               <==> IS NOT NULL
# bool must_not + exists     <==> IS NULL
# (the old "missing" filter was removed in Elasticsearch 5.0)
GET /megacorp/employee/_search
{
  "query": {
    "bool": {
      "filter": {
        "exists": { "field": "age" }
      }
    }
  }
}

嵌套

### Nested objects
DELETE /my_index

# "comments" is mapped as nested so each comment is indexed as its own hidden document.
# Elasticsearch 5.x uses "text" in place of the old analyzed "string" type.
PUT /my_index
{
  "mappings": {
    "blogpost": {
      "properties": {
        "title": { "type": "text" },
        "body": { "type": "text" },
        "tags": { "type": "text" },
        "comments": {
          "type": "nested",
          "properties": {
            "name":    { "type": "text" },
            "comment": { "type": "text" },
            "age":     { "type": "short" },
            "stars":   { "type": "short" },
            "date":    { "type": "date" }
          }
        }
      }
    }
  }
}

GET /my_index/blogpost/_mapping
GET /my_index/blogpost/_search

PUT /my_index/blogpost/1
{
  "title": "Nest eggs",
  "body": "Making your money work...",
  "tags": [ "cash", "shares" ],
  "comments": [
    {
      "name": "John Smith",
      "comment": "Great article",
      "age": 28,
      "stars": 4,
      "date": "2014-09-01"
    },
    {
      "name": "Alice White",
      "comment": "More like this please",
      "age": 31,
      "stars": 5,
      "date": "2014-10-22"
    }
  ]
}

PUT /my_index/blogpost/2
{
  "title": "Investment secrets",
  "body": "What they don't tell you ...",
  "tags": [ "shares", "equities" ],
  "comments": [
    {
      "name": "Mary Brown",
      "comment": "Lies, lies, lies",
      "age": 42,
      "stars": 1,
      "date": "2014-10-18"
    },
    {
      "name": "John Smith",
      "comment": "You're making it up!",
      "age": 28,
      "stars": 2,
      "date": "2014-10-16"
    }
  ]
}

# Both conditions inside the nested query must hold for the same comment
GET /my_index/blogpost/_search
{
  "query": {
    "bool": {
      "must": [
        { "match": { "title": "eggs" } },
        {
          "nested": {
            "path": "comments",
            "query": {
              "bool": {
                "must": [
                  { "match": { "comments.name": "john" } },
                  { "match": { "comments.age": 28 } }
                ]
              }
            }
          }
        }
      ]
    }
  }
}

# Retrieve blog posts that received a comment in October, sorted by the lowest
# number of stars among each post's October comments.
# nested_path names the nested object the sort field belongs to (the 5.x sort
# documentation lists it as mandatory when sorting by a nested field).
GET /my_index/blogpost/_search
{
  "query": {
    "nested": {
      "path": "comments",
      "query": {
        "bool": {
          "filter": {
            "range": {
              "comments.date": {
                "gte": "2014-10-01",
                "lt": "2014-11-01"
              }
            }
          }
        }
      }
    }
  },
  "sort": {
    "comments.stars": {
      "order": "asc",
      "mode": "min",
      "nested_path": "comments",
      "nested_filter": {
        "range": {
          "comments.date": {
            "gte": "2014-10-01",
            "lt": "2014-11-01"
          }
        }
      }
    }
  }
}

# Aggregating on nested objects
GET /my_index/blogpost/_search
{
  "aggs": {
    "comments": {
      "nested": {
        "path": "comments"
      },
      "aggs": {
        "by_month": {
          "date_histogram": {
            "field": "comments.date",
            "interval": "month",
            "format": "yyyy-MM"
          },
          "aggs": {
            "avg_stars": {
              "avg": { "field": "comments.stars" }
            }
          }
        }
      }
    }
  }
}

# Enable fielddata on the text field "tags" so the terms aggregation below can use it
PUT /my_index/blogpost/_mapping
{
  "properties": {
    "tags": {
      "type": "text",
      "fielddata": true
    }
  }
}

# Expected result of the reverse_nested aggregation below:
# <1> There are four comments in total
# <2> Two of the comments were written by people aged between 20 and 30
# <3> Two blog posts are associated with those comments
# <4> The popular tags on those blog posts are shares, cash and equities
GET /my_index/blogpost/_search
{
  "aggs": {
    "comments": {
      "nested": {
        "path": "comments"
      },
      "aggs": {
        "age_group": {
          "histogram": {
            "field": "comments.age",
            "interval": 10
          },
          "aggs": {
            "blogposts": {
              "reverse_nested": {},
              "aggs": {
                "tags": {
                  "terms": { "field": "tags" }
                }
              }
            }
          }
        }
      }
    }
  }
}

agg聚合

# Elasticsearch aggregations
DELETE /user_analysys_little
PUT /user_analysys_little

# Mappings use Elasticsearch 5.x syntax: exact-value strings are "keyword"
# (formerly "string" + "index": "not_analyzed"); numeric and date fields are
# indexed by default, so they need no "index" setting.
PUT /user_analysys_little/_mapping/user
{
  "properties": {
    "userId":       { "type": "keyword" },
    "userName":     { "type": "keyword" },
    "provinceId":   { "type": "long" },
    "provinceName": { "type": "keyword" },
    "age":          { "type": "long" }
  }
}

PUT /user_analysys_little/_mapping/user_event
{
  "properties": {
    "userId":       { "type": "keyword" },
    "userName":     { "type": "keyword" },
    "provinceId":   { "type": "long" },
    "provinceName": { "type": "keyword" },
    "age":          { "type": "long" },
    "eventId":      { "type": "long" },
    "eventName":    { "type": "keyword" },
    "statDate":     { "type": "date" },
    "productName":  { "type": "keyword" }
  }
}

GET _cat/indices
GET /user_analysys_little/user/_search
GET /user_analysys_little/user_event/_search

# select userId from user_event where provinceName='青海' group by userId
# Omitting the "query" part computes the statistics over all documents.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "query": {
    "term": {
      "provinceName": { "value": "青海" }
    }
  },
  "aggs": {
    "group_userid": {
      "terms": {
        "field": "userId",
        "order": { "_count": "desc" }
      }
    }
  }
}

# Global bucket: ignores the query and aggregates over all documents.
# Compares the average age in Qinghai (青海) with the nationwide average age.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "query": {
    "term": {
      "provinceName": { "value": "青海" }
    }
  },
  "aggs": {
    "avg_age_青海": {
      "avg": { "field": "age" }
    },
    "all": {
      "global": {},
      "aggs": {
        "avg_age_全国": {
          "avg": { "field": "age" }
        }
      }
    }
  }
}

# Group by province, compute the average / max / min age per province,
# and additionally nest the distribution of individual ages.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "group_province": {
      "terms": { "field": "provinceName" },
      "aggs": {
        "avg_age": {
          "avg": { "field": "age" }
        },
        "max_age": {
          "max": { "field": "age" }
        },
        "min_age": {
          "min": { "field": "age" }
        },
        "group_age": {
          "terms": { "field": "age" }
        }
      }
    }
  }
}

# Histogram: bucket ages in intervals of 10, e.g. [10~19] [20~29] ...,
# and compute statistics per bucket.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "histogram_age": {
      "histogram": {
        "field": "age",
        "interval": 10
      },
      "aggs": {
        "max_age": {
          "max": { "field": "age" }
        },
        "min_age": {
          "min": { "field": "age" }
        },
        "avg_age": {
          "avg": { "field": "age" }
        }
      }
    }
  }
}

# Date histogram: time-based bucketing, here per day.
# extended_bounds sets the first and last bucket boundaries.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "date_histogram_statDate": {
      "date_histogram": {
        "field": "statDate",
        "interval": "day",
        "format": "yyyy-MM-dd",
        "time_zone": "+08:00",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2016-11-28",
          "max": "2016-12-31"
        }
      }
    }
  }
}

# Filter bucket: applies an extra filter on top of the query scope.
# The search covers everyone in Qinghai, but the aggregation only covers
# people in Qinghai aged 10~50.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "query": {
    "term": {
      "provinceName": { "value": "青海" }
    }
  },
  "aggs": {
    "avg_age_青海": {
      "filter": {
        "range": {
          "age": { "gte": 10, "lte": 50 }
        }
      },
      "aggs": {
        "avg_age": {
          "avg": { "field": "age" }
        }
      }
    }
  }
}

# Post filter: filters only the search hits, not the aggregation results.
# The hits cover people in Qinghai aged 10~50, but the aggregation covers
# everyone in Qinghai.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "query": {
    "term": {
      "provinceName": { "value": "青海" }
    }
  },
  "post_filter": {
    "range": {
      "age": { "gte": 10, "lte": 50 }
    }
  },
  "aggs": {
    "avg_age": {
      "avg": { "field": "age" }
    }
  }
}

# Distinct count: number of unique users per day, i.e. DAU.
# Note: cardinality is an approximation computed by an algorithm; it is not
# 100% exact.
# precision_threshold tunes the accuracy. It accepts a number between 0 and
# 40,000; larger values are treated as 40,000. This example aims for very
# accurate results while the field has at most 100 unique values. The
# algorithm cannot guarantee that, but when the cardinality is below the
# threshold the result is almost always 100% correct. Above the threshold it
# starts trading accuracy for memory, which introduces error into the metric.
GET /user_analysys_little/user_event/_search
{
  "size": 1,
  "aggs": {
    "date_histogram_statDate": {
      "date_histogram": {
        "field": "statDate",
        "interval": "day",
        "format": "yyyy-MM-dd",
        "time_zone": "+08:00",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2016-11-28",
          "max": "2016-12-31"
        }
      },
      "aggs": {
        "distinct_userId": {
          "cardinality": {
            "field": "userId",
            "precision_threshold": 100
          }
        }
      }
    }
  }
}

0 0