Elasticsearch优秀的索引设置技巧

来源:互联网 发布:qq扣字软件 编辑:程序博客网 时间:2024/05/20 05:57

下面的索引设置来自国外一家优秀的图片搜索公司,从它的索引设计中可以学到一些提升索引设计水平的技巧:

{  "500px.photos-2016-05-06-20-09": {    "aliases": {      "500px.photos": {}    },    "mappings": {      "photo": {        "_all": {          "enabled": false        },        "_routing": {          "required": true,          "path": "user_id"        },        "properties": {          "camera": {            "type": "string",            "fields": {              "exact": {                "type": "string",                "analyzer": "exact"              },              "not_analyzed": {                "type": "string",                "index": "not_analyzed"              },              "prefix": {                "type": "string",                "index_analyzer": "prefix",                "search_analyzer": "prefix_search"              }            }          },          "category": {            "type": "integer"          },          "collections_count": {            "type": "long"          },          "comments_count": {            "type": "integer"          },          "context_tags": {            "type": "nested",            "properties": {              "context": {                "type": "string",                "index": "not_analyzed"              },              "id": {                "type": "integer"              },              "name": {                "type": "string",                "fields": {                  "exact": {                    "type": "string",                    "analyzer": "exact"                  },                  "exact_stemmed_synonyms": {                    "type": "string",                    "index_analyzer": "exact_stemmed_synonyms",                    "search_analyzer": "exact_stemmed_synonyms_search"                  },                  "not_analyzed": {                    "type": "string",                    "index": "not_analyzed"                  },                  "prefix": {                    "type": "string",                    "index_analyzer": "prefix",                    "search_analyzer": "prefix_search"                 
 },                  "stemmed": {                    "type": "string",                    "analyzer": "stemmed"                  },                  "stemmed_synonyms": {                    "type": "string",                    "index_analyzer": "stemmed_synonyms",                    "search_analyzer": "stemmed_synonyms_search"                  }                }              },              "name_de": {                "type": "string",                "fields": {                  "exact": {                    "type": "string",                    "analyzer": "exact"                  },                  "not_analyzed": {                    "type": "string",                    "index": "not_analyzed"                  },                  "prefix": {                    "type": "string",                    "index_analyzer": "prefix",                    "search_analyzer": "prefix_search"                  },                  "stemmed": {                    "type": "string",                    "analyzer": "stemmed_de"                  }                }              },              "weight": {                "type": "float"              },              "weight_new": {                "type": "float"              }            }          },          "context_tags_tags_count": {            "type": "integer"          },          "converted": {            "type": "integer"          },          "created_at": {            "type": "date",            "format": "dateOptionalTime"          },          "delivery_type_current": {            "type": "nested",            "properties": {              "delivery_type": {                "type": "string",                "index": "not_analyzed"              },              "enabled": {                "type": "boolean"              }            }          },          "description": {            "type": "string",            "boost": 5,            "fields": {              "prefix": {                "type": "string",                "index_analyzer": 
"prefix",                "search_analyzer": "prefix_search"              },              "stemmed": {                "type": "string",                "analyzer": "stemmed"              },              "stemmed_synonyms": {                "type": "string",                "index_analyzer": "stemmed_synonyms",                "search_analyzer": "stemmed_synonyms_search"              }            }          },          "favorites_count": {            "type": "integer"          },          "feature": {            "type": "nested",            "properties": {              "end_at": {                "type": "date",                "format": "dateOptionalTime"              },              "name": {                "type": "string",                "index": "not_analyzed"              },              "start_at": {                "type": "date",                "format": "dateOptionalTime"              }            }          },          "for_sale": {            "type": "boolean"          },          "galleries": {            "type": "nested",            "properties": {              "id": {                "type": "long"              },              "position": {                "type": "long"              }            }          },          "geo_coordinates": {            "type": "geo_point",            "lat_lon": true          },          "height": {            "type": "integer"          },          "hi_res_uploaded": {            "type": "integer"          },          "highest_rating": {            "type": "float"          },          "id": {            "type": "integer"          },          "image_format": {            "type": "integer"          },          "lens": {            "type": "string",            "fields": {              "exact": {                "type": "string",                "analyzer": "exact"              },              "not_analyzed": {                "type": "string",                "index": "not_analyzed"              },              "prefix": {               
 "type": "string",                "index_analyzer": "prefix",                "search_analyzer": "prefix_search"              }            }          },          "license_requests_enabled": {            "type": "boolean"          },          "license_type": {            "type": "integer"          },          "licensed_at": {            "type": "date",            "format": "dateOptionalTime"          },          "licensing_status": {            "type": "integer"          },          "licensing_status_organizer_index": {            "type": "integer"          },          "name": {            "type": "string",            "fields": {              "exact": {                "type": "string",                "analyzer": "exact"              },              "exact_stemmed_synonyms": {                "type": "string",                "index_analyzer": "exact_stemmed_synonyms",                "search_analyzer": "exact_stemmed_synonyms_search"              },              "not_analyzed": {                "type": "string",                "index": "not_analyzed"              },              "prefix": {                "type": "string",                "index_analyzer": "prefix",                "search_analyzer": "prefix_search"              },              "stemmed": {                "type": "string",                "analyzer": "stemmed"              },              "stemmed_synonyms": {                "type": "string",                "index_analyzer": "stemmed_synonyms",                "search_analyzer": "stemmed_synonyms_search"              }            }          },          "nsfw": {            "type": "boolean"          },          "photo_sets": {            "type": "nested",            "properties": {              "id": {                "type": "long"              },              "position": {                "type": "long"              }            }          },          "privacy": {            "type": "integer"          },          "rating": {            "type": "float"       
   },          "sales_count": {            "type": "integer"          },          "status": {            "type": "integer"          },          "taken_at": {            "type": "date",            "format": "dateOptionalTime"          },          "times_viewed": {            "type": "integer"          },          "updated_at": {            "type": "date",            "format": "dateOptionalTime"          },          "user_firstname": {            "type": "string",            "boost": 4,            "fields": {              "exact": {                "type": "string",                "analyzer": "exact"              },              "not_analyzed": {                "type": "string",                "index": "not_analyzed"              },              "prefix": {                "type": "string",                "index_analyzer": "prefix",                "search_analyzer": "prefix_search"              }            }          },          "user_id": {            "type": "long"          },          "user_lastname": {            "type": "string",            "boost": 4,            "fields": {              "exact": {                "type": "string",                "analyzer": "exact"              },              "not_analyzed": {                "type": "string",                "index": "not_analyzed"              },              "prefix": {                "type": "string",                "index_analyzer": "prefix",                "search_analyzer": "prefix_search"              }            }          },          "user_name": {            "type": "string",            "boost": 4,            "fields": {              "exact": {                "type": "string",                "analyzer": "exact"              },              "not_analyzed": {                "type": "string",                "index": "not_analyzed"              },              "prefix": {                "type": "string",                "index_analyzer": "prefix",                "search_analyzer": "prefix_search"            
  }            }          },          "user_partner_optout": {            "type": "boolean"          },          "user_status": {            "type": "integer"          },          "user_store_on": {            "type": "boolean"          },          "user_username": {            "type": "string",            "boost": 4,            "fields": {              "exact": {                "type": "string",                "analyzer": "exact"              },              "not_analyzed": {                "type": "string",                "index": "not_analyzed"              },              "prefix": {                "type": "string",                "index_analyzer": "prefix",                "search_analyzer": "prefix_search"              }            }          },          "votes_count": {            "type": "integer"          },          "width": {            "type": "integer"          }        }      }    },    "settings": {      "index": {        "creation_date": "1462579747496",        "uuid": "V9kxOgQPR82FXpj-UN_Rdw",        "analysis": {          "char_filter": {            "amp_and": {              "type": "mapping",              "mappings": [                "&=> and "              ]            },            "punctuation": {              "type": "mapping",              "mappings": [                ".=> "              ]            }          },          "filter": {            "preserved_asciifolding": {              "type": "asciifolding",              "preserve_original": "true"            },            "large_prefixer": {              "max_gram": "100",              "min_gram": "1",              "type": "edgeNGram",              "side": "front"            },            "prefixer": {              "max_gram": "8",              "type": "edgeNGram",              "min_gram": "2",              "side": "front"            },            "german_stemmer": {              "type": "stemmer",              "language": "light_german"            },            "german_stop": {             
 "type": "stop",              "stopwords": "_german_"            },            "fivegrammer": {              "min_gram": "5",              "type": "nGram",              "max_gram": "5"            },            "synonyms": {              "type": "synonym",              "synonyms_path": "analysis/wn_s.pl",              "format": "wordnet"            },            "trigrammer": {              "type": "nGram",              "min_gram": "3",              "max_gram": "3"            },            "custom_stems": {              "type": "stemmer_override",              "rules_path": "analysis/custom_stems.txt"            }          },          "analyzer": {            "exact_stemmed_synonyms": {              "type": "custom",              "char_filter": [                "amp_and"              ],              "filter": [                "asciifolding",                "lowercase",                "trim",                "custom_stems",                "kstem",                "synonyms",                "custom_stems",                "stop"              ],              "tokenizer": "keyword"            },            "stemmed": {              "filter": [                "standard",                "lowercase",                "custom_stems",                "stop",                "kstem"              ],              "tokenizer": "standard"            },            "exact_stemmed_synonyms_search": {              "type": "custom",              "char_filter": [                "amp_and"              ],              "filter": [                "standard",                "asciifolding",                "lowercase",                "trim",                "custom_stems",                "stop",                "kstem"              ],              "tokenizer": "standard"            },            "synonyms": {              "type": "custom",              "char_filter": [                "amp_and"              ],              "filter": [                "standard",                "lowercase",               
 "synonyms"              ],              "tokenizer": "standard"            },            "partial": {              "filter": [                "preserved_asciifolding",                "large_prefixer"              ],              "tokenizer": "lowercase"            },            "prefix_search": {              "tokenizer": "lowercase"            },            "stemmed_synonyms": {              "type": "custom",              "char_filter": [                "amp_and"              ],              "filter": [                "standard",                "asciifolding",                "lowercase",                "trim",                "custom_stems",                "kstem",                "synonyms",                "custom_stems",                "stop"              ],              "tokenizer": "standard"            },            "fivegram_ascii": {              "filter": [                "standard",                "asciifolding",                "lowercase",                "trim",                "fivegrammer"              ],              "tokenizer": "standard"            },            "prefix": {              "filter": [                "preserved_asciifolding",                "prefixer"              ],              "tokenizer": "lowercase"            },            "exact": {              "type": "custom",              "char_filter": [                "amp_and"              ],              "filter": [                "asciifolding",                "lowercase",                "trim"              ],              "tokenizer": "keyword"            },            "stemmed_synonyms_search": {              "type": "custom",              "char_filter": [                "amp_and"              ],              "filter": [                "standard",                "asciifolding",                "lowercase",                "trim",                "custom_stems",                "stop",                "kstem"              ],              "tokenizer": "standard"            },            
"trigram": {              "filter": [                "lowercase",                "trim",                "trigrammer"              ],              "tokenizer": "keyword"            },            "stemmed_de": {              "filter": [                "standard",                "asciifolding",                "lowercase",                "german_stop",                "german_normalization",                "german_stemmer"              ],              "tokenizer": "standard"            },            "partial_search": {              "tokenizer": "lowercase"            }          }        },        "number_of_replicas": "2",        "number_of_shards": "20",        "refresh_interval": "1",        "version": {          "created": "1040499"        }      }    },    "warmers": {}  }}
  • 索引名 500px.photos-2016-05-06-20-09 中带有具体的日期和时间,用来记录这一版索引创建(修改)的时间,是一个值得借鉴的小细节
  • 每次修改后新建的索引都会设置同一个别名:500px.photos,这样访问方始终使用别名即可,无需关心具体的索引名
  • 参数 "_all": { "enabled": false }:禁用 _all 字段,有效避免了索引阶段为构建 _all 全文字段而产生的 CPU 和存储空间开销
  • 设置 _routing(required 为 true),以 user_id 作为路由路径,使同一用户的所有图片都存储在同一个 shard 中;搜索某个用户的图片时带上路由值,就只需查询该 shard,从而提高搜索效率
  • 针对 camera 的查询,设置了三个不同的子字段(multi-fields):1.exact 2.not_analyzed 3.prefix,分别对应归一化(去音标、转小写、去首尾空格)后的整体精确查找、不分词的原值匹配、前缀查找
  • 设置多条能判断图片质量好坏的计数,如collections_count,comments_count,favorites_count,context_tags_tags_count,sales_count,votes_count
  • context_tags是最精华的地方,详细在另一个文档中参考,参考地址:
  • context_tags_tags_count 单独拿出来说:通过这个计数来衡量某张图片中每个关键词的权重,关键词越多,说明内容越丰富,相应地每个关键词的权重就应该偏低。
  • converted:图片被编辑的次数,暂时不清楚其目的;猜测是某张图片被编辑的次数越多,其质量相对越高,因为只有好的图片,摄影师才会用心对待(反复修改内容)
阅读全文
0 0