Spring Data ES Pinyin Plugin

1. Download and install

    • Download the pinyin plugin that matches your ES version; the ES version in use here is 2.4.5, which corresponds to plugin version 1.8.5:
      https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v1.8.5/elasticsearch-analysis-pinyin-1.8.5.zip
    • Create a pinyin directory under {ES_HOME}/plugins
    • Unzip the plugin zip file into {ES_HOME}/plugins/pinyin/
    • Restart ES
2. Create the index

    curl -XPUT 'http://localhost:9200/phr/' -d '{
        "index": {
            "analysis": {
                "analyzer": {
                    "pinyin_analyzer": {
                        "tokenizer": "my_pinyin"
                    }
                },
                "tokenizer": {
                    "my_pinyin": {
                        "type": "pinyin",
                        // true: keep first letters, e.g. 刘德华 -> [ldh]
                        "keep_first_letter": true,
                        // false: do not keep each first letter as a separate token, e.g. 刘德华 -> [l, d, h]
                        "keep_separate_first_letter": false,
                        // true: keep full pinyin, e.g. 刘德华 -> [liu, de, hua]
                        "keep_full_pinyin": true,
                        // true: keep joined full pinyin, e.g. 刘德华 -> [liudehua]
                        "keep_joined_full_pinyin": true,
                        // max length of the first-letter result
                        "limit_first_letter_length": 16,
                        // lowercase non-Chinese letters
                        "lowercase": true,
                        // duplicated terms are removed, e.g. 德的 -> de
                        "remove_duplicated_term": true
                    }
                }
            }
        }
    }'

(The // comments are only explanatory; JSON does not allow comments, so remove them before actually sending the request.)
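
If you would rather create the index from application code, Spring Data Elasticsearch's ElasticsearchTemplate can apply the same settings. This is a minimal sketch assuming Spring Data Elasticsearch 2.x (the version line that pairs with ES 2.4.5), where createIndex(String, Object) accepts a settings JSON string; the class name PinyinIndexInitializer is only illustrative.

    import org.springframework.beans.factory.annotation.Autowired;
    import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
    import org.springframework.stereotype.Component;

    @Component
    public class PinyinIndexInitializer {

        @Autowired
        private ElasticsearchTemplate elasticsearchTemplate;

        // Comment-free version of the analysis settings from the curl request above
        private static final String PINYIN_SETTINGS = "{"
                + "\"index\":{\"analysis\":{"
                + "\"analyzer\":{\"pinyin_analyzer\":{\"tokenizer\":\"my_pinyin\"}},"
                + "\"tokenizer\":{\"my_pinyin\":{"
                + "\"type\":\"pinyin\","
                + "\"keep_first_letter\":true,"
                + "\"keep_separate_first_letter\":false,"
                + "\"keep_full_pinyin\":true,"
                + "\"keep_joined_full_pinyin\":true,"
                + "\"limit_first_letter_length\":16,"
                + "\"lowercase\":true,"
                + "\"remove_duplicated_term\":true"
                + "}}}}}";

        public void createPhrIndex() {
            // Create the phr index with the pinyin analysis settings if it does not exist yet
            if (!elasticsearchTemplate.indexExists("phr")) {
                elasticsearchTemplate.createIndex("phr", PINYIN_SETTINGS);
            }
        }
    }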

3. Test the analyzer

    http://localhost:9200/phr/_analyze?text=刘德华&analyzer=pinyin_analyzer

    {
        "tokens": [
            { "token": "liu",      "start_offset": 0, "end_offset": 1, "type": "word", "position": 0 },
            { "token": "de",       "start_offset": 1, "end_offset": 2, "type": "word", "position": 1 },
            { "token": "hua",      "start_offset": 2, "end_offset": 3, "type": "word", "position": 2 },
            { "token": "刘德华",   "start_offset": 0, "end_offset": 3, "type": "word", "position": 3 },
            { "token": "liudehua", "start_offset": 0, "end_offset": 8, "type": "word", "position": 4 },
            { "token": "ldh",      "start_offset": 0, "end_offset": 3, "type": "word", "position": 5 }
        ]
    }
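
The same check can be done from Java through the ES 2.x transport client (which Spring Data Elasticsearch exposes via elasticsearchTemplate.getClient()). This is a rough sketch under that assumption; AnalyzerSmokeTest and printPinyinTokens are illustrative names, and the expected terms are the ones shown in the response above.

    import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
    import org.elasticsearch.client.Client;

    public class AnalyzerSmokeTest {

        // Runs the pinyin analyzer on 刘德华 against the phr index and prints each token,
        // mirroring the _analyze HTTP call above.
        public static void printPinyinTokens(Client client) {
            AnalyzeResponse response = client.admin().indices()
                    .prepareAnalyze("phr", "刘德华")
                    .setAnalyzer("pinyin_analyzer")
                    .get();
            for (AnalyzeResponse.AnalyzeToken token : response.getTokens()) {
                System.out.println(token.getTerm() + " @" + token.getPosition());
            }
            // Expected terms: liu, de, hua, 刘德华, liudehua, ldh
        }
    }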

4. ES bean

    import org.springframework.data.annotation.Id;
    import org.springframework.data.elasticsearch.annotations.Document;
    import org.springframework.data.elasticsearch.annotations.Field;
    import org.springframework.data.elasticsearch.annotations.FieldType;

    // @Document is required for Spring Data Elasticsearch entities; "phr" is the index created in step 2
    @Document(indexName = "phr")
    public class TestEsBean {

        @Id
        private Long id;

        // Pinyin analyzer applied both at index time and at search time
        @Field(type = FieldType.String, searchAnalyzer = "pinyin_analyzer", analyzer = "pinyin_analyzer")
        private String name;

        public Long getId() {
            return id;
        }

        public void setId(Long id) {
            this.id = id;
        }

        public String getName() {
            return name;
        }

        public void setName(String name) {
            this.name = name;
        }
    }
    • type: defaults to auto; the field type must be specified explicitly here.
    • analyzer: the analyzer used when building the index.
    • searchAnalyzer: the analyzer used at search time.
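
To see the analyzer in action end to end, the bean can be queried through a standard repository or ElasticsearchTemplate. The sketch below assumes Spring Data Elasticsearch 2.x; TestEsBeanRepository, PinyinSearchExample, and searchByPinyin are illustrative names, and the repository is only shown to indicate how the bean is usually wired.

    import java.util.List;

    import org.elasticsearch.index.query.QueryBuilders;
    import org.springframework.data.elasticsearch.core.ElasticsearchTemplate;
    import org.springframework.data.elasticsearch.core.query.NativeSearchQueryBuilder;
    import org.springframework.data.elasticsearch.core.query.SearchQuery;
    import org.springframework.data.elasticsearch.repository.ElasticsearchRepository;

    // Illustrative repository for the bean above
    interface TestEsBeanRepository extends ElasticsearchRepository<TestEsBean, Long> {
    }

    class PinyinSearchExample {

        // Because both analyzer and searchAnalyzer are pinyin_analyzer, a plain match
        // query on "name" can be fed pinyin input instead of the original Chinese text.
        static List<TestEsBean> searchByPinyin(ElasticsearchTemplate template, String keyword) {
            SearchQuery query = new NativeSearchQueryBuilder()
                    .withQuery(QueryBuilders.matchQuery("name", keyword))
                    .build();
            return template.queryForList(query, TestEsBean.class);
        }
    }

With this mapping, searchByPinyin(template, "liudehua") or searchByPinyin(template, "ldh") should both match a document whose name is 刘德华, in line with the tokens produced in step 3.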