java通过Solr的Suggest实现提示词

来源:互联网 发布:seo专员做什么的 编辑:程序博客网 时间:2024/06/05 10:15
需求:在Web端或移动设备上实现用户输入汉字或汉字的首字母,提供关联词提示。 

解决方案:通过Solr提供的Suggest实现此功能,Solr版本为4.10.4 

在Solr的配置文件solrconfig.xml文件中修改Suggest的配置,通过提示词文件构建搜索索引

<!-- Suggest component: defines one suggester, "mySuggester", whose entries are read from a dictionary file. -->
<searchComponent name="suggest" class="solr.SuggestComponent">
  <lst name="suggester">
    <!-- Name of this suggester; requests select it via suggest.dictionary. -->
    <str name="name">mySuggester</str>
    <!-- Lookup implementation used to match the typed text against the dictionary. -->
    <str name="lookupImpl">AnalyzingLookupFactory</str>
    <!-- Dictionary implementation: entries come from a file (see sourceLocation). -->
    <str name="dictionaryImpl">FileDictionaryFactory</str>
    <!-- Field to search. -->
    <str name="field">suggest</str>
    <!-- The dictionary file. -->
    <str name="sourceLocation">suggest.txt</str>
    <!-- Field type used for the suggester. -->
    <str name="suggestAnalyzerFieldType">string</str>
    <!-- NOTE(review): with this set to false the suggester is presumably not rebuilt on optimize; confirm how the index gets built. -->
    <str name="buildOnOptimize">false</str>
  </lst>
</searchComponent>

<!-- Request handler exposed at /suggest; it runs only the "suggest" component. -->
<requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <!-- Suggestions are enabled by default for requests to this handler. -->
    <str name="suggest">true</str>
    <!-- Number of suggestions returned. -->
    <str name="suggest.count">5</str>
  </lst>
  <arr name="components">
    <str>suggest</str>
  </arr>
</requestHandler>

参数说明: 
name:suggester的名字,如果设置多个,可以在请求中指定。 
lookupImpl:查找方式的具体实现 
dictionaryImpl:字典的具体实现 
field:搜索的字段 
sourceLocation:字典文件 
suggestAnalyzerFieldType:用于分析提示词的字段类型(使用该字段类型对应的分析器) 
buildOnOptimize:是否在索引执行optimize之后重建提示词索引(此处为false,即optimize时不自动重建) 
suggest.count:返回的搜索结果的数量 

因为要同时提供汉字和汉字拼音首字母的搜索,因此在构建字典文件时需要进行特殊处理,字典文件样例如下: 

abesb|阿巴二氏病
阿巴二氏病|abesb
abkw|阿巴卡韦
阿巴卡韦|abkw
abkwsfd|阿巴卡韦双夫定
阿巴卡韦双夫定|abkwsfd
abkwsfdp|阿巴卡韦双夫定片
阿巴卡韦双夫定片|abkwsfdp


这样做就从业务需求上可以满足,但索引量成倍增多,在数据量不是特别大的时候性能问题基本可以忽略。 
搜索测试的链接如下: 
http://127.0.0.1:8080/solr/metis/suggest?qt=suggest&suggest.dictionary=mySuggester&wt=json&suggest.q=天 
返回结果如下: 

{
  "responseHeader": {
    "status": 0,
    "QTime": 1
  },
  "suggest": {
    "mySuggester": {
      "天": {
        "numFound": 5,
        "suggestions": [
          { "term": "天一止咳|tyzk", "weight": 1, "payload": "" },
          { "term": "天一止咳糖浆|tyzktj", "weight": 1, "payload": "" },
          { "term": "天丹通络|tdtl", "weight": 1, "payload": "" },
          { "term": "天丹通络胶囊|tdtljn", "weight": 1, "payload": "" },
          { "term": "天仙藤|txt", "weight": 1, "payload": "" }
        ]
      }
    }
  }
}


页面上通过jQuery的Autocomplete功能实现。 
页面代码如下: 
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Hello World!</title>
    <link rel="stylesheet" href="./css/jquery-ui.css"/>
    <script src="./js/jquery-1.8.3.js"></script>
    <script src="./js/jquery-ui.js"></script>
</head>
<body>
<div class="ui-widget">
    <label for="tags">Tags: </label>
    <input type="text" id="tags"/>
</div>
<!-- The script lives inside <body>: a <script> element after </body> is not valid HTML. -->
<script>
    require('./renderer.js');
    require('devtron').install();
    var suggest = require('./apps/suggest/suggest.js');

    $(function () {
        $("#tags").autocomplete({
            minLength: 0,
            delay: 100,
            // Fetch suggestions through the widget's own source callback instead of
            // replacing the "source" option from an oninput handler. That way the
            // menu is rendered from the response to the term that triggered the
            // lookup, not from whatever array happened to be installed when the
            // widget's delay timer fired.
            source: function (request, response) {
                suggest.search(request.term).then(function (data) {
                    response(data);
                }, function (error) {
                    console.error(error);
                    // The widget requires response() to be called even on failure.
                    response([]);
                });
            }
        });
    });
</script>
</body>
</html>

JavaScript部分的代码如下,需要对搜索结果的词进行格式化,这部分功能也可以通过后端实现,但性能没有前端从Solr获取结果处理快: 

const http = require('http'),
    qs = require('querystring');

var Suggest = exports;

/**
 * Ask the Solr `/suggest` handler for suggestions matching `keyword`.
 *
 * @param {string} keyword - Text typed by the user (Chinese characters or
 *     pinyin initials); sent as `suggest.q`.
 * @returns {Promise<Array<string|{label: string, value: string}>>} Resolves
 *     with the formatted suggestions (see `convert`). Rejects with an `Error`
 *     when the request fails, Solr answers with a non-200 status, or the
 *     response body cannot be parsed.
 */
Suggest.search = function (keyword) {
    var query = qs.stringify({
        qt: 'suggest',
        'suggest.dictionary': 'mySuggester',
        'wt': 'json',
        'suggest.q': keyword
    });
    var options = {
        hostname: '127.0.0.1',
        port: 8080,
        path: '/solr/metis/suggest?' + query,
        method: 'GET'
    };

    return new Promise(function (resolve, reject) {
        var req = http.request(options, function (res) {
            var body = '';
            // Decode as UTF-8 at the stream level so a multi-byte character
            // split across two chunks is not corrupted.
            res.setEncoding('utf8');
            res.on('data', function (chunk) {
                body += chunk;
            });
            res.on('error', reject);
            // Parse only once the whole body has arrived; a single 'data'
            // event may carry just part of the JSON document.
            res.on('end', function () {
                if (res.statusCode !== 200) {
                    reject(new Error('Solr suggest request failed with HTTP status ' + res.statusCode));
                    return;
                }
                try {
                    resolve(convert(keyword, body));
                } catch (e) {
                    // Without this, a parse error would be thrown inside the
                    // event callback and the promise would never settle.
                    reject(e);
                }
            });
        });
        req.on('error', function (e) {
            reject(new Error(e.message));
        });
        req.end();
    });
};

/**
 * Turn a Solr suggest response into entries usable as a jQuery UI
 * Autocomplete source.
 *
 * Each suggestion term has the form `left|right`. When `left` consists only
 * of ASCII letters and digits (a pinyin-initials entry), the result is
 * `{label: left, value: right}`; otherwise the result is the plain string
 * `left`.
 *
 * @param {string} keyword - The query text; Solr keys its result by it.
 * @param {string|Buffer} data - Raw JSON response body.
 * @returns {Array<string|{label: string, value: string}>} Formatted
 *     suggestions; empty when the response has no entry for `keyword`.
 * @throws {SyntaxError} When `data` is not valid JSON.
 */
function convert(keyword, data) {
    var result = [];
    var json = JSON.parse(String(data));

    // Tolerate responses that lack the suggester or the keyword entry.
    var suggesters = (json && json.suggest) || {};
    var entry = (suggesters.mySuggester || {})[keyword];
    if (!entry || !Array.isArray(entry.suggestions)) {
        return result;
    }
    if (!(Number.parseInt(entry.numFound, 10) > 0)) {
        return result;
    }

    var asciiOnly = /^[A-Za-z0-9]+$/;
    for (var i = 0; i < entry.suggestions.length; i++) {
        var parts = String(entry.suggestions[i]['term']).split('|');
        if (asciiOnly.test(parts[0]) && parts.length > 1) {
            result.push({
                label: parts[0],
                value: parts[1]
            });
        } else {
            result.push(parts[0]);
        }
    }
    return result;
}


0 0
原创粉丝点击