solrj实现solr的suggestion(基于spellcheck)

来源:互联网 发布:摄像头监控软件下载 编辑:程序博客网 时间:2024/05/16 06:50

参考wiki:http://wiki.apache.org/solr/Suggester/


在solrconfig.xml中设置组件:

添加suggest组件:

<!-- Search component registered as "suggest": a SpellCheckComponent with a single
     spellchecker that delegates to the Suggester class using the TSTLookup implementation. -->
<searchComponent name="suggest" class="solr.SpellCheckComponent">
  <lst name="spellchecker">
    <!-- Dictionary name; the request handler selects it through spellcheck.dictionary. -->
    <str name="name">suggest</str>
    <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
    <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
    <!-- NOTE(review): both an index field and a dictionary file are configured here;
         confirm against the Suggester documentation which of the two is actually used. -->
    <str name="field">introduction</str>
    <str name="sourceLocation">dict.txt</str>
    <str name="buildOnCommit">true</str>
  </lst>
</searchComponent>


设置suggestHandler

 <requestHandler name="/suggest" class="org.apache.solr.handler.component.SearchHandler">
   <!-- Default parameters applied to requests sent to /suggest. -->
   <lst name="defaults">
     <str name="spellcheck">true</str>
     <!-- Must match the spellchecker name declared in the "suggest" search component. -->
     <str name="spellcheck.dictionary">suggest</str>
     <str name="spellcheck.count">10</str>
     <str name="spellcheck.onlyMorePopular">true</str>
     <str name="spellcheck.collate">true</str>
   </lst>
   <!-- Only the "suggest" component is listed for this handler. -->
   <arr name="components">
     <str>suggest</str>
   </arr>
 </requestHandler>

使用solrj的实现代码如下:

package cn.wzb;

import java.util.List;
import java.util.Map;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.apache.solr.client.solrj.response.SpellCheckResponse.Collation;
import org.apache.solr.client.solrj.response.SpellCheckResponse.Correction;
import org.apache.solr.client.solrj.response.SpellCheckResponse.Suggestion;

/**
 * Command-line demo that sends a prefix to the Solr {@code /suggest} handler through SolrJ
 * and prints every part of the returned {@link SpellCheckResponse}: the suggestion list,
 * the suggestion map, the suggestion for the queried token, and the collations.
 */
public class TestSpellcheck {

    /** Base URL of the Solr instance queried by this demo. */
    private static final String SOLR_URL = "http://localhost:8983/solr";

    /** Request handler the query is routed to. */
    private static final String SUGGEST_HANDLER = "/suggest";

    /**
     * Runs one suggestion query for a fixed token and prints the result to standard output.
     * If the query fails, the error is reported on standard error and the method returns
     * without attempting to read the (absent) response.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SolrServer solr = new HttpSolrServer(SOLR_URL);
        String token = "学";

        // Equivalent raw request:
        // http://localhost:8983/solr/spell?q=学生&spellcheck=on&spellcheck.build=true
        SolrQuery params = buildQuery(token);

        QueryResponse response = null;
        try {
            response = solr.query(params);
            System.out.println("查询耗时:" + response.getQTime());
        } catch (SolrServerException e) {
            reportFailure(e);
        } catch (Exception e) {
            reportFailure(e);
        } finally {
            // Single shutdown point: the response is fully materialized once query() returns,
            // so the client is not needed while printing.
            solr.shutdown();
        }

        if (response == null) {
            // The query failed and was already reported; there is nothing to print.
            return;
        }

        SpellCheckResponse spellCheckResponse = response.getSpellCheckResponse();
        if (spellCheckResponse != null) {
            printSuggestionList(spellCheckResponse);
            printSuggestionMap(spellCheckResponse);
            printSuggestionFor(spellCheckResponse, token);
            printCollations(spellCheckResponse);
        }
        System.out.println("查询耗时:" + response.getQTime());
    }

    /** Builds the suggestion request for {@code token}, routed to the suggest handler. */
    private static SolrQuery buildQuery(String token) {
        SolrQuery params = new SolrQuery();
        params.set("qt", SUGGEST_HANDLER);
        params.set("q", token);
        params.set("spellcheck", "on");
        // NOTE(review): this asks Solr to build the dictionary on every request, which is
        // presumably why the sample run is slow; consider dropping it once the dictionary exists.
        params.set("spellcheck.build", "true");
        params.set("spellcheck.onlyMorePopular", "true");
        params.set("spellcheck.count", "100");
        params.set("spellcheck.alternativeTermCount", "4");
        params.set("spellcheck.extendedResults", "true");
        params.set("spellcheck.maxResultsForSuggest", "5");
        params.set("spellcheck.collate", "true");
        params.set("spellcheck.collateExtendedResults", "true");
        params.set("spellcheck.maxCollationTries", "5");
        params.set("spellcheck.maxCollations", "3");
        return params;
    }

    /** Reports a failed query on standard error: the message first, then the stack trace. */
    private static void reportFailure(Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
    }

    /** Prints every suggestion in the order returned by {@code getSuggestions()}. */
    private static void printSuggestionList(SpellCheckResponse spellCheckResponse) {
        List<Suggestion> suggestionList = spellCheckResponse.getSuggestions();
        for (Suggestion suggestion : suggestionList) {
            System.out.println("Suggestions NumFound: " + suggestion.getNumFound());
            System.out.println("Token: " + suggestion.getToken());
            System.out.print("Suggested: ");
            // Alternatives stay on the label's line, matching the other listings.
            System.out.print(joinAlternatives(suggestion));
            System.out.println();
        }
        System.out.println();
    }

    /** Prints every entry of the token-to-suggestion map. */
    private static void printSuggestionMap(SpellCheckResponse spellCheckResponse) {
        Map<String, Suggestion> suggestedMap = spellCheckResponse.getSuggestionMap();
        for (Map.Entry<String, Suggestion> entry : suggestedMap.entrySet()) {
            System.out.println("suggestionName: " + entry.getKey());
            printSuggestionDetails(entry.getValue());
        }
    }

    /**
     * Prints the suggestion registered for {@code token}, the first suggested word,
     * and nothing for the details when Solr returned no suggestion for that token.
     */
    private static void printSuggestionFor(SpellCheckResponse spellCheckResponse, String token) {
        Suggestion suggestion = spellCheckResponse.getSuggestion(token);
        if (suggestion != null) {
            printSuggestionDetails(suggestion);
        }
        System.out.println("The First suggested word for solr is : "
                + spellCheckResponse.getFirstSuggestion(token));
        System.out.println("\n\n");
    }

    /** Prints the collated queries with their corrections, then the single collated result. */
    private static void printCollations(SpellCheckResponse spellCheckResponse) {
        List<Collation> collatedList = spellCheckResponse.getCollatedResults();
        if (collatedList != null) {
            for (Collation collation : collatedList) {
                System.out.println("collated query String: " + collation.getCollationQueryString());
                System.out.println("collation Num: " + collation.getNumberOfHits());
                List<Correction> correctionList = collation.getMisspellingsAndCorrections();
                for (Correction correction : correctionList) {
                    System.out.println("original: " + correction.getOriginal());
                    System.out.println("correction: " + correction.getCorrection());
                }
                System.out.println();
            }
        }
        System.out.println();
        System.out.println("The Collated word: " + spellCheckResponse.getCollatedResult());
        System.out.println();
    }

    /** Prints hit count, token and alternatives of one suggestion, followed by blank lines. */
    private static void printSuggestionDetails(Suggestion suggestion) {
        System.out.println("NumFound: " + suggestion.getNumFound());
        System.out.println("Token: " + suggestion.getToken());
        System.out.print("suggested: ");
        System.out.print(joinAlternatives(suggestion));
        System.out.println("\n\n");
    }

    /** Returns the alternatives of {@code suggestion}, each followed by {@code ", "}. */
    private static String joinAlternatives(Suggestion suggestion) {
        StringBuilder joined = new StringBuilder();
        for (String word : suggestion.getAlternatives()) {
            joined.append(word).append(", ");
        }
        return joined.toString();
    }
}

测试结果:

2012-8-17 14:23:35 org.apache.solr.client.solrj.impl.HttpClientUtil createClient
信息: Creating new http client, config:maxConnections=128&maxConnectionsPerHost=32&followRedirects=false
查询耗时:6203
Suggestions NumFound: 15
Token: 学
Suggested: 学子, 学校, 学校大, 学校坏, 学校破, 学校烂, 学习好习惯, 学校国家, 学生, 学校美国, 学校好地方, 学者大家, 学习, 学士, 学生爱生活, 
suggestionName: 学
NumFound: 15
Token: 学
suggested: 学子, 学校, 学校大, 学校坏, 学校破, 学校烂, 学习好习惯, 学校国家, 学生, 学校美国, 学校好地方, 学者大家, 学习, 学士, 学生爱生活, 
NumFound: 15
Token: 学
suggested: 学子, 学校, 学校大, 学校坏, 学校破, 学校烂, 学习好习惯, 学校国家, 学生, 学校美国, 学校好地方, 学者大家, 学习, 学士, 学生爱生活, 
The First suggested word for solr is : 学子
collated query String: 学子
collation Num: 0
original: 学
correction: 学子
collated query String: 学校
collation Num: 0
original: 学
correction: 学校
collated query String: 学校大
collation Num: 0
original: 学
correction: 学校大
The Collated word: 学子
查询耗时:6203