Solr 的 spellcheck 和 suggest 配置代码记录

来源:互联网 发布:向往的生活 知乎 编辑:程序博客网 时间:2024/05/29 17:52
<!--
  Spellcheck search component. Declares three active spellcheckers
  ("default", "wordbreak", "freq") and keeps two further examples
  ("jarowinkler", "file") commented out for reference.
-->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">

  <!-- Field type named here supplies the analyzer for the spellcheck query. -->
  <str name="queryAnalyzerFieldType">string</str>

  <!-- Any number of named spellcheckers may be declared inside this component. -->

  <!-- "default": suggestions are drawn from the "text" field of the main index. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">text</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <!-- Distance measure; "internal" is the default (built-in Levenshtein). -->
    <str name="distanceMeasure">internal</str>
    <!-- Lowest accuracy a candidate may have and still count as a valid suggestion. -->
    <float name="accuracy">0.5</float>
    <!-- Upper bound on edits when enumerating terms; allowed values are 1 or 2. -->
    <int name="maxEdits">2</int>
    <!-- Minimum prefix length a candidate must share with the query term. -->
    <int name="minPrefix">1</int>
    <!-- Maximum number of inspections per result. -->
    <int name="maxInspections">5</int>
    <!-- Query terms shorter than this are not considered for correction. -->
    <int name="minQueryLength">4</int>
    <!-- Highest document-frequency threshold at which a query term is still
         considered for correction. -->
    <float name="maxQueryFrequency">0.01</float>
    <!-- Enable the line below to require suggestions to occur in 1% of the documents:
    <float name="thresholdTokenFrequency">.01</float>
    -->
    <!-- NOTE(review): DirectSolrSpellChecker presumably works straight off the main
         index, in which case this directory setting has no effect; confirm. -->
    <str name="spellcheckIndexDir">spellcheckerDefault</str>
  </lst>

  <!-- "wordbreak": can split a term into several words or join adjacent words.
       The "/spell" request handler shows how it is used. -->
  <lst name="spellchecker">
    <str name="name">wordbreak</str>
    <str name="classname">solr.WordBreakSolrSpellChecker</str>
    <str name="field">text</str>
    <str name="combineWords">true</str>
    <str name="breakWords">true</str>
    <int name="maxChanges">10</int>
    <!-- NOTE(review): possibly unused by WordBreakSolrSpellChecker; confirm. -->
    <str name="spellcheckIndexDir">spellcheckerWord</str>
  </lst>

  <!-- Disabled example: a spellchecker using a different distance measure. -->
  <!--
  <lst name="spellchecker">
    <str name="name">jarowinkler</str>
    <str name="field">spell</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="distanceMeasure">
      org.apache.lucene.search.spell.JaroWinklerDistance
    </str>
  </lst>
  -->

  <!-- "freq": a spellchecker with an alternate comparator.
       comparatorClass may be one of:
         1. score (default)
         2. freq  (frequency first, then score)
         3. a fully qualified class name
  -->
  <lst name="spellchecker">
    <str name="name">freq</str>
    <str name="field">text</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="comparatorClass">freq</str>
  </lst>

  <!-- Disabled example: a spellchecker that reads its word list from a file. -->
  <!--
  <lst name="spellchecker">
    <str name="classname">solr.FileBasedSpellChecker</str>
    <str name="name">file</str>
    <str name="sourceLocation">spellings.txt</str>
    <str name="characterEncoding">UTF-8</str>
    <str name="spellcheckIndexDir">spellcheckerFile</str>
  </lst>
  -->

</searchComponent>
 <!--
   "/spell" request handler: a lazily started SearchHandler whose defaults
   switch spellchecking on and which lists the "spellcheck" component
   under last-components.
 -->
 <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
   <lst name="defaults">
     <str name="df">text</str>

     <!-- Both the "default" and the "wordbreak" spellcheckers are consulted and
          their suggestions combined; collations (re-written queries) may mix
          corrections from the two. -->
     <str name="spellcheck.dictionary">default</str>
     <str name="spellcheck.dictionary">wordbreak</str>

     <str name="spellcheck">on</str>
     <str name="spellcheck.extendedResults">true</str>

     <!-- Suggestion count settings. -->
     <str name="spellcheck.count">10</str>
     <str name="spellcheck.alternativeTermCount">5</str>
     <str name="spellcheck.maxResultsForSuggest">5</str>

     <!-- Collation settings. -->
     <str name="spellcheck.collate">true</str>
     <str name="spellcheck.collateExtendedResults">true</str>
     <str name="spellcheck.maxCollationTries">10</str>
     <str name="spellcheck.maxCollations">5</str>
   </lst>

   <arr name="last-components">
     <str>spellcheck</str>
   </arr>
 </requestHandler>

/suggest配置:

<!--
  Suggest configuration: a SpellCheckComponent instance named "suggest" that
  uses the Suggester spellchecker class, the "/suggest" handler that exposes
  it, and the query converter declaration.
-->
<searchComponent name="suggest" class="solr.SpellCheckComponent">
  <!-- Field type named here supplies the analyzer for the suggest query. -->
  <str name="queryAnalyzerFieldType">text_ik</str>

  <lst name="spellchecker">
    <str name="name">suggest</str>
    <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
    <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
    <str name="field">text</str>
    <float name="threshold">0.0001</float>
    <!-- NOTE(review): ext.dic is presumably an external dictionary file; when a
         sourceLocation is given, the "field"/"threshold" settings above may not
         be used. Confirm against the Suggester documentation. -->
    <str name="sourceLocation">ext.dic</str>
    <str name="spellcheckIndexDir">spellchecker</str>
    <str name="comparatorClass">freq</str>
    <!-- Build triggers: both the optimize and the commit trigger are enabled. -->
    <str name="buildOnOptimize">true</str>
    <str name="buildOnCommit">true</str>
  </lst>
</searchComponent>

<!-- "/suggest" request handler: lazily started, defaults select the "suggest"
     dictionary. -->
<requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <str name="spellcheck">true</str>
    <str name="spellcheck.dictionary">suggest</str>
    <str name="spellcheck.onlyMorePopular">true</str>
    <str name="spellcheck.extendedResults">false</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.collate">true</str>
  </lst>

  <!-- Listed under "components" (not "last-components"), with "suggest" as the
       only entry. -->
  <arr name="components">
    <str>suggest</str>
  </arr>
</requestHandler>

<queryConverter name="queryConverter" class="solr.SpellingQueryConverter"/>


0 0