Lucene同义词(一)

来源:互联网 发布:设计店铺的软件 编辑:程序博客网 时间:2024/05/21 06:16
package com.yuan;import java.io.IOException;import java.util.HashMap;import java.util.Map;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.synonym.SynonymFilterFactory;import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;import org.apache.lucene.analysis.util.FilesystemResourceLoader;import org.apache.lucene.util.Version;import org.wltea.analyzer.lucene.IKAnalyzer;/** * @author hankcs */public class TestSynonyms{    private static void displayTokens(TokenStream ts) throws IOException    {        CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);        OffsetAttribute offsetAttribute = ts.addAttribute(OffsetAttribute.class);        ts.reset();        while (ts.incrementToken())        {            String token = termAttr.toString();            System.out.print(offsetAttribute.startOffset() + "-" + offsetAttribute.endOffset() + "[" + token + "] ");        }        System.out.println();        ts.end();        ts.close();    }    public static void main(String[] args) throws Exception    {        String testInput = "其实 i似 好人";        Version ver = Version.LUCENE_47;        Map<String, String> filterArgs = new HashMap<String, String>();        filterArgs.put("luceneMatchVersion", ver.toString());        filterArgs.put("synonyms", "c:/同义词/synonymword.dic");        //filterArgs.put("expand", "true");        SynonymFilterFactory factory = new SynonymFilterFactory(filterArgs);        factory.inform(new FilesystemResourceLoader());        IKAnalyzer ikAnalyzer = new IKAnalyzer();        TokenStream ts = factory.create(ikAnalyzer.tokenStream("someField", testInput));        displayTokens(ts);    }}
注意:同义词库文件的编码必须与读取时使用的编码一致(SynonymFilterFactory 按 UTF-8 读取)。起初由于两者不一致,导致中文同义词不生效。
0 0
原创粉丝点击