一个封装了StandardTokenizer的自定义Lucene Analyzer

来源:互联网 发布:seo常用指令 编辑:程序博客网 时间:2024/06/17 08:14
<strong><span style="font-size:18px;">/*** * @author YangXin * @info一个封装了StandardTokenizer的LuceneAnalyzer * 带有小写过滤器的MyAnalyzer */package unitTen;import java.io.Reader;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.LengthFilter;import org.apache.lucene.analysis.LowerCaseFilter;import org.apache.lucene.analysis.PorterStemFilter;import org.apache.lucene.analysis.StopFilter;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.analysis.standard.StandardTokenizer;import org.apache.lucene.util.Version;public class MyAnalyzer extends Analyzer { @SuppressWarnings("deprecation")  @Override  public TokenStream tokenStream(String fieldName, Reader reader) {    TokenStream result = new StandardTokenizer(        Version.LUCENE_CURRENT, reader);    result = new LowerCaseFilter(result);    result = new LengthFilter(result, 3, 50);    result = new StopFilter(true, result, StandardAnalyzer.STOP_WORDS_SET);    result = new PorterStemFilter(result);    return result;  }}</span></strong>

0 0