lucene....highlight应用

来源:互联网 发布:网络割接流程视频 编辑:程序博客网 时间:2024/04/30 01:24

导入 Lucene contrib 中的 lucene-highlighter-2.0.1-dev.jar

参考 WordsHighlighterTest.java 代码:
package lucene;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.RAMDirectory;

import junit.framework.TestCase;

public class WordsHighlighterTest extends TestCase {
    private IndexReader reader;
    RAMDirectory ramDirectory;
    final private static String FIELD_NAME = "contents";
    final private static String queryString = "索引";

    String [] words = {
            "1:索引内容结构:Document,以及包含于Document的多个Field索",
            "2:索引内容优先性调整因子,boost(可对整个Document或Field指定).",
            "3:索引的写入IndexWriter,索引的写入目标Directory,实现包括FsDirectory跟RamDirectory等",
            "4:索引创建速度的调整"
    };


    protected void setUp() throws Exception {
        ramDirectory = new RAMDirectory();
        IndexWriter indexWriter = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
//        for (String s : words){
//            addDoc(indexWriter, s);
//        }
       
        for(int i=0;i<words.length;i++)
        {
            addDoc(indexWriter, words[i]);
        }
        indexWriter.optimize();
        indexWriter.close();
        reader = IndexReader.open(ramDirectory);
    }

    private void addDoc(IndexWriter indexWriter, String s) throws IOException {
        Document doc = new Document();
        doc.add(new Field(FIELD_NAME, s, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
        indexWriter.addDocument(doc);
    }

    public void testSimpleWords() throws Exception {
        Query query = new QueryParser(FIELD_NAME, new StandardAnalyzer()).parse(queryString);
        query = query.rewrite(reader);
        System.out.println("Searching for: " + query.toString(FIELD_NAME));
        Searcher searcher = new IndexSearcher(ramDirectory);
        Hits hits = searcher.search(query);
        Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=/"red/">", "</font>"), new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(20));
        Analyzer analyzer = new StandardAnalyzer();
        for (int i = 0; i < hits.length(); i++) {
            String text = hits.doc(i).get(FIELD_NAME);
            TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
            String result = highlighter.getBestFragments(tokenStream, text, 4, "");
            System.out.println("/t" + result);
        }
    }

    protected void tearDown() throws Exception {
        super.tearDown();
    }
}

显示结果为

Searching for: "索 引"
 4:<font color="red">索</font><font color="red">引</font>创建速度的调整
 3:<font color="red">索</font><font color="red">引</font>的写入IndexWriter,<font color="red">索</font><font color="red">引</font>的写入目标Directory,实现包括FsDirectory跟RamDirectory等
 1:<font color="red">索</font><font color="red">引</font>内容结构:Document,以及包含于Document的多个Field<font color="red">索</font>
 2:<font color="red">索</font><font color="red">引</font>内容优先性调整因子,boost(可对整个Document或Field指定

原创粉丝点击