lucene....highlight应用
来源:互联网 发布:网络割接流程视频 编辑:程序博客网 时间:2024/04/30 01:24
导入 Lucene contrib 中的 lucene-highlighter-2.0.1-dev.jar
参考 WordsHighlighterTest.java 代码:
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.RAMDirectory;
import junit.framework.TestCase;
public class WordsHighlighterTest extends TestCase {
private IndexReader reader;
RAMDirectory ramDirectory;
final private static String FIELD_NAME = "contents";
final private static String queryString = "索引";
String [] words = {
"1:索引内容结构:Document,以及包含于Document的多个Field索",
"2:索引内容优先性调整因子,boost(可对整个Document或Field指定).",
"3:索引的写入IndexWriter,索引的写入目标Directory,实现包括FsDirectory跟RamDirectory等",
"4:索引创建速度的调整"
};
protected void setUp() throws Exception {
ramDirectory = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(ramDirectory, new StandardAnalyzer(), true);
// for (String s : words){
// addDoc(indexWriter, s);
// }
for(int i=0;i<words.length;i++)
{
addDoc(indexWriter, words[i]);
}
indexWriter.optimize();
indexWriter.close();
reader = IndexReader.open(ramDirectory);
}
private void addDoc(IndexWriter indexWriter, String s) throws IOException {
Document doc = new Document();
doc.add(new Field(FIELD_NAME, s, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
indexWriter.addDocument(doc);
}
public void testSimpleWords() throws Exception {
Query query = new QueryParser(FIELD_NAME, new StandardAnalyzer()).parse(queryString);
query = query.rewrite(reader);
System.out.println("Searching for: " + query.toString(FIELD_NAME));
Searcher searcher = new IndexSearcher(ramDirectory);
Hits hits = searcher.search(query);
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=/"red/">", "</font>"), new QueryScorer(query));
highlighter.setTextFragmenter(new SimpleFragmenter(20));
Analyzer analyzer = new StandardAnalyzer();
for (int i = 0; i < hits.length(); i++) {
String text = hits.doc(i).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
String result = highlighter.getBestFragments(tokenStream, text, 4, "");
System.out.println("/t" + result);
}
}
protected void tearDown() throws Exception {
super.tearDown();
}
}
显示结果为
Searching for: "索 引"
4:<font color="red">索</font><font color="red">引</font>创建速度的调整
3:<font color="red">索</font><font color="red">引</font>的写入IndexWriter,<font color="red">索</font><font color="red">引</font>的写入目标Directory,实现包括FsDirectory跟RamDirectory等
1:<font color="red">索</font><font color="red">引</font>内容结构:Document,以及包含于Document的多个Field<font color="red">索</font>
2:<font color="red">索</font><font color="red">引</font>内容优先性调整因子,boost(可对整个Document或Field指定
- lucene....highlight应用
- [转]Lucene 中文分词的 highlight 显示
- [转]Lucene 中文分词的 highlight 显示
- 关于Lucene中文分词的highlight显示
- Lucene中文分词的highlight显示
- 使用Lucene的highlight包高亮显示检索关键字
- Lucene应用
- lucene应用
- lucene应用
- lucene的首次应用
- Lucene学习笔记(应用)
- lucene 简单应用
- 初次应用lucene
- Lucene基本应用示例
- lucene全文检索应用
- lucene.net 应用资料
- Lucene的基本应用
- Lucene中的堆应用
- Visual Studio.NET中的团队开发(一)
- 在win32汇编中动态链接库的使用
- 如何查找 JAVA 内存溢出
- 九寨沟之旅
- 正则表达式
- lucene....highlight应用
- Debian How to 编译安装apache
- Test My Blog
- Debian How to 编译安装MYSQL
- GIS数据建库基本思想(上)
- 摄影_测试
- GIS数据建库基本思想(下)
- ^_^疼爱女友之36六计(搞笑版)
- 并查集 (Union-Find Sets)及其应用