Lucene索引,查询及高亮显示

来源:互联网 发布:淘宝大v认证运营故事 编辑:程序博客网 时间:2024/04/27 13:59

本文使用 Lucene 实现了建立索引库和查询索引的简单流程,分别演示了英文和中文的索引查询,并对查询结果中的指定字段进行高亮显示。具体实现代码如下:


package com.lucene;


import java.io.File;
import java.io.StringReader;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;


/**
 * Small Lucene demo: writes one document into a file-system index, then
 * searches it either with an exact term query (English) or with an
 * analyzer-driven parsed query (Chinese), highlighting hits in the title.
 *
 * <p>Every method opens the index directory on its own and releases all
 * Lucene resources (directory, reader, writer) before returning.
 *
 * @author Administrator
 */
public class IndexSearch {

    /** Location of the on-disk index, relative to the working directory. */
    private static final String INDEX_DIR = "index";

    /**
     * Entry point. Runs the Chinese search demo by default; uncomment the
     * other calls to (re)build the index or run the English search.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // Write sample data into the index
            // addDocToIndexWriter();
            // English search
            // englishSearcher();
            // Chinese segmented search
            chineseSearcher(new IKAnalyzer());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds a single sample document (id, title, name, price) to the index,
     * analyzing text fields with {@link IKAnalyzer}.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void addDocToIndexWriter() throws Exception {
        // Writer configuration. For English-only tokenization, swap in:
        // new IndexWriterConfig(Version.LUCENE_4_10_4, new StandardAnalyzer());
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_4, new IKAnalyzer());

        // try-with-resources: the writer and the directory are closed even if
        // addDocument throws (the writer is closed first, then the directory).
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexWriter writer = new IndexWriter(directory, config)) {
            // Build the document: id, title, name, price
            Document doc = new Document();
            doc.add(new IntField("id", 1, Store.YES));
            doc.add(new TextField("title", "big apple", Store.YES));
            doc.add(new TextField("name", "这是一个大苹果", Store.YES));
            doc.add(new IntField("price", 4999, Store.YES));
            writer.addDocument(doc);
        }
    }

    /**
     * Searches the {@code title} field for the exact term {@code apple} and
     * prints the stored title of each of the top 10 hits.
     *
     * @throws Exception if the index cannot be opened or read
     */
    public static void englishSearcher() throws Exception {
        // Reader and directory are released when the block exits.
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Search condition: exact (un-analyzed) term match
            Query query = new TermQuery(new Term("title", "apple"));
            TopDocs hits = searcher.search(query, 10);
            for (ScoreDoc hit : hits.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                System.out.println(doc.get("title"));
            }
        }
    }

    /**
     * Parses a mixed English/Chinese query against the {@code name} field
     * with the given analyzer, then prints id, highlighted title, name and
     * price for each of the top 10 hits.
     *
     * <p>If no query term occurs in a hit's title, the plain title is printed
     * instead of a highlighted fragment; if a hit has no stored title,
     * {@code null} is printed for it.
     *
     * @param analyzer analyzer used both to parse the query and to tokenize
     *                 the title for highlighting; not closed by this method
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public static void chineseSearcher(Analyzer analyzer) throws Exception {
        // Reader and directory are released when the block exits.
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR));
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // A plain TermQuery would bypass analysis, e.g.
            // new TermQuery(new Term("name", "苹果"));
            // Chinese text has to be segmented first, so go through the parser.
            QueryParser parser = new QueryParser("name", analyzer);
            Query query = parser.parse("apple 一个苹果");
            TopDocs hits = searcher.search(query, 10);

            // Highlighting setup. The scorer is created without a field
            // restriction, so terms parsed against "name" can still be
            // marked in the "title" text.
            Formatter formatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            // One fragment covering the whole value: never truncate the title.
            highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

            for (ScoreDoc hit : hits.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                String title = doc.get("title");
                System.out.println(doc.get("id"));
                System.out.println(highlightOrPlain(highlighter, analyzer, title));
                System.out.println(doc.get("name"));
                System.out.println(doc.get("price"));
            }
        }
    }

    /**
     * Returns {@code title} with query terms wrapped by the highlighter, or
     * the unmodified {@code title} when it is {@code null} or contains no
     * query term (in which case the highlighter yields {@code null}).
     */
    private static String highlightOrPlain(Highlighter highlighter, Analyzer analyzer, String title)
            throws Exception {
        if (title == null) {
            // No stored title: nothing to tokenize (StringReader rejects null).
            return null;
        }
        TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(title));
        String fragment = highlighter.getBestFragment(tokenStream, title);
        return fragment != null ? fragment : title;
    }
}

0 0
原创粉丝点击