Lucene5.3高级应用

来源:互联网 发布:linux mint 安装 wine 编辑:程序博客网 时间:2024/03/28 17:26

1.Lucene5.3.1高级应用

 来自 caicongyang  http://blog.csdn.net/caicongyang

2.代码

直接上代码,注释清晰!

package com.ccy.lucene.app;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Test;

/**
 * <p>
 * Title: IndexDao.java
 * Package com.ccy.lucene.app
 * </p>
 * <p>
 * Description: Lucene usage examples - building an index through an in-memory
 * directory, deleting and updating documents by term, and searching with
 * highlighted summaries.
 * </p>
 *
 * @author Tom.Cai
 * @created 2015-11-9 9:57:00 PM
 * @version V1.0
 */
public class IndexDao {

    /** Maximum number of hits printed by {@link #search()}. */
    private static final int PAGE_SIZE = 10;

    /** Size of a highlighted summary fragment, and of the plain fallback summary. */
    private static final int FRAGMENT_SIZE = 50;

    // Source files to be indexed.
    String filePath1 = "D:\\newWork\\lucene5.3\\luceneSourceFile\\小笑话_总统的房间 Room .txt";
    String filePath2 = "D:\\newWork\\lucene5.3\\luceneSourceFile\\A股.txt";
    String filePath3 = "D:\\newWork\\lucene5.3\\luceneSourceFile\\IndexWriter addDocument's a javadoc .txt";

    // Directory holding the on-disk index.
    String indexPath = "D:\\newWork\\lucene5.3\\luceneIndex";

    // Analyzer (dictionary-based word segmentation).
    Analyzer analyzer = new SmartChineseAnalyzer();

    /**
     * Builds the index: loads the existing on-disk index into memory, adds the
     * three source files to the in-memory index, then writes the in-memory
     * index back to disk.
     *
     * @throws Exception if a source file cannot be read or the index cannot be written
     */
    @Test
    public void createIndex() throws Exception {
        try (FSDirectory fsDir = FSDirectory.open(Paths.get(indexPath));
                // 1. On start-up, copy the existing on-disk index into memory.
                Directory ramDir = new RAMDirectory(fsDir, new IOContext())) {

            // 2. Convert the files first, so that a read failure aborts before
            //    anything is added to the index.
            Document doc1 = file2Document(filePath1);
            Document doc2 = file2Document(filePath2);
            Document doc3 = file2Document(filePath3);

            // Add the documents to the in-memory index.
            try (IndexWriter ramIndexWriter = new IndexWriter(ramDir, new IndexWriterConfig(analyzer))) {
                ramIndexWriter.addDocument(doc1);
                ramIndexWriter.addDocument(doc2);
                ramIndexWriter.addDocument(doc3);
            }

            // 3. On exit, save the in-memory index to disk. The in-memory index
            //    already contains a copy of everything that was on disk, so the
            //    disk index must be REPLACED (OpenMode.CREATE); appending would
            //    duplicate every previously indexed document on each run.
            IndexWriterConfig indexConfig = new IndexWriterConfig(analyzer);
            indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            try (IndexWriter fsIndexWriter = new IndexWriter(fsDir, indexConfig)) {
                fsIndexWriter.addIndexes(ramDir);
            }
        }
    }

    /**
     * Deletes every document matching the given term from the on-disk index.
     *
     * @param term term identifying the documents to delete
     * @throws RuntimeException wrapping the {@link IOException} if the index cannot
     *         be opened, modified or closed
     */
    public void delete(Term term) {
        IndexWriterConfig indexConfig = new IndexWriterConfig(analyzer);
        // try-with-resources closes the writer and then the directory, and never
        // calls close() on a writer that failed to open.
        try (FSDirectory fsDir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter indexWriter = new IndexWriter(fsDir, indexConfig)) {
            indexWriter.deleteDocuments(term);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Updates the on-disk index by term: documents matching {@code term} are
     * replaced by {@code doc}. Equivalent to:
     *
     * <pre>
     * indexWriter.deleteDocuments(term);
     * indexWriter.addDocument(doc);
     * </pre>
     *
     * @param term term identifying the documents to replace
     * @param doc  the replacement document
     * @throws RuntimeException wrapping the {@link IOException} if the index cannot
     *         be opened, modified or closed
     */
    public void update(Term term, Document doc) {
        IndexWriterConfig indexConfig = new IndexWriterConfig(analyzer);
        try (FSDirectory fsDir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter indexWriter = new IndexWriter(fsDir, indexConfig)) {
            indexWriter.updateDocument(term, doc);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Searches the "content" field, prints the total hit count, and prints the
     * first {@value #PAGE_SIZE} hits with their content replaced by a
     * highlighted summary.
     *
     * @throws RuntimeException if the index cannot be read, the query cannot be
     *         parsed, or highlighting fails (so a failing run is reported
     *         instead of silently passing)
     */
    @Test
    public void search() {
        try (FSDirectory fsDir = FSDirectory.open(Paths.get(indexPath));
                IndexReader reader = DirectoryReader.open(fsDir)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            // 1. Sorting. NOTE(review): with Type.SCORE the sort is by relevance
            //    score; the field name appears to be ignored for this type -
            //    confirm against the SortField documentation.
            Sort sort = new Sort();
            sort.setSort(new SortField("name", Type.SCORE));

            // Lucene 5.x discourages Filter (e.g. TermFilter on "title");
            // combine clauses with BooleanQuery instead.

            // 2. Parse the query.
            QueryParser parser = new QueryParser("content", analyzer);
            Query query = parser.parse("房间");

            // The search(query, filter, n, sort) overload is deprecated since
            // Lucene 4.x for the same reason.
            TopDocs topDocs = searcher.search(query, Integer.MAX_VALUE, sort);
            int recordCount = topDocs.totalHits;
            System.out.println("总共有【" + recordCount + "】条匹配结果");

            // Prepare the highlighter.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            // Summary fragment size.
            Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
            highlighter.setTextFragmenter(fragmenter);

            // 3. Take the current page: at most PAGE_SIZE of the returned hits.
            List<Document> recordList = new ArrayList<Document>();
            int end = Math.min(PAGE_SIZE, topDocs.scoreDocs.length);
            for (int i = 0; i < end; i++) {
                ScoreDoc scoreDoc = topDocs.scoreDocs[i];
                int docSn = scoreDoc.doc; // internal document number
                Document doc = searcher.doc(docSn); // load the stored document

                String hc = summarize(highlighter, doc.get("content"));
                // Replace the full content with its summary for display.
                doc.removeField("content");
                doc.add(new Field("content", hc, TextField.TYPE_STORED));
                recordList.add(doc);
            }

            for (Document document : recordList) {
                printDocument(document);
            }
        } catch (RuntimeException e) {
            throw e;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the best highlighted fragment of {@code content}; when the
     * highlighter finds no match (it returns {@code null}), falls back to at
     * most the first {@value #FRAGMENT_SIZE} characters. A missing
     * ({@code null}) content is treated as empty text.
     */
    private String summarize(Highlighter highlighter, String content) throws Exception {
        String text = (content == null) ? "" : content;
        String best = highlighter.getBestFragment(analyzer, "content", text);
        if (best != null) {
            return best;
        }
        return text.substring(0, Math.min(FRAGMENT_SIZE, text.length()));
    }

    /** Prints the name, content, size and path fields of one document. */
    private static void printDocument(Document document) {
        System.out.println("------------------------------");
        System.out.println("name     = " + document.get("name"));
        System.out.println("content  = " + document.get("content"));
        System.out.println("size     = " + document.get("size"));
        System.out.println("path     = " + document.get("path"));
    }

    /**
     * Converts a text file into a Lucene {@link Document} with the stored
     * fields "name", "content", "size" and "path".
     *
     * @param path path of the file to convert
     * @return the document describing the file
     * @throws RuntimeException if the file cannot be read
     */
    public Document file2Document(String path) {
        File file = new File(path);
        Document doc = new Document();
        doc.add(new Field("name", file.getName(), StringField.TYPE_STORED));
        doc.add(new Field("content", readFileContent(file), TextField.TYPE_STORED));
        doc.add(new LongField("size", file.length(), LongField.TYPE_STORED));
        doc.add(new Field("path", file.getAbsolutePath(), StringField.TYPE_STORED));
        return doc;
    }

    /**
     * Reads the whole file as text, terminating every line with {@code "\n"}.
     *
     * @param file the file to read
     * @return the file content
     * @throws RuntimeException wrapping the underlying exception if the file
     *         cannot be read
     */
    public static String readFileContent(File file) {
        // NOTE(review): decodes with the platform default charset, as before;
        // pass an explicit charset here once the encoding of the source files
        // is confirmed.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
            StringBuilder content = new StringBuilder();
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                content.append(line).append("\n");
            }
            return content.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}

3.欢迎指正

我的博客:http://blog.csdn.net/caicongyang




1 0