Lucene 4.3 简单创建和查询索引实例

来源:互联网 发布:js模拟get请求 编辑:程序博客网 时间:2024/05/22 13:26

1.创建索引实例代码

import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.List;import javax.swing.filechooser.FileFilter;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field.Store;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.index.LogDocMergePolicy;import org.apache.lucene.index.LogMergePolicy;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.wltea.analyzer.lucene.IKAnalyzer;public class Indexer {private IndexWriter writer;private Analyzer analyzer;List<Document> documents = new ArrayList<Document>();public static void main(String[] args) {String dataDir = "E:/lucene/data";String indexDir = "E:/lucene/index";try {Indexer indexer = new Indexer(indexDir);indexer.index(dataDir, new TextFilesFilter());indexer.writer.commit();System.out.println(indexer.writer.numDocs());indexer.writer.close();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}public Indexer(String indexDir) throws IOException{Directory dir = FSDirectory.open(new File(indexDir));analyzer = new IKAnalyzer();LogMergePolicy mergePolicy = new LogDocMergePolicy();// 索引基本配置// 设置segment添加文档(Document)时的合并频率// 值较小,建立索引的速度就较慢// 值较大,建立索引的速度就较快,>10适合批量建立索引mergePolicy.setMergeFactor(30);// 设置segment最大合并文档(Document)数// 值较小有利于追加索引的速度// 值较大,适合批量建立索引和更快的搜索mergePolicy.setMaxMergeDocs(5000);IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);//IndexWriterConfig indexWriterConfig=new IndexWriterConfig(Version.LUCENE_43,new 
StandardAnalyzer(Version.LUCENE_43));indexWriterConfig.setMaxBufferedDocs(10000);indexWriterConfig.setMergePolicy(mergePolicy);indexWriterConfig.setRAMBufferSizeMB(64);// /设置索引的打开模式 创建或者添加索引indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);writer = new IndexWriter(dir, indexWriterConfig);}//将File信息写入documentprivate Document getDocument(File f) throws IOException{Document document = new Document();document.add(new StringField("name", f.getName(), Store.YES));document.add(new TextField("content", "我爱你中国", Store.YES));document.add(new StringField("fullpath", f.getCanonicalPath(),Store.YES));document.add(new StringField("updateTime", String.valueOf(f.lastModified()),Store.YES));return document;}private List<Document> getDocuments(File [] files, FileFilter filesFilter) throws IOException{for(File f : files){if(f.isDirectory()){getDocuments(f.listFiles(),filesFilter);}else{if(!f.isHidden() && f.canRead() && (filesFilter != null && filesFilter.accept(f))){documents.add(getDocument(f));}}}return documents;}//写入索引private void indexFile(File [] files, FileFilter filesFilter) throws IOException{List<Document> documents = getDocuments(files, filesFilter);writer.addDocuments(documents);}private void index(String dataDri, TextFilesFilter filesFilter){File [] files = new File(dataDri).listFiles();try {indexFile(files, new TextFilesFilter());} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}/** * 过滤器,只索引txt格式文件 * @author ANWJ * */private static class TextFilesFilter extends FileFilter{@Overridepublic boolean accept(File f) {// TODO Auto-generated method stubreturn f.getName().toLowerCase().endsWith(".txt");}@Overridepublic String getDescription() {// TODO Auto-generated method stubreturn null;}}}

2.检索索引实例代码

import java.io.File;import java.io.IOException;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.SearcherFactory;import org.apache.lucene.search.SearcherManager;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.wltea.analyzer.lucene.IKAnalyzer;public class Searcher {public static void search(String indexDir, String key) throws IOException, ParseException{Directory directory = FSDirectory.open(new File(indexDir));SearcherManager sm = new SearcherManager(directory,new  SearcherFactory());IndexSearcher searcher = sm.acquire();Analyzer  analyzer = new IKAnalyzer();QueryParser parser = new QueryParser(Version.LUCENE_43, "content", analyzer);Query query = parser.parse(key);TopDocs hits = searcher.search(query, 10);for(ScoreDoc doc : hits.scoreDocs){Document document = searcher.doc(doc.doc);System.out.println(document.get("content"));}}public static void main(String[] args) {String indexDir = "E:/lucene/index";String key = "中国";try {search(indexDir, key);} catch (IOException e) {e.printStackTrace();} catch (ParseException e) {e.printStackTrace();}}}