Lucene5.0索引的创建与搜索

来源:互联网 发布:淘宝自助开通怎么弄 编辑:程序博客网 时间:2024/04/27 15:23




package daniel.work;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;

/**
 * Minimal Lucene 5.0 example that builds an index from a folder of files and searches it.
 *
 * <p>As of Lucene 5.0 an {@code IndexWriter}/{@code IndexReader} no longer needs an explicit
 * version argument, and the on-disk index format changed. To read indexes created by earlier
 * versions, the lucene-backward-codecs-5.0.0.jar must be on the classpath.
 *
 * @author Daniel Chiu
 */
public class HelloLucene
{
    private static Directory directory;

    private IndexWriter writer;

    private IndexReader reader;

    static
    {
        try
        {
            // Open the index stored on disk.
            directory = FSDirectory.open(Paths.get("d://lucene"));
            // For an in-memory index no folder is needed:
            // directory = new RAMDirectory();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared index directory, kept static so it is easy to obtain.
     *
     * @return the index directory, or {@code null} if it could not be opened at class load
     */
    public static Directory getDirectory()
    {
        return directory;
    }

    /**
     * Returns the cached {@link IndexWriter}, creating it on first use.
     *
     * @param createOrAppend open mode for a newly created writer; {@code null} means
     *                       {@link OpenMode#CREATE}. Ignored when a writer is already cached.
     * @return the writer, or {@code null} if it could not be opened (the error is printed)
     */
    public IndexWriter getWriter(OpenMode createOrAppend)
    {
        if (writer != null)
        {
            return writer;
        }
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        // Default policy is to create a fresh index.
        conf.setOpenMode(createOrAppend == null ? OpenMode.CREATE : createOrAppend);
        try
        {
            writer = new IndexWriter(directory, conf);
            return writer;
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Returns an up-to-date {@link IndexReader} for the index directory.
     *
     * <p>The first call opens a reader. Later calls reopen it only when the index has changed;
     * in that case the previous reader is closed, so callers must not keep using a reader (or a
     * searcher built on it) obtained from an earlier call.
     *
     * @return the current reader, or {@code null} if an I/O error occurred (the error is printed)
     */
    public IndexReader getIndexReader()
    {
        try
        {
            if (reader == null)
            {
                reader = DirectoryReader.open(directory);
                return reader;
            }
            // openIfChanged returns null when the index is unchanged.
            DirectoryReader refreshed = DirectoryReader.openIfChanged((DirectoryReader) reader);
            if (refreshed != null)
            {
                IndexReader stale = reader;
                reader = refreshed;
                // openIfChanged does not close the old reader; release its file handles here.
                stale.close();
            }
            return reader;
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Creates an {@link IndexSearcher} on top of {@link #getIndexReader()}.
     *
     * @return a new searcher over the current reader
     */
    public IndexSearcher getIndexSearcher()
    {
        return new IndexSearcher(getIndexReader());
    }

    /**
     * Builds the index from the regular files directly inside {@code d://test}.
     *
     * <p>Rough analogy: a Directory is like a database table, a Document like a row, and a
     * Field like a column of that row.
     *
     * <p>The writer is closed when indexing finishes and the cached reference is cleared, so a
     * later call opens a fresh writer instead of reusing a closed one.
     */
    public void index()
    {
        // Folder whose files are to be indexed.
        File folder = new File("d://test");
        // listFiles() returns null when the folder is missing or unreadable; check this before
        // opening the writer so that a CREATE-mode writer does not wipe the existing index.
        File[] files = folder.listFiles();
        if (files == null)
        {
            System.err.println("cannot list files in " + folder);
            return;
        }

        IndexWriter indexWriter = getWriter(OpenMode.CREATE);
        if (indexWriter == null)
        {
            // getWriter has already printed the failure.
            return;
        }

        try
        {
            for (File f : files)
            {
                // Sub-directories cannot be opened as a stream; index regular files only.
                if (f.isFile())
                {
                    indexFile(indexWriter, f);
                }
            }
        }
        finally
        {
            closeWriter();
        }
    }

    /**
     * Adds (or, for a non-CREATE writer, replaces) the document for a single file.
     * I/O failures are printed and do not stop the indexing of other files.
     */
    private void indexFile(IndexWriter indexWriter, File f)
    {
        // try-with-resources guarantees the stream is released even if indexing fails midway.
        try (Reader content = new BufferedReader(
                new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8)))
        {
            /*
             * Since Lucene 4 each kind of field has its own class; only whether the original
             * value is stored has to be specified, not the analysis mode. The older style was:
             * doc.Add(new Field("id", item.id.ToString(), Field.Store.YES, Field.Index.ANALYZED));
             */
            Document document = new Document();
            document.add(new StringField("fileName", f.getName(), Store.YES));
            // Stored so that search() can print them back.
            document.add(new LongField("fileSize", f.length(), Store.YES));
            document.add(new LongField("fileLastModified", f.lastModified(), Store.YES));
            // Reader-based text fields are indexed but never stored.
            document.add(new TextField("content", content));

            if (indexWriter.getConfig().getOpenMode() == OpenMode.CREATE)
            {
                System.out.println("adding " + f);
                indexWriter.addDocument(document);
            }
            else
            {
                System.out.println("updating " + f);
                // The term must reference a field that is actually indexed, otherwise the old
                // document is never replaced and duplicates accumulate.
                indexWriter.updateDocument(new Term("fileName", f.getName()), document);
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Closes the cached writer and clears the field so that {@link #getWriter(OpenMode)} never
     * hands out a closed instance. Unless indexing happens frequently the writer should always
     * be closed; alternatively it could be kept as a singleton.
     */
    private void closeWriter()
    {
        if (writer == null)
        {
            return;
        }
        try
        {
            writer.close();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            writer = null;
        }
    }

    /**
     * Runs a query against the {@code content} field using the standard analyzer and prints the
     * hits to standard output.
     *
     * @param queryStr query in Lucene classic query-parser syntax
     * @param num      maximum number of hits to return
     */
    public void search(String queryStr, int num)
    {
        // Search the content field by default, using the standard analyzer.
        QueryParser parser = new QueryParser("content", new StandardAnalyzer());
        IndexReader currentReader = getIndexReader();
        if (currentReader == null)
        {
            // getIndexReader has already printed the failure; nothing to search.
            return;
        }
        IndexSearcher searcher = new IndexSearcher(currentReader);
        try
        {
            Query query = parser.parse(queryStr);
            TopDocs docs = searcher.search(query, num);
            System.out.println("一共搜索到结果:" + docs.totalHits + "条");
            for (ScoreDoc scoreDoc : docs.scoreDocs)
            {
                System.out.print("序号为:" + scoreDoc.doc);
                System.out.print(" 评分为:" + scoreDoc.score);
                Document document = searcher.doc(scoreDoc.doc);
                System.out.print(" 文件名:" + document.get("fileName"));
                // "content" is indexed from a Reader and therefore not stored: this prints null.
                System.out.print(" 内容为:" + document.get("content"));
                System.out.print(" 文件大小:" + document.get("fileSize"));
                System.out.print(" 文件日期:" + document.get("fileLastModified"));
                System.out.println();
            }
        }
        catch (ParseException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}




0 0
原创粉丝点击