初识Lucene6.6.0

来源:互联网 发布:ppt用什么软件 编辑:程序博客网 时间:2024/05/16 04:17

      Lucene是一个简单而功能强大的、基于Java的搜索库,可以为任何应用程序提供搜索功能。它是一个可扩展的高性能库,可用于索引和搜索几乎任何类型的文本。

项目中使用Lucene做业务菜单的搜索功能。客户可以通过输入业务菜单的部分文字,通过Lucene检索,查询到相符合的菜单目录进行业务操作。闲话不说,本人根据项目中Lucene的使用情况结合新版的(6.6)Lucene使用情况写了个DEMO用于学习。

首先是DEMO中Lucene使用的公共常量类。

/**
 * Constants shared by the Lucene demo classes: the names of the index fields
 * and the maximum number of hits returned by a search.
 *
 * @author zhouyi
 */
public class LuceneConstants {

    /** Name of the index field that holds a file's contents. */
    public static final String CONTENTS = "contents";

    /** Name of the index field that holds a file's name. */
    public static final String FILE_NAME = "filename";

    /** Name of the index field that holds a file's path. */
    public static final String FILE_PATH = "filepath";

    /** Maximum number of hits returned by a single search (10). */
    public static final int MAX_SEARCH = 10;
}

  然后对需要索引的文件做类别区分,这里暂时只对TXT文件进行索引。

import java.io.File;
import java.io.FileFilter;

/**
 * File filter that accepts only paths whose name ends with ".txt",
 * compared case-insensitively.
 */
public class TextFileFilter implements FileFilter {

    /**
     * Decides whether the given path is a text file, judging by its name only.
     *
     * @param pathname the candidate file
     * @return {@code true} if the lower-cased file name ends with ".txt"
     */
    @Override
    public boolean accept(File pathname) {
        String lowerCasedName = pathname.getName().toLowerCase();
        return lowerCasedName.endsWith(".txt");
    }
}
      下面开始对需要检索的文件建立索引。注意:新版的Lucene改用NIO2(java.nio.file)中的Path等API,摒弃了基于java.io.File的旧IO方式。

import java.io.BufferedReader;import java.io.File;import java.io.FileFilter;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.nio.charset.StandardCharsets;import java.nio.file.FileVisitResult;import java.nio.file.Files;import java.nio.file.Path;import java.nio.file.Paths;import java.nio.file.SimpleFileVisitor;import java.nio.file.attribute.BasicFileAttributes;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.StringField;import org.apache.lucene.document.TextField;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import cn.zhouyi.demo.lucene.LuceneConstants;public class Indexer {//索引创建类private IndexWriter writer ;public Indexer(String indexDirectoryPath) throws IOException{//读取需要索引的文件到Lucene的目录类中,新版的Lucene只支持IO2中的Path类型的变量了。Directory indexDirectory = FSDirectory.open(Paths.get(indexDirectoryPath)) ;//创建分词器,这个分词器必须和IndexSearcher中的一致。Analyzer analyzer = new StandardAnalyzer() ;//新版的Lucene中索引创建类只接收IndexWriterConfig配置。IndexWriterConfig iwc = new IndexWriterConfig(analyzer);writer = new IndexWriter(indexDirectory, iwc);}public void close() throws CorruptIndexException, IOException{writer.close();}//给文件创建索引private void indexFile(Path path) throws IOException{//因为使用了Java7的try(),所以文件流的操作必须在try()中写完,否则会自动关闭流。try(InputStream stream = Files.newInputStream(path)){//建立Lucene文档Document document = new Document() ;Field contentField = new TextField(LuceneConstants.CONTENTS, new BufferedReader(new InputStreamReader(stream,StandardCharsets.UTF_8))) ;Field fileNameField = new StringField(LuceneConstants.FILE_NAME, path.getFileName().toString(), Field.Store.YES);Field filePathField = new 
StringField(LuceneConstants.FILE_PATH, path.toString(), Field.Store.YES); document.add(contentField);document.add(fileNameField);document.add(filePathField);System.out.println("Indexing "+path.toString());//写入文档到索引创建类中writer.addDocument(document) ;}}//遍历文件目录下的文件,给这些文件加索引public int createIndex(String docPath, FileFilter filter) throws IOException{Path path = Paths.get(docPath) ;if(!Files.isReadable(path)){System.out.println("Document Directory '"+path.toAbsolutePath()+ "'is not readable or is not exist");System.exit(1);}if(Files.isDirectory(path)){//NIO2中优雅地遍历文件Files.walkFileTree(path, new SimpleFileVisitor<Path>(){@Overridepublic FileVisitResult visitFile(Path file, BasicFileAttributes attrs){try{if(filter.accept(file.toFile())){indexFile(file) ;}}catch(IOException ex){ex.printStackTrace();}return FileVisitResult.CONTINUE; }}) ;}else{if(filter.accept(path.toFile())){indexFile(path) ;}}return writer.numDocs() ;}}

      上面给对应目录的文件创建好了分词索引后,下面开始读取索引进行搜索。

import java.io.IOException;import java.nio.file.Paths;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import cn.zhouyi.demo.lucene.LuceneConstants;public class Searcher {//索引搜索类private IndexSearcher indexSearcher ;//索引读取类private IndexReader reader ;//将用户的搜索条件封装成Lucene的query条件private QueryParser queryParser ;private Query query ;public Searcher(String indexDirectoryPath) throws IOException{//将索引文件读取到lucene的索引读取类中Directory directory = FSDirectory.open(Paths.get(indexDirectoryPath));reader = DirectoryReader.open(directory);//创建索引搜索类indexSearcher = new IndexSearcher(reader) ;//此处分词器需要和索引类中的一致Analyzer analyzer = new StandardAnalyzer();queryParser = new QueryParser(LuceneConstants.CONTENTS, analyzer);}//根据用户的搜索条件返回lucene搜索的文档public TopDocs search(String searchQuery) throws ParseException, IOException{query = queryParser.parse(searchQuery);return indexSearcher.search(query, LuceneConstants.MAX_SEARCH) ;}//根据文档的id获取文档,注scoreDoc=TopDocs.scoreDocs[i]public Document getDocument(ScoreDoc scoreDoc) throws IOException{return indexSearcher.doc(scoreDoc.doc);}public void close() throws IOException{reader.close();}}

      以上简单的索引类和搜索类已经写好了,下面写一个测试类来测试一下lucene的功能。

import java.io.IOException;import org.apache.lucene.document.Document;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import cn.zhouyi.demo.lucene.LuceneConstants;public class LuceneTester {String indexDir = "E:\\code\\lucence\\index" ;String dataDir = "E:\\code\\lucence\\docs" ;Indexer indexer ;Searcher searcher ;public static void main(String args[]){LuceneTester tester ;try{tester = new LuceneTester() ;tester.createIndex();tester.search("you");}catch(Exception ex){ex.printStackTrace();}}private void createIndex() throws IOException{indexer = new Indexer(indexDir) ;int numIndexed ;long startTime = System.currentTimeMillis() ;//numIndexed = indexer.createIndex(dataDir, new TextFileFilter()) ;//使用一下Java8的新特性来实现一下文件的筛选。numIndexed = indexer.createIndex(dataDir, (pathname)->{return pathname.getName().toLowerCase().endsWith(".txt");});long endTime = System.currentTimeMillis() ;indexer.close();System.out.println(numIndexed+" File indexed, time taken: "+(endTime-startTime)+" ms");}private void search(String searchQuery) throws IOException, ParseException{searcher = new Searcher(indexDir);long startTime = System.currentTimeMillis();    TopDocs hits = searcher.search(searchQuery);    long endTime = System.currentTimeMillis();       System.out.println(hits.totalHits+" documents found. Time :" + (endTime - startTime));    for(ScoreDoc scoreDoc : hits.scoreDocs) {       Document doc = searcher.getDocument(scoreDoc);          System.out.println("File: " + doc.get(LuceneConstants.FILE_PATH));    }    searcher.close();}}
      执行上面测试类得到的结果:

Indexing E:\code\lucence\docs\doc1.txt
1 File indexed, time taken: 105 ms
1 documents found. Time :24
File: E:\code\lucence\docs\doc1.txt
      收工完毕。





原创粉丝点击