JavaEE全文检索Lucene入门教程(一)

来源:互联网 发布:二手车众筹源码 编辑:程序博客网 时间:2024/04/29 20:02

第一步 

下载Lucene的jar包,此入门教程用的是lucene-3.5.0。

第二步

建立普通的java项目

原理图

索引

索引库是一组文件的集合 

Directory:索引库的位置,可以在本地磁盘,也可以在内存中。

Document:索引库中的每一篇文档就是一个Field的集合。

public class Document{:

Set<Field>field

}

IndexWriter: 操作索引库的增删改

下面的方法

Index()这个是建立索引的过程;

search() 这个方法是搜索的过程;

package org.itcast.lucenc;

import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 3.5 example: {@link #Index()} builds an on-disk index from the
 * files in a source directory, and {@link #search()} queries that index and
 * prints the file names of the matching documents.
 */
public class HelloLucenc {

    /** Location of the on-disk index; shared by indexing and searching. */
    private static final String INDEX_DIR = "d:/lucenc/index01";

    /** Directory whose files are indexed by {@link #Index()}. */
    private static final String SOURCE_DIR = "E:/lunces学习资料";

    /**
     * Builds the index: one document per regular file directly inside
     * {@code SOURCE_DIR}, with the fields {@code content} (file text, read
     * through a {@link FileReader}), {@code filename} and {@code path}
     * (both stored, not analyzed).
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and end the
     * run; the writer and the directory are always closed.
     */
    public void Index() {
        // 1. Create the Directory.
        //    (An in-memory index could be used instead: new RAMDirectory().)
        Directory directory = null;
        // 2. Create the IndexWriter.
        IndexWriter indexWriter = null;
        try {
            directory = FSDirectory.open(new File(INDEX_DIR));
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardAnalyzer(Version.LUCENE_35));
            indexWriter = new IndexWriter(directory, iwc);

            // listFiles() returns null when the path is missing, is not a
            // directory, or cannot be read; report it rather than hit an NPE.
            File[] files = new File(SOURCE_DIR).listFiles();
            if (files == null) {
                System.err.println("Cannot list source directory: " + SOURCE_DIR);
                return;
            }
            for (File file : files) {
                // Sub-directories cannot be opened with FileReader; skip them.
                if (!file.isFile()) {
                    continue;
                }
                addFile(indexWriter, file);
            }
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException,
            // which the original caught separately with the same handler.
            e.printStackTrace();
        } finally {
            // Either resource may still be null if opening it failed.
            closeAndReport(indexWriter);
            closeAndReport(directory);
        }
    }

    /**
     * Searches the {@code content} field of the index for the term
     * {@code Redis} and prints the {@code filename} of up to 10 hits.
     *
     * <p>Failures are reported with {@code printStackTrace()}; the searcher,
     * reader and directory are always closed.
     */
    public void search() {
        Directory directory = null;
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            // 1. Open the Directory.
            directory = FSDirectory.open(new File(INDEX_DIR));
            // 2. Create the IndexReader.
            reader = IndexReader.open(directory);
            // 3. Create the IndexSearcher from the reader.
            searcher = new IndexSearcher(reader);
            // 4. Build the Query.
            QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                    new StandardAnalyzer(Version.LUCENE_35));
            Query query = parser.parse("Redis");
            // 5. Run the search and obtain the TopDocs.
            TopDocs tds = searcher.search(query, 10);
            // 6. Walk the ScoreDoc entries of the result.
            for (ScoreDoc sd : tds.scoreDocs) {
                // 7. Load the Document the hit refers to.
                Document d = searcher.doc(sd.doc);
                // 8. Read the stored value that is needed.
                System.out.println(d.get("filename"));
            }
        } catch (Exception e) {
            // Kept broad like the original so callers never see an exception,
            // but the failure is now reported instead of silently dropped.
            e.printStackTrace();
        } finally {
            // 9. Release the resources, also when the search failed.
            closeAndReport(searcher);
            closeAndReport(reader);
            closeAndReport(directory);
        }
    }

    /**
     * Original, misspelled name of {@link #search()}; kept so existing
     * callers continue to work.
     *
     * @deprecated use {@link #search()} instead.
     */
    @Deprecated
    public void serarch() {
        search();
    }

    /** Adds one file to the index as a document with content, filename and path fields. */
    private void addFile(IndexWriter indexWriter, File file) throws IOException {
        // NOTE(review): FileReader decodes with the platform default charset.
        FileReader contentReader = new FileReader(file);
        try {
            Document doc = new Document();
            doc.add(new Field("content", contentReader));
            doc.add(new Field("filename", file.getName(), Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            indexWriter.addDocument(doc);
        } finally {
            // The reader must stay open until addDocument() has returned;
            // after that it can be closed, including when adding failed.
            contentReader.close();
        }
    }

    /** Closes the resource if it is non-null, printing any close failure. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}









原创粉丝点击