lucene(二) 索引的创建、增删改查

来源:互联网 发布:万能数据恢复破解补丁 编辑:程序博客网 时间:2024/06/18 12:39

一、索引的整个知识架构


二、例子一:创建对多个文件的索引并查询

import java.io.File;import java.io.FileReader;import java.io.IOException;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.queryParser.ParseException;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.util.Version;public class HelloLucene {public static void main(String[]args){HelloLucene helloLucene=new HelloLucene();helloLucene.index();helloLucene.search();}/** 建立文件索引 * @author * @param * @return */public void index(){IndexWriter writer=null;//1、创建Directory//Directory directory=new RAMDirectory();//建立在内存中的try {Directory directory=FSDirectory.open(new File("d:/index"));//建在硬盘上的//2、创建IndexWriterwriter=new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));//3、创建Document对象Document doc=null;//4、为Document添加FieldFile f=new File("d:/TestLucene");for(File file:f.listFiles()){doc=new Document();doc.add(new Field("content", new FileReader(file)));doc.add(new Field("filename",file.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));doc.add(new Field("path",file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));writer.addDocument(doc);}} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (LockObtainFailedException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch 
blocke.printStackTrace();}finally{if(writer!=null)try {writer.close();} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}/** 查询 * @author * @param * @return */public void search(){try{//1、创建DirectoryDirectory directory=FSDirectory.open(new File("d:/index"));        //2、创建IndexReader    IndexReader reader=IndexReader.open(directory);    //3、根据IndexReader创建IndexSearcher    IndexSearcher searcher=new IndexSearcher(reader);    //4、创建搜索的Query    QueryParser parser=new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));Query query=parser.parse("奥运");//查询content字段内容为"奥运"的文件//5、根据seacher搜索并且返回TopDocsTopDocs tds=searcher.search(query, 10);//6、根据TopDocs获取ScoreDoc对象ScoreDoc[]sds=tds.scoreDocs;for(ScoreDoc sd:sds){//7、根据sercher和ScoreDoc对象获取具体的Document对象Document d=searcher.doc(sd.doc);//8、根据Document对象获取需要的值System.out.println(d.get("filename")+"["+d.get("path")+"]");}//9、关闭readerreader.close();}catch(CorruptIndexException e){e.printStackTrace();}catch (IOException e) {e.printStackTrace();}catch (ParseException e) {e.printStackTrace();}}}

三、域的说明、使用luke打开创建的索引二进制文件

1、域Field的说明


2、使用luke分析索引文件

创建好索引之后,可以在硬盘的索引目录(d:/index)中找到如下所示的索引文件:


使用luke简单看一下各个文件是干什么的:


luke还有其他的功能,这里就不一一介绍了~

四、例子二:索引的增删改查

import java.io.File;import java.io.IOException;import java.util.HashMap;import java.util.Map;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.StaleReaderException;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.util.Version;public class IndexUtil {private String[] ids = {"1","2","3","4","5","6"};private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};private String[] contents = {"welcome to visited the space,I like book","hello boy, I like pingpeng ball","my name is cc I like game","I like football", "I like football and I like basketball too","I like movie and swim"};private int[] attachs = {2,3,1,4,5,5};//附件private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};private Map<String,Float> scores = new HashMap<String,Float>();//存储权值private Directory directory=null;public static void main(String[]args){IndexUtil iUtil=new IndexUtil();iUtil.index();iUtil.search();}/** 构造函数 * @author  * @param * @return */public IndexUtil(){scores.put("itat.org",2.0f);scores.put("zttc.edu", 1.5f);try {//1、创建索引存在的地方directory=FSDirectory.open(new File("d:/index"));} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}/** 创建索引 * @author  * @param * @return */public void index(){//2、创建索引IndexWriter writer=null;try {writer=new IndexWriter(directory, new 
IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));    //3、创建文档并建立索引(文档相当于二维表中的每一条记录,域相当于表的字段,所以整个索引可以理解为一个二维表)Document doc=null;for(int i=0;i<ids.length;i++){doc = new Document();doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));String et = emails[i].substring(emails[i].lastIndexOf("@")+1);System.out.println(et);//对文档进行加权处理//if(scores.containsKey(et)) {//doc.setBoost(scores.get(et));//} else {//doc.setBoost(0.5f);//}writer.addDocument(doc);}} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (LockObtainFailedException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}finally{if(writer!=null)try {writer.close();} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}/** 查询索引基本信息 * @author  * @param * @return */ public void query() {try {IndexReader reader = IndexReader.open(directory);//通过reader可以有效的获取到文档的数量System.out.println("numDocs:"+reader.numDocs());System.out.println("maxDocs:"+reader.maxDoc());System.out.println("deleteDocs:"+reader.numDeletedDocs());reader.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}/** 使用TermQuery具体查询 * @author  * @param * @return */ public void search() {try {IndexReader reader = IndexReader.open(directory);IndexSearcher searcher = new IndexSearcher(reader);TermQuery query = new TermQuery(new Term("content","like"));TopDocs tds = searcher.search(query, 10);for(ScoreDoc sd:tds.scoreDocs) {Document doc = 
searcher.doc(sd.doc);System.out.println("("+sd.doc+"-"+doc.getBoost()+"-"+sd.score+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id"));}reader.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}/** 删除索引 * @author  * @param * @return */ public void delete() {IndexWriter writer = null;try {writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));//参数是一个选项,可以是一个Query,也可以是一个term,term是一个精确查找的值//此时删除的文档并不会被完全删除,而是存储在一个回收站中的,可以恢复writer.deleteDocuments(new Term("id","1"));writer.commit();} catch (CorruptIndexException e) {e.printStackTrace();} catch (LockObtainFailedException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {try {if(writer!=null) writer.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}/** 恢复删除的索引 * @author  * @param * @return */ public void undelete() {//使用IndexReader进行恢复try {IndexReader reader = IndexReader.open(directory,false);//恢复时,必须把IndexReader的只读(readOnly)设置为falsereader.undeleteAll();reader.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (StaleReaderException e) {e.printStackTrace();} catch (LockObtainFailedException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}/** 强制删除回收站中的索引 * @author  * @param * @return */ public void forceDelete() {IndexWriter writer = null;try {writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));writer.forceMergeDeletes();} catch (CorruptIndexException e) {e.printStackTrace();} catch (LockObtainFailedException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {try {if(writer!=null) writer.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}/** 索引的更新 * @author  * @param * @return */ public void update() 
{IndexWriter writer = null;try {writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));/* * Lucene并没有提供更新,这里的更新操作其实是如下两个操作的合集 * 先删除之后再添加 */Document doc = new Document();doc.add(new Field("id","11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));doc.add(new Field("email",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED));doc.add(new Field("content",contents[0],Field.Store.NO,Field.Index.ANALYZED));doc.add(new Field("name",names[0],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));writer.updateDocument(new Term("id","1"), doc);} catch (CorruptIndexException e) {e.printStackTrace();} catch (LockObtainFailedException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {try {if(writer!=null) writer.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}}





0 0
原创粉丝点击