Lucene——IndexReader单例化

来源:互联网 发布:淘宝网地垫 编辑:程序博客网 时间:2024/06/03 18:00

IndexReader单例化来提高效率

如果Reader为空,初始化

如果Reader发生改变,关闭原来的reader,用openIfChanged()返回的新reader重新赋值,

若不为空并且未发生改变,直接返回Searcher

IndexReader.deleteDocuments()也可以删除Document,reader.close()关闭的时候自动提交

 

package org.itat.index;

// Builds and maintains the index
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Demo utility that builds, queries, updates and deletes a small Lucene index
 * stored under {@code e:/lucene/index02}, and shows how to share a single
 * {@link IndexReader} between searches.
 *
 * <p>The shared reader lives in a static field and is refreshed lazily by
 * {@link #getSearcher()}. None of the methods are synchronized, so the class
 * as written is only suitable for single-threaded use.
 *
 * <p>All I/O failures are reported with {@code printStackTrace()} and then
 * swallowed, matching the tutorial style of the original code.
 */
public class IndexUtil {

    /** Day-of-month pattern; note the lowercase {@code dd} ({@code DD} would mean day-of-year). */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    private String[] ids = { "1", "2", "3", "4", "5", "6" };
    private String[] emails = { "dd@@dd.org", "aa@jingtian.org",
            "bb@jingtian.org", "cc@ytu.edu", "ee@ee.org", "ff@ff.org" };
    private String[] contents = { "I like soccer", "I like football",
            "I like soccer and I like game", "I like book", "I like soccer",
            "I like soccer" };
    private int[] attachs = { 3, 6, 8, 9, 5, 4, 5 };
    private String[] names = { "zhangsan", "lisi", "wangwu", "zhaoliu", "wuba",
            "chenjiu" };
    private Directory directory = null;
    /** Document boost per e-mail domain; domains not listed get the default boost in {@link #index()}. */
    private Map<String, Float> scores = new HashMap<String, Float>();
    private Date[] dates = null;
    /** Reader shared by every search issued through {@link #getSearcher()}. */
    private static IndexReader reader = null;

    /**
     * Sets up the boost table and sample dates, opens the index directory and,
     * if no shared reader exists yet, opens one.
     */
    public IndexUtil() {
        try {
            scores.put("jingtian.org", 8f);
            scores.put("ytu.edu", 6f);
            setDate();
            directory = FSDirectory.open(new File("e:/lucene/index02"));
            // The reader is static: only open it once, otherwise every new
            // IndexUtil would replace (and leak) the previously opened reader.
            if (reader == null) {
                reader = IndexReader.open(directory, false);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the shared reader, opening the reader on first
     * use and swapping in a fresh one when the index has changed.
     *
     * @return a searcher backed by the shared reader, or {@code null} if the
     *         reader could not be opened or refreshed
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
                System.out.println("1");
            } else {
                // openIfChanged returns null when the current reader is still up to date.
                IndexReader refreshed = IndexReader.openIfChanged(reader);
                if (refreshed != null) {
                    reader.close();
                    reader = refreshed;
                }
                System.out.println("2");
            }
            System.out.println("3");
            return new IndexSearcher(reader);
        } catch (IOException e) {
            e.printStackTrace();
        }
        System.out.println("到这里了");
        return null;
    }

    /**
     * Fills {@code dates} with one sample date per id.
     * On a parse failure the stack trace is printed and the remaining entries stay {@code null}.
     */
    public void setDate() {
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_PATTERN);
        try {
            dates = new Date[ids.length];
            dates[0] = sdf.parse("2010-08-17");
            dates[1] = sdf.parse("2011-02-17");
            dates[2] = sdf.parse("2012-03-17");
            dates[3] = sdf.parse("2011-04-17");
            dates[4] = sdf.parse("2012-05-17");
            dates[5] = sdf.parse("2011-07-17");
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /** Prints the live, maximum and deleted document counts of the index. */
    public void query() {
        IndexReader localReader = null;
        try {
            localReader = IndexReader.open(directory);
            System.out.println("numdocs:" + localReader.numDocs());
            System.out.println("maxDocs:" + localReader.maxDoc());
            System.out.println("detelemaxDocs:" + localReader.numDeletedDocs());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(localReader);
        }
    }

    /**
     * Adds one document per sample id to the index, boosting each document
     * according to the domain of its e-mail address.
     */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new Field("id", ids[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("email", emails[i], Field.Store.YES,
                        Field.Index.ANALYZED));
                doc.add(new Field("content", contents[i], Field.Store.NO,
                        Field.Index.ANALYZED));
                doc.add(new Field("name", names[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                // Store a number
                doc.add(new NumericField("attach", Field.Store.YES, true)
                        .setIntValue(attachs[i]));
                // Store a date as epoch milliseconds
                doc.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(dates[i].getTime()));
                // Everything after the last '@' is treated as the domain.
                String domain = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(domain);
                if (scores.containsKey(domain)) {
                    doc.setBoost(scores.get(domain));
                } else {
                    doc.setBoost(5f);
                }
                writer.addDocument(doc);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Deletes the document whose {@code id} is {@code "1"} through an
     * {@link IndexWriter}.
     */
    public void delete() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            // The argument selects what to delete: either a Query or a Term
            // (a Term is an exact-match value).
            // The documents are not purged at this point; they sit in a
            // "recycle bin" and can still be restored.
            writer.deleteDocuments(new Term("id", "1"));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /** Restores every document that was deleted but not yet purged. */
    public void undelete() {
        IndexReader writable = null;
        try {
            // Restoring requires a reader opened with readOnly = false.
            writable = IndexReader.open(directory, false);
            writable.undeleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(writable);
        }
    }

    /** Empties the "recycle bin" by forcing a merge of segments that contain deletions. */
    public void forceDelete() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            writer.forceMergeDeletes();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Deletes the document whose {@code id} is {@code "1"} through an
     * {@link IndexReader}; closing the reader is what commits the deletion.
     *
     * <p>A dedicated reader is used rather than the shared static one:
     * closing the shared reader here would leave {@link #getSearcher()}
     * holding a closed reader on its next call.
     */
    public void delete02() {
        IndexReader writable = null;
        try {
            writable = IndexReader.open(directory, false);
            writable.deleteDocuments(new Term("id", "1"));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(writable);
        }
    }

    /**
     * Replaces the document whose {@code id} is {@code "1"} with a new
     * document whose {@code id} is {@code "111"}.
     *
     * <p>Lucene has no in-place update; {@code updateDocument} deletes the
     * matching documents and then adds the new one.
     */
    public void update() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            Document doc = new Document();
            doc.add(new Field("id", "111", Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));
            // Field names match the ones written by index() so that the
            // replacement document stays visible to search()/search02().
            doc.add(new Field("email", emails[0], Field.Store.YES,
                    Field.Index.ANALYZED));
            doc.add(new Field("content", contents[0], Field.Store.NO,
                    Field.Index.ANALYZED));
            doc.add(new Field("name", names[0], Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));
            writer.updateDocument(new Term("id", "1"), doc);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /**
     * Searches {@code content:like} with a reader opened just for this call
     * and prints up to ten hits.
     */
    public void search() {
        IndexReader localReader = null;
        try {
            localReader = IndexReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(localReader);
            TermQuery query = new TermQuery(new Term("content", "like"));
            printHits(searcher, searcher.search(query, 10));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeReader(localReader);
        }
    }

    /**
     * Same search as {@link #search()}, but through the shared reader
     * obtained from {@link #getSearcher()}.
     */
    public void search02() {
        IndexSearcher searcher = getSearcher();
        if (searcher == null) {
            // getSearcher() has already reported why no searcher is available.
            return;
        }
        try {
            TermQuery query = new TermQuery(new Term("content", "like"));
            printHits(searcher, searcher.search(query, 10));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                // NOTE(review): per the Lucene 3.x docs this does not close a
                // reader that was passed to the constructor - confirm for the
                // exact Lucene version in use.
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Opens a writer on the index directory using the Lucene 3.5 standard analyzer. */
    private IndexWriter openWriter() throws IOException {
        return new IndexWriter(directory, new IndexWriterConfig(
                Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    }

    /** Closes the writer if it was opened, printing (not propagating) any failure. */
    private void closeWriter(IndexWriter writer) {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Closes the reader if it was opened, printing (not propagating) any failure. */
    private void closeReader(IndexReader toClose) {
        if (toClose != null) {
            try {
                toClose.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Prints one line per hit: doc number, boost, score and the stored fields. */
    private void printHits(IndexSearcher searcher, TopDocs hits) throws IOException {
        for (ScoreDoc sd : hits.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            System.out.println("(" + sd.doc + "-" + doc.getBoost() + "-"
                    + sd.score + ")--------------" + doc.get("name") + "["
                    + doc.get("email") + "]--------------------"
                    + doc.get("id") + "," + doc.get("attach") + ","
                    + doc.get("date"));
        }
    }
}