Lucene小练八(实现了索引和搜索)

来源:互联网 发布:研究人工智能的意义 编辑:程序博客网 时间:2024/06/07 10:03
// Main class
package Java.se.lucene;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Small Lucene 3.6 exercise: builds an on-disk index from a handful of
 * hard-coded sample records and demonstrates querying index statistics,
 * deleting / undeleting / purging documents, merging segments, updating a
 * document and running a simple term search.
 *
 * <p>All I/O failures are reported with {@code printStackTrace()} and the
 * method then returns normally; nothing is rethrown to the caller.
 */
public class index {

    // Sample data; element i of every array describes document i.
    private String[] ids = {"1", "2", "3", "4", "5", "6"};
    private String[] emails = {"aa@aa.com", "bb@bb.com", "cc@cc.com", "dd@dd.com", "ee@ee.com", "ff@ff.com"};
    private String[] contents = {"i like  gdsfgfds", "i like fsdfs", "i like fdsfsd", "i like fdsfsd", "i like like fdfs", "i like like like fsefsdfg"};
    private int[] attachs = {1, 2, 3, 4, 5, 6};
    private String[] names = {"liwu", "zhangsan", "xiaoqinag", "laona", "dabao", "lisi"};

    private Directory directory = null;
    private IndexWriter writer = null;
    private Date[] dates = null;

    // Document boost keyed by e-mail domain (the part after '@').
    private Map<String, Float> scores = new HashMap<String, Float>();

    // Reader shared by every searcher handed out by getSearcher().
    private static IndexReader reader = null;

    /**
     * Prepares the sample dates and boosts, opens the on-disk directory and
     * (re)builds the index.
     */
    public index() {
        setDate(); // create the dates
        scores.put("aa.com", 2.0f);
        scores.put("bb.com", 1.0f);
        scores.put("cc.com", 3.0f);
        scores.put("dd.com", 4.0f);
        scores.put("ee.com", 5.0f);
        scores.put("ff.com", 6.0f);
        try {
            // directory = new RAMDirectory(); // open the Directory in memory
            // Open the Directory from disk. This has to happen BEFORE Index()
            // runs, otherwise the IndexWriter is created on a null directory.
            directory = FSDirectory.open(new File("f:/lucene/Index04"));
            Index();
            // reader = IndexReader.open(directory);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Creates an IndexWriter on {@code directory} with the standard analyzer. */
    private IndexWriter openWriter() throws IOException {
        return new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
    }

    /** Closes the current writer if one was opened; safe to call when opening failed. */
    private void closeWriter() {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            writer = null;
        }
    }

    /** Closes a reader, tolerating {@code null}. */
    private void closeQuietly(IndexReader r) {
        if (r == null) {
            return;
        }
        try {
            r.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints one line per hit, in the order returned by the search. */
    private void printHits(IndexSearcher searcher, TopDocs tds) throws IOException {
        for (ScoreDoc sdc : tds.scoreDocs) {
            Document document = searcher.doc(sdc.doc);
            System.out.println("(" + sdc.doc + ")" + document.get("name") + "[" + document.get("email") + "]-->"
                    + document.get("id") + "..." + document.get("attach") + "..." + document.get("date"));
        }
    }

    /**
     * Builds the index: removes every existing document, then adds one
     * document per sample record and commits once at the end.
     */
    public void Index() {
        try {
            writer = openWriter();
            writer.deleteAll(); // start from an empty index
            for (int i = 0; i < ids.length; i++) {
                Document document = new Document();
                document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("content", contents[i], Field.Store.YES, Field.Index.ANALYZED));
                document.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                // index the number
                document.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attachs[i]));
                // index the date; it must be stored as its millisecond value
                document.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));

                // Boost by e-mail domain; unknown domains get 0.5.
                String str = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(str);
                if (scores.containsKey(str)) {
                    document.setBoost(scores.get(str));
                } else {
                    document.setBoost(0.5f);
                }
                writer.addDocument(document);
            }
            writer.commit(); // one commit for the whole batch
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /** Prints the live, maximum and deleted document counts of the index. */
    public void query() {
        IndexReader statsReader = null;
        try {
            statsReader = IndexReader.open(directory);
            System.out.println("numdocs:" + statsReader.numDocs());   // number of live documents
            System.out.println("maxDocs:" + statsReader.maxDoc());    // one greater than the largest doc number
            System.out.println("detelemaxDocs:" + statsReader.numDeletedDocs());
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(statsReader);
        }
    }

    /**
     * Deletes the document whose id is "1" through the writer. The document
     * is only marked as deleted; see {@link #undelete()} and
     * {@link #forceDelete()}.
     */
    public void delete01() {
        try {
            writer = openWriter();
            writer.deleteDocuments(new Term("id", "1"));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /** Restores every document that is marked as deleted, using a writable reader. */
    @SuppressWarnings("deprecation")
    public void undelete() {
        IndexReader writableReader = null;
        try {
            writableReader = IndexReader.open(directory, false); // false = not read-only
            writableReader.undeleteAll();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writableReader);
        }
    }

    /** Empties the "recycle bin": merges away the documents marked as deleted. */
    public void forceDelete() {
        try {
            writer = openWriter();
            writer.forceMergeDeletes();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /** Merges the index down to at most 3 segments (original note: no longer in use). */
    public void forceMerge() {
        try {
            writer = openWriter();
            writer.forceMerge(3);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /**
     * Replaces the document whose id is "1" with a new document whose id is
     * "11". Lucene has no in-place update: updateDocument deletes the
     * documents matching the term and then adds the new one, so it is issued
     * exactly once.
     */
    public void update() {
        try {
            writer = openWriter();
            Document document = new Document();
            document.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            document.add(new Field("email", emails[0], Field.Store.YES, Field.Index.ANALYZED));
            document.add(new Field("content", contents[0], Field.Store.NO, Field.Index.NOT_ANALYZED));
            document.add(new Field("name", names[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.updateDocument(new Term("id", "1"), document);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter();
        }
    }

    /** Searches "content" for the term "like" with a reader opened just for this call. */
    public void search01() {
        IndexReader localReader = null;
        IndexSearcher searcher = null;
        try {
            localReader = IndexReader.open(directory);
            searcher = new IndexSearcher(localReader);
            TermQuery query = new TermQuery(new Term("content", "like"));
            TopDocs tds = searcher.search(query, 10);
            printHits(searcher, tds);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            closeQuietly(localReader);
        }
    }

    /** Same search as {@link #search01()}, but on the shared reader from {@link #getSearcher()}. */
    public void search02() {
        IndexSearcher searcher = null;
        try {
            searcher = getSearcher();
            TermQuery query = new TermQuery(new Term("content", "like"));
            TopDocs tds = searcher.search(query, 10);
            printHits(searcher, tds);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    // Releases the searcher only; the shared reader stays open for reuse.
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Creates the sample dates, one per entry of {@code ids}. */
    public void setDate() {
        // "MM" = month and "dd" = day of month ("mm" would be minutes, "kk" hour of day).
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        String[] raw = {"2010-08-17", "2011-02-17", "2012-03-17", "2011-04-17", "2012-05-17", "2011-07-17"};
        dates = new Date[ids.length];
        try {
            for (int i = 0; i < dates.length && i < raw.length; i++) {
                dates[i] = sdf.parse(raw[i]);
            }
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher on the shared reader. The reader is opened on first
     * use and afterwards only reopened when the index has changed, in which
     * case the previous reader is closed.
     *
     * @return a new IndexSearcher wrapping the shared reader
     * @throws IllegalStateException if no reader could be opened
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
            } else {
                IndexReader refreshed = IndexReader.openIfChanged(reader); // null when nothing changed
                if (refreshed != null) {
                    reader.close();
                    reader = refreshed;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (reader == null) {
            throw new IllegalStateException("index reader could not be opened");
        }
        return new IndexSearcher(reader);
    }
}

// Test class
package Java.se.lucene;

import org.junit.Test;

/** Manual JUnit drivers, one per operation of {@link index}. */
public class Test_Index {

    @Test
    public void test_index() { // build the index
        index ind = new index();
        ind.Index();
    }

    @Test
    public void test_query() { // print index statistics
        index ind = new index();
        ind.query();
    }

    @Test
    public void test_delete01() { // delete a document
        index ind = new index();
        ind.delete01();
    }

    @Test
    public void test_undelete() { // restore deleted documents
        index ind = new index();
        ind.undelete();
    }

    @Test
    public void test_forceDelete() { // empty the recycle bin
        index ind = new index();
        ind.forceDelete();
    }

    @Test
    public void test_forceMerge() { // merge segments
        index ind = new index();
        ind.forceMerge();
    }

    @Test
    public void test_update() { // update a document
        index ind = new index();
        ind.update();
    }

    @Test
    public void test_search01() { // search with a per-call reader
        index ind = new index();
        ind.search01();
    }

    @Test
    public void test_search02() { // search repeatedly on the shared reader
        index ind = new index();
        for (int i = 0; i < 5; i++) {
            ind.search02();
            System.out.println("------------------------");
            try {
                Thread.sleep(5000);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop polling.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /*
    // Checks that the index was built correctly by printing every document.
    public void check() throws IOException {
        Directory directory = FSDirectory.open(new File("f:/lucene/Index04/"));
        IndexReader reader = IndexReader.open(directory);
        for (int i = 0; i < reader.numDocs(); i++) {
            System.out.println(reader.document(i));
        }
        reader.close();
    }

    public static void main(String[] args) throws IOException {
        new index().check();
    }
    */
}