2.3.2删除索引中的文档例程

来源:互联网 发布:c语言求平均值函数 编辑:程序博客网 时间:2024/04/29 18:31
package lia.indexing;/** * Copyright Manning Publications Co. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific lan      */import junit.framework.TestCase;//import lia.common.TestUtil;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexReader;import org.apache.lucene.analysis.WhitespaceAnalyzer;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.index.Term;import java.io.IOException;// From chapter 2public class IndexingTest_bai extends TestCase {//[ [1, Netherland, Amsterdam has lots of bridges, Amsterdam],//[2, Italy, Venice has lots of canals, Venice] ]  protected String[] ids = {"1", "2", "3"};  protected String[] unindexed = {"Netherlands", "Italy", "Deutschland"};  protected String[] unstored = {"Amsterdam has lots of bridges",                                 "Venice has lots of canals",                                 "Mia san Mia"};  protected String[] text = {"Amsterdam", "Venice", "Munchen"};  private Directory directory;//直接声明,不初始化  protected void setUp() throws Exception {     //1    directory = new RAMDirectory();//内存Directory    IndexWriter writer = getWriter();           //2    for (int i = 0; i < ids.length; i++) {      //3      Document doc = new 
Document();      doc.add(new Field("id", ids[i],                        Field.Store.YES,                        Field.Index.NOT_ANALYZED));      doc.add(new Field("country", unindexed[i],                        Field.Store.YES,                        Field.Index.NO));      doc.add(new Field("contents", unstored[i],                        Field.Store.NO,                        Field.Index.ANALYZED));      doc.add(new Field("city", text[i],                        Field.Store.YES,                        Field.Index.ANALYZED));      writer.addDocument(doc);    }    writer.close();  }  private IndexWriter getWriter() throws IOException {            // 2    return new IndexWriter(directory, new WhitespaceAnalyzer(),   // 2                           IndexWriter.MaxFieldLength.UNLIMITED); // 2  }  protected int getHitCount(String fieldName, String searchString)    throws IOException {    IndexSearcher searcher = new IndexSearcher(directory); //4        //IndexSearcher的样子    System.out.println("IndexSearcher is:" + searcher.toString());        Term t = new Term(fieldName, searchString);    Query query = new TermQuery(t);                        //5        //query的样子    System.out.println("Query is: " + query.toString());        //int hitCount = TestUtil.hitCount(searcher, query);     //6这句依赖到common.TestUtil.java,删掉    TopDocs td = searcher.search(query, 1);    int hitCount = td.totalHits;//返回匹配数量        //TopDocs的样子    System.out.println("TopDocs.toString is: " + td.toString());    System.out.println("TopDocs.scoreDocs is: " + td.scoreDocs);    System.out.println("TopDocs.scoreDocs is: " + td.totalHits);        searcher.close();    return hitCount;  }  public void testIndexWriter() throws IOException {    IndexWriter writer = getWriter();    assertEquals(ids.length, writer.numDocs());            //7    writer.close();  }  public void testIndexReader() throws IOException {    IndexReader reader = IndexReader.open(directory);    assertEquals(ids.length, reader.maxDoc());       
      //8    assertEquals(ids.length, reader.numDocs());            //8    reader.close();  }    public void showIndexInfo() throws CorruptIndexException, IOException{  IndexReader reader = IndexReader.open(directory);  System.out.println( "reader.numDocs() = " + reader.numDocs() );  System.out.println( "reader.maxDoc() = " + reader.maxDoc() );  System.out.println( "reader.getVersion() = " + reader.getVersion() );  for(int i = 0; i < reader.numDocs(); i++){  System.out.println( String.format("reader.numDocs(%d) = ",i) + reader.document(i).toString() );  }  }  /*    #1 Run before every test    #2 Create IndexWriter    #3 Add documents    #4 Create new searcher    #5 Build simple single-term query    #6 Get number of hits    #7 Verify writer document count    #8 Verify reader document count  */    public void testDeleteBeforeOptimize() throws IOException {    IndexWriter writer = getWriter();        //一个关于数量的断言    //assertEquals(2, writer.numDocs()); //A        writer.deleteDocuments(new Term("id", "1"));  //B    writer.commit();        //数量相关的断言    //assertTrue(writer.hasDeletions());    //1    //assertEquals(2, writer.maxDoc());    //2    //assertEquals(1, writer.numDocs());   //2           writer.close();  }  public void testDeleteAfterOptimize() throws IOException {    IndexWriter writer = getWriter();        //一句断言    //assertEquals(2, writer.numDocs());    writer.deleteDocuments(new Term("id", "1"));    writer.optimize();                //3    writer.commit();        //全是断言    //assertFalse(writer.hasDeletions());    //assertEquals(1, writer.maxDoc());  //C    //assertEquals(1, writer.numDocs()); //C            writer.close();  }    public static void main(String args[]) throws Exception{  IndexingTest_bai it = new IndexingTest_bai();  it.setUp();    System.out.println(it.getHitCount("city", "Amsterdam"));    it.showIndexInfo();    it.testDeleteBeforeOptimize();  //it.testDeleteAfterOptimize();    it.showIndexInfo();  }      /*    #A 2 docs in the index    #B 
Delete first document    #C 1 indexed document, 0 deleted documents    #1 Index contains deletions    #2 1 indexed document, 1 deleted document    #3 Optimize compacts deletes  */      public void testUpdate() throws IOException {    assertEquals(1, getHitCount("city", "Amsterdam"));    IndexWriter writer = getWriter();    Document doc = new Document();                   //A                doc.add(new Field("id", "1",                      Field.Store.YES,                      Field.Index.NOT_ANALYZED));    //A    doc.add(new Field("country", "Netherlands",                      Field.Store.YES,                      Field.Index.NO));              //A      doc.add(new Field("contents",                                          "Den Haag has a lot of museums",                      Field.Store.NO,                      Field.Index.ANALYZED));       //A    doc.add(new Field("city", "Den Haag",                      Field.Store.YES,                      Field.Index.ANALYZED));       //A    writer.updateDocument(new Term("id", "1"),       //B                          doc);                      //B    writer.close();    assertEquals(0, getHitCount("city", "Amsterdam"));//C       assertEquals(1, getHitCount("city", "Haag"));     //D    }      }
</pre><pre name="code" class="java">

对于it.testDeleteBeforeOptimize();的运行结果

IndexSearcher is:org.apache.lucene.search.IndexSearcher@1b07961
Query is: city:Amsterdam
TopDocs.toString is: org.apache.lucene.search.TopDocs@fed938
TopDocs.scoreDocs is: [Lorg.apache.lucene.search.ScoreDoc;@1672476
TopDocs.scoreDocs is: 1
1
reader.numDocs() = 3
reader.maxDoc() = 3
reader.getVersion() = 1425356634346
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(2) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
reader.numDocs() = 2
reader.maxDoc() = 3
reader.getVersion() = 1425356634347
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>


对于it.testDeleteAfterOptimize();的运行结果

IndexSearcher is:org.apache.lucene.search.IndexSearcher@1b07961
Query is: city:Amsterdam
TopDocs.toString is: org.apache.lucene.search.TopDocs@fed938
TopDocs.scoreDocs is: [Lorg.apache.lucene.search.ScoreDoc;@1672476
TopDocs.scoreDocs is: 1
1
reader.numDocs() = 3
reader.maxDoc() = 3
reader.getVersion() = 1425357077599
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(2) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
reader.numDocs() = 2
reader.maxDoc() = 2
reader.getVersion() = 1425357077600
reader.numDocs(0) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(1) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>

0 0
原创粉丝点击