2.3.2删除索引中的文档例程
来源:互联网 发布:c语言求平均值函数 编辑:程序博客网 时间:2024/04/29 18:31
package lia.indexing;/** * Copyright Manning Publications Co. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific lan */import junit.framework.TestCase;//import lia.common.TestUtil;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexReader;import org.apache.lucene.analysis.WhitespaceAnalyzer;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.index.Term;import java.io.IOException;// From chapter 2public class IndexingTest_bai extends TestCase {//[ [1, Netherland, Amsterdam has lots of bridges, Amsterdam],//[2, Italy, Venice has lots of canals, Venice] ] protected String[] ids = {"1", "2", "3"}; protected String[] unindexed = {"Netherlands", "Italy", "Deutschland"}; protected String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals", "Mia san Mia"}; protected String[] text = {"Amsterdam", "Venice", "Munchen"}; private Directory directory;//直接声明,不初始化 protected void setUp() throws Exception { //1 directory = new RAMDirectory();//内存Directory IndexWriter writer = getWriter(); //2 for (int i = 0; i < ids.length; i++) { //3 Document doc = new Document(); doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new 
Field("country", unindexed[i], Field.Store.YES, Field.Index.NO)); doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED)); doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } writer.close(); } private IndexWriter getWriter() throws IOException { // 2 return new IndexWriter(directory, new WhitespaceAnalyzer(), // 2 IndexWriter.MaxFieldLength.UNLIMITED); // 2 } protected int getHitCount(String fieldName, String searchString) throws IOException { IndexSearcher searcher = new IndexSearcher(directory); //4 //IndexSearcher的样子 System.out.println("IndexSearcher is:" + searcher.toString()); Term t = new Term(fieldName, searchString); Query query = new TermQuery(t); //5 //query的样子 System.out.println("Query is: " + query.toString()); //int hitCount = TestUtil.hitCount(searcher, query); //6这句依赖到common.TestUtil.java,删掉 TopDocs td = searcher.search(query, 1); int hitCount = td.totalHits;//返回匹配数量 //TopDocs的样子 System.out.println("TopDocs.toString is: " + td.toString()); System.out.println("TopDocs.scoreDocs is: " + td.scoreDocs); System.out.println("TopDocs.scoreDocs is: " + td.totalHits); searcher.close(); return hitCount; } public void testIndexWriter() throws IOException { IndexWriter writer = getWriter(); assertEquals(ids.length, writer.numDocs()); //7 writer.close(); } public void testIndexReader() throws IOException { IndexReader reader = IndexReader.open(directory); assertEquals(ids.length, reader.maxDoc()); //8 assertEquals(ids.length, reader.numDocs()); //8 reader.close(); } public void showIndexInfo() throws CorruptIndexException, IOException{ IndexReader reader = IndexReader.open(directory); System.out.println( "reader.numDocs() = " + reader.numDocs() ); System.out.println( "reader.maxDoc() = " + reader.maxDoc() ); System.out.println( "reader.getVersion() = " + reader.getVersion() ); for(int i = 0; i < reader.numDocs(); i++){ System.out.println( String.format("reader.numDocs(%d) = ",i) + 
reader.document(i).toString() ); } } /* #1 Run before every test #2 Create IndexWriter #3 Add documents #4 Create new searcher #5 Build simple single-term query #6 Get number of hits #7 Verify writer document count #8 Verify reader document count */ public void testDeleteBeforeOptimize() throws IOException { IndexWriter writer = getWriter(); //一个关于数量的断言 //assertEquals(2, writer.numDocs()); //A writer.deleteDocuments(new Term("id", "1")); //B writer.commit(); //数量相关的断言 //assertTrue(writer.hasDeletions()); //1 //assertEquals(2, writer.maxDoc()); //2 //assertEquals(1, writer.numDocs()); //2 writer.close(); } public void testDeleteAfterOptimize() throws IOException { IndexWriter writer = getWriter(); //一句断言 //assertEquals(2, writer.numDocs()); writer.deleteDocuments(new Term("id", "1")); writer.optimize(); //3 writer.commit(); //全是断言 //assertFalse(writer.hasDeletions()); //assertEquals(1, writer.maxDoc()); //C //assertEquals(1, writer.numDocs()); //C writer.close(); } public static void main(String args[]) throws Exception{ IndexingTest_bai it = new IndexingTest_bai(); it.setUp(); System.out.println(it.getHitCount("city", "Amsterdam")); it.showIndexInfo(); it.testDeleteBeforeOptimize(); //it.testDeleteAfterOptimize(); it.showIndexInfo(); } /* #A 2 docs in the index #B Delete first document #C 1 indexed document, 0 deleted documents #1 Index contains deletions #2 1 indexed document, 1 deleted document #3 Optimize compacts deletes */ public void testUpdate() throws IOException { assertEquals(1, getHitCount("city", "Amsterdam")); IndexWriter writer = getWriter(); Document doc = new Document(); //A doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); //A doc.add(new Field("country", "Netherlands", Field.Store.YES, Field.Index.NO)); //A doc.add(new Field("contents", "Den Haag has a lot of museums", Field.Store.NO, Field.Index.ANALYZED)); //A doc.add(new Field("city", "Den Haag", Field.Store.YES, Field.Index.ANALYZED)); //A writer.updateDocument(new 
Term("id", "1"), //B doc); //B writer.close(); assertEquals(0, getHitCount("city", "Amsterdam"));//C assertEquals(1, getHitCount("city", "Haag")); //D } }
</pre><pre name="code" class="java">
对于it.testDeleteBeforeOptimize();的运行结果
IndexSearcher is:org.apache.lucene.search.IndexSearcher@1b07961
Query is: city:Amsterdam
TopDocs.toString is: org.apache.lucene.search.TopDocs@fed938
TopDocs.scoreDocs is: [Lorg.apache.lucene.search.ScoreDoc;@1672476
TopDocs.scoreDocs is: 1
1
reader.numDocs() = 3
reader.maxDoc() = 3
reader.getVersion() = 1425356634346
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(2) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
reader.numDocs() = 2
reader.maxDoc() = 3
reader.getVersion() = 1425356634347
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
对于it.testDeleteAfterOptimize();的运行结果
IndexSearcher is:org.apache.lucene.search.IndexSearcher@1b07961
Query is: city:Amsterdam
TopDocs.toString is: org.apache.lucene.search.TopDocs@fed938
TopDocs.scoreDocs is: [Lorg.apache.lucene.search.ScoreDoc;@1672476
TopDocs.scoreDocs is: 1
1
reader.numDocs() = 3
reader.maxDoc() = 3
reader.getVersion() = 1425357077599
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(2) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
reader.numDocs() = 2
reader.maxDoc() = 2
reader.getVersion() = 1425357077600
reader.numDocs(0) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(1) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
- 2.3.2删除索引中的文档例程
- 2.3.3 更新索引中的文档例程
- 2.3.1 向索引添加文档例程
- mapreduce文档倒排索引例程
- lucene中的删除索引
- 基本索引操作---添加删除更新文档
- lucene删除索引中的Document示例
- 删除设备例程
- web删除文件例程
- lucene-索引文档的删除、更新及增强加权
- Cocos2d-x v3.2 例程中的 SpriteBlur
- solr删除指定id文档,删除查询到是所有索引数据,删除所有数据
- Lotus中的管理员文档删除怎么办?
- 删除数据库中的复制存储冲突文档
- 怎样删除WORD文档中的所有超链接
- 删除整篇Word文档中的图文框
- 如何删除Word文档中的空行
- 使用UltraEdit中删除文档中的空行
- 虚拟主机配置
- 检测密码强度的js脚本(改编)
- 抓包工具之 Fiddler
- 如何获取 当前系统的 版本号?
- uva 11008 Antimatter Ray Clearcutting 状态压缩+记忆化搜索
- 2.3.2删除索引中的文档例程
- Primary Database Changes That Require Manual Intervention at a Physical Standby
- 或许被我们遗忘的JAVA Math类
- poj 1951 模拟(Krunch字串)
- 安装opencv,在vs2010配置opencv
- css背景色渐变详解
- UIKit框架-高级控件:7.UIPickerView的自定义
- javascript面向对象编程-简介(笔记1)
- 有点乱