Lucene 4.3.0 demo walkthrough

package org.apache.lucene.demo;

/* Apologies, everyone; I am still learning and am only a beginner myself.
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* firefaith, edited 2013-6-9
 *
 * This demo builds an index over a set of documents; a later article searches that
 * index (a minimal search sketch also follows the listing below).
 *
 * Prerequisites for running the demo:
 * 1. Lucene and the JDK are configured correctly. javax.servlet.jar is not shipped
 *    with the JDK and must be downloaded from the official site.
 * 2. Create a local folder to hold the index output and a data folder for the source
 *    documents; put the .txt files you want to search into the data folder.
 * 3. The build environment is Eclipse. The program needs command-line arguments, so
 *    run it via Run As > Run Configurations.
 *
 * As a full-text search tool, Lucene roughly works in three steps:
 * 1. Build an index over the documents.
 * 2. Search the index.
 * 3. Return the results.
 */

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

/** Index all text files under a directory.
 * <p>
 * This is a command-line application demonstrating simple Lucene indexing.
 * Run it with no command-line arguments for usage information.
 */
public class IndexFiles {

  private IndexFiles() {}

  /** Index all text files under a directory. */
  public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
                 + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
                 + "This indexes the documents in DOCS_PATH, creating a Lucene index "
                 + "in INDEX_PATH that can be searched with SearchFiles";
    // usage is the message printed when the command-line arguments are invalid; it also
    // shows what indexing needs: two paths plus an optional -update flag.
    String indexPath = "index"; // default path; change it to a concrete path and you can simply Run As > Java Application
    String docsPath = null;
    boolean create = true;      // the -update flag: update an existing index instead of creating one; the default is create

    // read the command-line arguments, one token at a time
    for (int i = 0; i < args.length; i++) {
      if ("-index".equals(args[i])) {
        indexPath = args[i+1];
        i++;
      } else if ("-docs".equals(args[i])) {
        docsPath = args[i+1];
        i++;
      } else if ("-update".equals(args[i])) {
        create = false;
      }
    }
    // done reading the command line

    // no docs path given: report an error
    if (docsPath == null) {
      System.err.println("Usage: " + usage);
      System.exit(1);
    }

    // docs directory does not exist or is not readable: report an error
    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
      System.exit(1);
    }
    /* Overall flow: documents enter the index through
     * IndexWriter.addDocument(Iterable<? extends IndexableField> doc). The call chain is roughly:
     *
     * IndexWriter.addDocument(doc)
     *   └ IndexWriter.addDocument(doc, analyzer)             analyzer is a member of IndexWriter, assigned in the enclosing call
     *     └ IndexWriter.updateDocument(null, doc, analyzer)  null is passed as the term parameter
     *       └ DocumentsWriter: docWriter.updateDocument(doc, analyzer, null)
     *           docWriter is IndexWriter's DocumentsWriter member
     *         └ DocumentsWriter: dwpt.updateDocument(doc, analyzer, delTerm=null)
     *             DocumentsWriterPerThread dwpt = flushControl.obtainAndLock().dwpt;
     *             flushControl is DocumentsWriter's DocumentsWriterFlushControl member
     *           └ DocumentsWriterPerThread.updateDocument(doc, analyzer, delTerm=null) {
     *                 docState.analyzer = analyzer;          // DocState is an inner class
     *                 docState.doc = doc;
     *                 consumer.processDocument(fieldInfos);  // consumer is the DocConsumer member;
     *                                                        // fieldInfos is a private FieldInfos.Builder
     *             }
     *
     * The writer itself is created by the constructor IndexWriter(Directory d, IndexWriterConfig conf).
     * Indexing starts with indexDocs(writer, docDir), which the demo implements itself (see below).
     * 1. First of all we need an analyzer (Analyzer).
     */
    Date start = new Date();
    try {
      System.out.println("Indexing to directory '" + indexPath + "'...");

      Directory dir = FSDirectory.open(new File(indexPath));
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
      IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);

      // configure the open mode: create or update
      if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(OpenMode.CREATE);
      } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      }

      // Optional: for better indexing performance, if you
      // are indexing many documents, increase the RAM
      // buffer.  But if you do this, increase the max heap
      // size to the JVM (eg add -Xmx512m or -Xmx1g).
      // This is a tuning knob only and does not affect functionality:
      //
      // iwc.setRAMBufferSizeMB(256.0);

      IndexWriter writer = new IndexWriter(dir, iwc);
      indexDocs(writer, docDir);

      // NOTE: if you want to maximize search performance,
      // you can optionally call forceMerge here.  This can be
      // a terribly costly operation, so generally it's only
      // worth it when your index is relatively static (ie
      // you're done adding documents to it):
      //
      // writer.forceMerge(1);

      writer.close();

      Date end = new Date();
      System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() +
       "\n with message: " + e.getMessage());
    }
  }

  /**
   * Indexes the given file using the given writer, or if a directory is given,
   * recurses over files and directories found under the given directory.
   *
   * NOTE: This method indexes one document per input file.  This is slow.  For good
   * throughput, put multiple documents into your input file(s).  An example of this is
   * in the benchmark module, which can create "line doc" files, one document per line,
   * using the
   * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
   * >WriteLineDocTask</a>.
   *
   * @param writer Writer to the index where the given file/dir info will be stored
   * @param file The file to index, or the directory to recurse into to find files to index
   * @throws IOException If there is a low-level I/O error
   */
  static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
      if (file.isDirectory()) {
        String[] files = file.list();
        // an IO error could occur
        if (files != null) {
          for (int i = 0; i < files.length; i++) {
            indexDocs(writer, new File(file, files[i]));
          }
        }
      } else {
        FileInputStream fis;
        try {
          fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
          // at least on windows, some temporary files raise this exception with an "access denied" message
          // checking if the file can be read doesn't help
          return;
        }

        try {
          // make a new, empty document
          Document doc = new Document();
          // Add the path of the file as a field named "path".  Use a
          // field that is indexed (i.e. searchable), but don't tokenize
          // the field into separate words and don't index term frequency
          // or positional information:
          Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
          doc.add(pathField);

          // Add the last modified date of the file as a field named "modified".
          // Use a LongField that is indexed (i.e. efficiently filterable with
          // NumericRangeFilter).  This indexes to milli-second resolution, which
          // is often too fine.  You could instead create a number based on
          // year/month/day/hour/minutes/seconds, down the resolution you require.
          // For example the long value 2011021714 would mean
          // February 17, 2011, 2-3 PM.
          doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

          // Add the contents of the file to a field named "contents".  Specify a Reader,
          // so that the text of the file is tokenized and indexed, but not stored.
          // Note that FileReader expects the file to be in UTF-8 encoding.
          // If that's not the case searching for special characters will fail.
          doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

          if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
          } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
          }
        } finally {
          fis.close();
        }
      }
    }
  }
}
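
As noted in the header comment, the index written by this program is meant to be searched afterwards; the full demo ships a SearchFiles class for that. For a quick, self-contained illustration, below is a minimal search sketch against the Lucene 4.3 API. The class name MinimalSearch, the "index" directory and the query string "lucene" are placeholders chosen to match the defaults above; this is a simplified stand-in rather than the demo's own SearchFiles, and it additionally needs the lucene-queryparser jar on the classpath.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;

public class MinimalSearch {
  public static void main(String[] args) throws Exception {
    // Open the index that IndexFiles wrote (default directory name "index").
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
    IndexSearcher searcher = new IndexSearcher(reader);

    // Parse a query against the "contents" field, using the same analyzer as at indexing time.
    QueryParser parser = new QueryParser(Version.LUCENE_43, "contents",
                                         new StandardAnalyzer(Version.LUCENE_43));
    Query query = parser.parse("lucene");  // placeholder query string

    // Fetch the top 10 hits and print the stored "path" field of each.
    TopDocs results = searcher.search(query, 10);
    for (ScoreDoc sd : results.scoreDocs) {
      Document doc = searcher.doc(sd.doc);
      System.out.println(doc.get("path") + "  (score: " + sd.score + ")");
    }

    reader.close();
  }
}

Handing QueryParser the same StandardAnalyzer that was used for indexing keeps query terms tokenized consistently with the "contents" field; the "path" field is printable because it was stored with Field.Store.YES.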
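
The comment on the "modified" field above says a LongField can be filtered efficiently by date range (it names NumericRangeFilter). As an illustration of that point, the sketch below uses NumericRangeQuery, the query-side counterpart in Lucene 4.3, to find files modified within the last 24 hours. The class name ModifiedRangeSearch and the time window are made up for this example and are not part of the demo.

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

import java.io.File;

public class ModifiedRangeSearch {
  public static void main(String[] args) throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
    IndexSearcher searcher = new IndexSearcher(reader);

    // Match files modified in the last 24 hours; the "modified" field holds
    // File.lastModified() in milliseconds, as written by IndexFiles above.
    long now = System.currentTimeMillis();
    Query q = NumericRangeQuery.newLongRange("modified",
        now - 24L * 60 * 60 * 1000, now, true, true);

    TopDocs hits = searcher.search(q, 10);
    for (ScoreDoc sd : hits.scoreDocs) {
      Document doc = searcher.doc(sd.doc);
      System.out.println(doc.get("path"));
    }
    reader.close();
  }
}

Note that "modified" was added with Field.Store.NO, so it can be range-filtered but not read back; the stored "path" field is printed instead.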