Lucene 建立索引和搜索

来源：互联网 | 发布：双目视觉避障算法选取 | 编辑：程序博客网 | 时间：2024/05/17 08:07

package testLucene;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
 * Demonstrates the process of creating a Lucene index for text files.
 *
 * <p>Every regular file whose name ends in {@code .txt} directly inside the
 * data directory becomes one document with two fields: {@code path} (the
 * file's canonical path, added with {@code Field.Keyword}) and
 * {@code contents} (the file's text, added with {@code Field.Text} from a
 * {@link Reader}).
 */
public class TxtFileIndexer {

    /**
     * Rebuilds the index in {@code D://luceneIndex} from the {@code .txt}
     * files found in {@code D://luceneData} and prints the elapsed time.
     *
     * <p>If the data directory cannot be listed, a message is written to
     * standard error and the method returns without touching the index.
     *
     * @param args command-line arguments; not used
     * @throws Exception if a source file cannot be read or the index cannot
     *                   be written
     */
    public static void main(String[] args) throws Exception {
        // Directory that receives the index files.
        File indexDir = new File("D://luceneIndex");
        // Directory that holds the source files to be indexed.
        File dataDir = new File("D://luceneData");

        // listFiles() returns null when the path does not exist, is not a
        // directory, or cannot be read. Check this BEFORE opening the writer:
        // the writer is opened with create=true (overwrite), so opening it
        // first would replace an existing index even though nothing can be
        // indexed.
        File[] dataFiles = dataDir.listFiles();
        if (dataFiles == null) {
            System.err.println("Data directory does not exist or cannot be read: "
                               + dataDir.getPath());
            return;
        }

        Analyzer luceneAnalyzer = new StandardAnalyzer();
        // Arguments: index location, analyzer, whether to overwrite the index.
        IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
        long startTime = System.currentTimeMillis();
        try {
            for (int i = 0; i < dataFiles.length; i++) {
                File candidate = dataFiles[i];
                // Only regular files with a .txt extension are indexed.
                if (candidate.isFile() && candidate.getName().endsWith(".txt")) {
                    indexTextFile(indexWriter, candidate);
                }
            }
            indexWriter.optimize();
        } finally {
            // Always release the writer, even when indexing fails part-way.
            indexWriter.close();
        }
        long endTime = System.currentTimeMillis();

        System.out.println("用时 " + (endTime - startTime)
                           + " milliseconds to create index for the files in directory "
                           + dataDir.getPath());
    }

    /**
     * Adds one text file to the index as a document with {@code path} and
     * {@code contents} fields.
     *
     * @param indexWriter open writer that receives the document
     * @param textFile    file whose content is indexed
     * @throws Exception if the file cannot be read or the document cannot be
     *                   added
     */
    private static void indexTextFile(IndexWriter indexWriter, File textFile) throws Exception {
        String canonicalPath = textFile.getCanonicalPath();
        System.out.println("索引文件 " + canonicalPath);

        // NOTE: FileReader decodes the file with the JVM's default charset.
        Reader txtReader = new FileReader(textFile);
        try {
            Document document = new Document();
            document.add(Field.Keyword("path", canonicalPath));
            document.add(Field.Text("contents", txtReader));
            indexWriter.addDocument(document);
        } finally {
            // Guarantees the file handle is released if adding the document
            // fails; closing an already-closed reader is harmless.
            txtReader.close();
        }
    }
}
----------------------------------------------------------

package testLucene;
import java.io.File;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
/**
 * Demonstrates the process of searching an existing Lucene index.
 *
 * <p>Runs a single-term query against the {@code contents} field and prints
 * the {@code path} field of every matching document.
 */
public class TxtFileSearcher {

    /**
     * Searches the index in {@code D://luceneIndex} for the term
     * {@code "files"} and prints the path of each hit.
     *
     * <p>If the index directory does not exist, a message is printed and the
     * method returns without attempting to open the index.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the index cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        // The term to look for.
        String queryStr = "files";
        // Location of the index.
        File indexDir = new File("D://luceneIndex");

        // This check must come before the directory and searcher are opened;
        // otherwise opening a missing index fails first and this message can
        // never be shown.
        if (!indexDir.exists()) {
            System.out.println("The Lucene index is not exist");
            return;
        }

        // Second argument false: open the existing directory, do not create it.
        FSDirectory directory = FSDirectory.getDirectory(indexDir, false);
        try {
            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                // A term is a field name plus a keyword expected in that field.
                // The query string is lower-cased before it is looked up.
                Term term = new Term("contents", queryStr.toLowerCase());
                TermQuery luceneQuery = new TermQuery(term);

                Hits hits = searcher.search(luceneQuery);
                for (int i = 0; i < hits.length(); i++) {
                    // hits.doc(i) is the i-th matching document.
                    Document document = hits.doc(i);
                    System.out.println("查询字符串所在文件: " + document.get("path"));
                }
            } finally {
                // Release the searcher even when the search fails.
                searcher.close();
            }
        } finally {
            // The directory was opened here, so it is closed here as well.
            directory.close();
        }
    }
}