Lucene 基础

来源:互联网 发布:用ps做淘宝广告视频 编辑:程序博客网 时间:2024/05/17 07:41

Index:

import java.io.File;  import java.io.FileReader;  import java.io.Reader;  import java.util.Date;  import org.apache.lucene.analysis.Analyzer;  import org.apache.lucene.analysis.standard.StandardAnalyzer;  import org.apache.lucene.document.Document;  import org.apache.lucene.document.Field;  import org.apache.lucene.index.IndexWriter;  /**  * This class demonstrate the process of creating index with Lucene   * for text files  */  public class TxtFileIndex {      public static void main(String[] args) throws Exception{          //设置索引地址          File   indexDir = new File("D:\\luceneIndex");          //设置数据地址          File   dataDir  = new File("D:\\luceneData");          //建立分词          Analyzer luceneAnalyzer = new StandardAnalyzer();          //取得目录下所有Files          File[] dataFiles  = dataDir.listFiles();          //建立indexWrite  indexWrite主要作用是添加索引          IndexWriter indexWriter = new IndexWriter(indexDir,luceneAnalyzer,true);          //取得程序开启时间          long startTime = new Date().getTime();          //循环文件          for(int i = 0; i < dataFiles.length; i++){              //取出txt后缀的文档              if(dataFiles[i].isFile() && dataFiles[i].getName().endsWith(".txt")){                  System.out.println("Indexing file " + dataFiles[i].getCanonicalPath());                  //新建一个Document                  Document document = new Document();                  //读取数据                  Reader txtReader = new FileReader(dataFiles[i]);                  //Document添加path                  document.add(new Field("path", dataFiles[i].getCanonicalPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));                  //Document添加正文                  document.add(new Field("contents",txtReader));                  //添加索引                  indexWriter.addDocument(document);              }          }          indexWriter.optimize();          indexWriter.close();          long endTime = new Date().getTime();                    //输出程序所用时间          System.out.println("It takes " + 
(endTime - startTime)                              + " milliseconds to create index for the files in directory "                             + dataDir.getPath());              }  }  

Search:

import java.io.File;  //import org.apache.log4j.Logger;  import org.apache.lucene.document.Document;  import org.apache.lucene.index.Term;  import org.apache.lucene.search.FuzzyQuery;import org.apache.lucene.search.Hits;  import org.apache.lucene.search.IndexSearcher;  import org.apache.lucene.search.Query;import org.apache.lucene.search.TermQuery;  import org.apache.lucene.search.WildcardQuery;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;  /**  * This class is used to demonstrate the   * process of searching on an existing   * Lucene index  *  */  public class TxtFileSearcher {      public static void main(String[] args) throws Exception{         // Logger logger=Logger.getLogger(TxtFileSearcher.class);          //要查询的词组          String queryStr = "456";          //索引地址          File indexDir = new File("D:\\luceneIndex");          testFuzzySearch("D:\\luceneIndex");        //取得索引字典         /*  FSDirectory directory = FSDirectory.getDirectory(indexDir,false);          //建立查询          IndexSearcher searcher = new IndexSearcher(directory);          //查询的索引地址是否存在          if(!indexDir.exists()){              System.out.println("The Lucene index is not exist");              return;          }          //建立term 查询docuemnt中contents中的内容(内容要转为大字)          Term term = new Term("contents",queryStr.toLowerCase());          //进行查询          TermQuery luceneQuery = new TermQuery(term);          //生成结果          Hits hits = searcher.search(luceneQuery);          for(int i = 0; i < hits.length(); i++){              //取得结果中的dowuemnt              Document document = hits.doc(i);              //取得返回的path属性              System.out.println("File: " + document.get("path"));          } */           }     public static void testFuzzySearch(String indexDirectory)throws Exception{         Directory dir = FSDirectory.getDirectory(indexDirectory,false);         IndexSearcher indexSearcher = new IndexSearcher(dir);         String[] searchWords = {"*12?", 
"456"};         Query query;         for(int i = 0; i < searchWords.length; i++){            query = new WildcardQuery(new Term("contents",searchWords[i]));            Hits results = indexSearcher.search(query);            System.out.println(results.length() + "search results for query " + searchWords[i]);         }      }  }  

这里使用的是通配符查询(WildcardQuery):`*` 匹配任意多个字符,`?` 匹配单个字符。注意方法名虽然叫 testFuzzySearch,但实际执行的并不是模糊查询(FuzzyQuery)。

0 0