Lucene 5.2.1 demo

来源:互联网 发布:百度数据库在哪里 编辑:程序博客网 时间:2024/05/18 22:11

原文:http://www.lucenetutorial.com/sample-apps/textfileindexer-java.html

原文的代码是基于 Lucene 4.0 编写的,这里修改了几处,使其可以在 Lucene 5.2.1 下运行。

主要是FSDirectory的open方法从open(File path)改成了open(Path path)。

package TextFileIndexer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;

import java.io.*;
import java.nio.file.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Terminal application that creates an Apache Lucene index in a folder, adds files to it
 * based on paths typed by the user, and then runs interactive queries against that index.
 *
 * <p>Only files ending in {@code .htm}, {@code .html}, {@code .xml} or {@code .txt} are indexed.
 * Each document gets three fields: {@code contents} (tokenized, not stored), {@code path} and
 * {@code filename} (both stored verbatim).
 */
public class TextFileIndexer {

  /** Maximum number of hits printed for a single query. */
  private static final int MAX_HITS = 5;

  /** Analyzer shared by indexing and query parsing so both tokenize text the same way. */
  private static final StandardAnalyzer analyzer = new StandardAnalyzer();

  /** Directory backing the index; kept so it can be closed together with the writer. */
  private final FSDirectory directory;

  private final IndexWriter writer;

  /** Files collected by {@link #addFiles(File)} that are waiting to be indexed. */
  private final List<File> queue = new ArrayList<File>();

  /**
   * Prompts for an index location, indexes every path the user enters, then answers queries
   * until the user types {@code q} or standard input ends.
   *
   * @param args ignored
   * @throws IOException if reading standard input, closing the index or opening it for search fails
   */
  public static void main(String[] args) throws IOException {
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

    System.out.println("Enter the path where the index will be created: (e.g. /tmp/index or c:\\temp\\index)");
    String indexLocation = br.readLine();

    TextFileIndexer indexer;
    try {
      if (indexLocation == null) {
        // readLine() returns null at end of input; fail with a clear message, not an NPE.
        throw new IOException("no index path was entered");
      }
      indexer = new TextFileIndexer(indexLocation);
    } catch (IOException | RuntimeException ex) {
      System.out.println("Cannot create index..." + ex.getMessage());
      System.exit(-1);
      return; // unreachable at runtime; lets the compiler see 'indexer' is assigned below
    }

    try {
      runIndexingLoop(br, indexer);
    } finally {
      // The writer must always be closed, otherwise the added documents are not committed.
      indexer.closeIndex();
    }

    runSearchLoop(br, indexLocation);
  }

  /** Reads paths from {@code br} and indexes each one until the user enters {@code q} or input ends. */
  private static void runIndexingLoop(BufferedReader br, TextFileIndexer indexer) throws IOException {
    while (true) {
      System.out.println("Enter the full path to add into the index (q=quit): (e.g. /home/ron/mydir or c:\\Users\\ron\\mydir)");
      System.out.println("[Acceptable file types: .xml, .htm, .html, .txt]");
      String path = br.readLine();
      if (path == null || path.equalsIgnoreCase("q")) {
        return;
      }
      try {
        indexer.indexFileOrDirectory(path);
      } catch (Exception e) {
        // Interactive boundary: report the failure and keep prompting.
        System.out.println("Error indexing " + path + " : " + e.getMessage());
      }
    }
  }

  /** Opens the index at {@code indexLocation} and answers queries until {@code q} or end of input. */
  private static void runSearchLoop(BufferedReader br, String indexLocation) throws IOException {
    try (FSDirectory dir = FSDirectory.open(Paths.get(indexLocation));
         IndexReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      while (true) {
        System.out.println("Enter the search query (q=quit):");
        String queryText = br.readLine();
        if (queryText == null || queryText.equalsIgnoreCase("q")) {
          return;
        }
        try {
          Query q = new QueryParser("contents", analyzer).parse(queryText);
          // A collector holds the results of one search, so create a fresh one per query.
          TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_HITS);
          searcher.search(q, collector);
          ScoreDoc[] hits = collector.topDocs().scoreDocs;

          System.out.println("Found " + hits.length + " hits.");
          for (int i = 0; i < hits.length; ++i) {
            Document d = searcher.doc(hits[i].doc);
            System.out.println((i + 1) + ". " + d.get("path") + " score=" + hits[i].score);
          }
        } catch (Exception e) {
          // Interactive boundary: a malformed query must not end the session.
          System.out.println("Error searching " + queryText + " : " + e.getMessage());
        }
      }
    }
  }

  /**
   * Opens an index writer on the given folder.
   *
   * <p>The writer uses a default {@link IndexWriterConfig}; per the Lucene documentation its
   * default open mode is {@code CREATE_OR_APPEND}, so an existing index in the folder is
   * extended rather than overwritten.
   *
   * @param indexDir the name of the folder in which the index should be created
   * @throws java.io.IOException if the directory or the index writer cannot be opened
   */
  TextFileIndexer(String indexDir) throws IOException {
    FSDirectory dir = FSDirectory.open(Paths.get(indexDir));
    IndexWriter w;
    try {
      w = new IndexWriter(dir, new IndexWriterConfig(analyzer));
    } catch (IOException | RuntimeException e) {
      dir.close(); // do not leak the directory handle when the writer cannot be created
      throw e;
    }
    this.directory = dir;
    this.writer = w;
  }

  /**
   * Indexes a single file, or every acceptable file below a directory.
   *
   * <p>A file that cannot be read or added is reported on standard output and skipped; the
   * remaining files are still processed.
   *
   * @param fileName the name of a text file or a folder we wish to add to the index
   * @throws java.io.IOException if the writer cannot report its document count
   */
  public void indexFileOrDirectory(String fileName) throws IOException {
    try {
      addFiles(new File(fileName));

      int originalNumDocs = writer.numDocs();
      for (File f : queue) {
        // try-with-resources closes the reader even if addDocument fails, and never
        // attempts to close a reader that failed to open.
        // NOTE(review): FileReader decodes with the platform default charset.
        try (FileReader fr = new FileReader(f)) {
          Document doc = new Document();
          doc.add(new TextField("contents", fr));
          doc.add(new StringField("path", f.getPath(), Field.Store.YES));
          doc.add(new StringField("filename", f.getName(), Field.Store.YES));
          writer.addDocument(doc);
          System.out.println("Added: " + f);
        } catch (IOException | RuntimeException e) {
          System.out.println("Could not add: " + f + " (" + e.getMessage() + ")");
        }
      }

      int newNumDocs = writer.numDocs();
      System.out.println("");
      System.out.println("************************");
      System.out.println((newNumDocs - originalNumDocs) + " documents added.");
      System.out.println("************************");
    } finally {
      // Always reset the queue so a failure here cannot leak files into the next call.
      queue.clear();
    }
  }

  /**
   * Recursively collects indexable files under {@code file} into the queue.
   *
   * @param file a file or directory; missing paths and unreadable directories are reported and skipped
   */
  private void addFiles(File file) {
    if (!file.exists()) {
      System.out.println(file + " does not exist.");
      return;
    }

    if (file.isDirectory()) {
      File[] children = file.listFiles();
      if (children == null) {
        // listFiles() returns null when the directory cannot be read.
        System.out.println("Cannot list directory " + file);
        return;
      }
      for (File child : children) {
        addFiles(child);
      }
      return;
    }

    String filename = file.getName().toLowerCase();
    // Only index text files.
    if (filename.endsWith(".htm") || filename.endsWith(".html")
        || filename.endsWith(".xml") || filename.endsWith(".txt")) {
      queue.add(file);
    } else {
      System.out.println("Skipped " + filename);
    }
  }

  /**
   * Closes the index writer and then the underlying directory.
   *
   * @throws java.io.IOException when exception closing
   */
  public void closeIndex() throws IOException {
    try {
      writer.close();
    } finally {
      directory.close();
    }
  }
}


0 0
原创粉丝点击