Lucene3.6 Example和一些Tips

来源:互联网 发布:数据挖掘导论 微盘 编辑:程序博客网 时间:2024/06/16 07:05

1 IndexWriterConfig的设置

代码如下所示:
// Analyzer applied to document text at index time.
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, analyzer);
// CREATE: build a fresh index, replacing whatever already exists in the directory.
conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
IndexWriter indexWriter = new IndexWriter(indexDir, conf);


Lucene 3.x 与之前几个版本的不同之处之一,是 IndexWriter 实例的初始化方式:现在需要通过 IndexWriterConfig 这个类来完成配置。

在Lucene的API中可以看到目前IndexWriter类最新的构造函数是最后一种,需要用到IndexWriterConfig这个类。其中需要设置OpenMode属性:

// Open the index directory in CREATE mode: a new index is built, overwriting any existing one.
conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

Note:这行代码指定以"新建或覆盖"的方式打开存放索引的文件夹。如果没有这样设置,并且原索引文件夹中的旧索引没有被删除,那么新的索引数据将会追加(append)到原有索引之后,同一批文档会被重复索引,导致搜索结果中出现重复记录。

以下是没有设置OpenMode的结果截图:


First time:


Second time:


Third time:


由以上过程可以发现,数据是不断变大的,而且Score是越来越高的。因为索引不断增多,导致搜索的时候结果集变大。

IndexWriterConfig.OpenMode这个属性包括三个值,用户可以根据自己的需求进行选择。

Enum Constant Summary
           APPEND:打开一个已存在的索引,并在其后追加
           CREATE:新建索引,若索引已存在则覆盖
           CREATE_OR_APPEND:索引不存在时新建,已存在时打开并追加

2 建立Index代码:

/*
 * Indexer.java
 *
 * Created on 31 July 2012, 14:52
 *
 */
package lucene.demo.search;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import lucene.demo.business.Hotel;
import lucene.demo.business.HotelDatabase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds the Lucene index of {@link Hotel} entries.
 *
 * <p>A single {@link IndexWriter} is created lazily and reused until
 * {@link #closeIndexWriter()} is called. This class does no synchronization of its own.
 *
 * @author HaoZhang
 */
public class Indexer {

    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "F:\\Lucene\\luceneIndex";

    /** Directory on disk that holds the index files. */
    private final File indexDir;

    /** Lazily created writer; {@code null} while no writer is open. */
    private IndexWriter indexWriter = null;

    /** Creates a new instance of Indexer that writes to the default index location. */
    public Indexer() {
        this(new File(DEFAULT_INDEX_PATH));
    }

    /**
     * Creates a new instance of Indexer that writes to the given location.
     *
     * @param indexDir directory that holds (or will hold) the index files
     * @throws IllegalArgumentException if {@code indexDir} is {@code null}
     */
    public Indexer(File indexDir) {
        if (indexDir == null) {
            throw new IllegalArgumentException("indexDir must not be null");
        }
        this.indexDir = indexDir;
    }

    /**
     * Returns the shared index writer, opening it on first use.
     *
     * <p>The {@code create} flag only takes effect when a new writer has to be opened;
     * if a writer is already open it is returned unchanged.
     *
     * @param create {@code true} to start a fresh index (overwriting an existing one),
     *               {@code false} to append to an existing index or create it if missing
     * @return the open index writer
     * @throws IOException if the index directory cannot be opened or locked
     */
    public IndexWriter getIndexWriter(boolean create) throws IOException {
        if (indexWriter == null) {
            // FSDirectory.open is the static factory the original reached via SimpleFSDirectory.open.
            FSDirectory dir = FSDirectory.open(indexDir);
            IndexWriterConfig conf =
                    new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
            // Honour the caller's choice instead of unconditionally wiping the index.
            conf.setOpenMode(create
                    ? IndexWriterConfig.OpenMode.CREATE
                    : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(dir, conf);
        }
        return indexWriter;
    }

    /**
     * Closes the shared index writer, if one is open.
     *
     * <p>After a successful close the cached reference is cleared, so the next call to
     * {@link #getIndexWriter(boolean)} opens a new writer instead of handing back a closed one.
     * If closing fails the reference is kept so the caller can retry.
     *
     * @throws IOException if closing the writer fails
     */
    public void closeIndexWriter() throws IOException {
        if (indexWriter != null) {
            indexWriter.close();
            indexWriter = null;
        }
    }

    /**
     * Adds one hotel to the index.
     *
     * <p>The document carries the fields {@code id} (stored, not indexed), {@code name},
     * {@code city} (indexed without analysis), {@code description}, and an unstored
     * {@code content} field that concatenates name, city and description for full-text search.
     *
     * @param hotel the hotel to index
     * @throws IOException if the document cannot be written
     */
    public void indexHotel(Hotel hotel) throws IOException {
        System.out.println("Indexing hotel: " + hotel);
        IndexWriter writer = getIndexWriter(false);

        Document doc = new Document();
        doc.add(new Field("id", hotel.getId(), Field.Store.YES, Field.Index.NO));
        doc.add(new Field("name", hotel.getName(), Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("city", hotel.getCity(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("description", hotel.getDescription(), Field.Store.YES, Field.Index.ANALYZED));

        // Catch-all field searched by default; not stored because its parts are stored individually.
        String fullSearchableText = hotel.getName() + " " + hotel.getCity() + " " + hotel.getDescription();
        doc.add(new Field("content", fullSearchableText, Field.Store.NO, Field.Index.ANALYZED));

        writer.addDocument(doc);
    }

    /**
     * Rebuilds the whole index from {@link HotelDatabase#getHotels()}.
     *
     * @throws IOException if the index cannot be opened, written or closed
     */
    public void rebuildIndexes() throws IOException {
        // Drop any writer that is already open so the CREATE mode below really takes effect.
        closeIndexWriter();

        // Erase existing index
        getIndexWriter(true);
        try {
            // Index all Accommodation entries
            Hotel[] hotels = HotelDatabase.getHotels();
            for (Hotel hotel : hotels) {
                indexHotel(hotel);
            }
        } finally {
            // Always close the writer, even if indexing failed, so it is not left open.
            closeIndexWriter();
        }
    }
}

3 IndexSearch代码:

/*
 * SearchEngine.java
 *
 * Created on 31 July 2012, 14:52
 *
 */
package lucene.demo.search;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import lucene.demo.business.Hotel;
import lucene.demo.business.HotelDatabase;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs free-text queries against the hotel index.
 *
 * <p>Queries are parsed against the {@code content} field with a {@link StandardAnalyzer}.
 * Call {@link #close()} when the engine is no longer needed to release the index reader.
 *
 * @author HaoZhang
 */
public class SearchEngine {

    /** Number of hits returned by {@link #performSearch(String)}. */
    private static final int DEFAULT_MAX_HITS = 3;

    /** Directory opened by the constructor; closed by {@link #close()}. */
    private final FSDirectory directory;

    /** Reader opened by the constructor; closed by {@link #close()}. */
    private final IndexReader reader;

    private IndexSearcher searcher = null;
    private QueryParser parser = null;

    /**
     * Creates a new instance of SearchEngine and opens the index for reading.
     *
     * @throws IOException if the index cannot be opened
     */
    public SearchEngine() throws IOException {
        // FSDirectory.open is the static factory the original reached via SimpleFSDirectory.open.
        directory = FSDirectory.open(new File("F:\\Lucene\\luceneIndex"));
        reader = IndexReader.open(directory);
        searcher = new IndexSearcher(reader);
        parser = new QueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));
    }

    /**
     * Returns the searcher currently used for queries.
     *
     * @return the current searcher
     */
    public IndexSearcher getSearcher() {
        return searcher;
    }

    /**
     * Replaces the searcher used for queries. The caller keeps ownership of the supplied
     * searcher; {@link #close()} only releases what this instance opened itself.
     *
     * @param searcher the searcher to use from now on
     */
    public void setSearcher(IndexSearcher searcher) {
        this.searcher = searcher;
    }

    /**
     * Parses the query string and returns the top 3 matching documents.
     *
     * @param queryString query in Lucene query-parser syntax
     * @return the best-scoring hits, at most 3
     * @throws IOException    if reading the index fails
     * @throws ParseException if the query string cannot be parsed
     */
    public TopDocs performSearch(String queryString) throws IOException, ParseException {
        return performSearch(queryString, DEFAULT_MAX_HITS);
    }

    /**
     * Parses the query string and returns the top matching documents.
     *
     * @param queryString query in Lucene query-parser syntax
     * @param maxHits     maximum number of hits to return
     * @return the best-scoring hits, at most {@code maxHits}
     * @throws IOException    if reading the index fails
     * @throws ParseException if the query string cannot be parsed
     */
    public TopDocs performSearch(String queryString, int maxHits) throws IOException, ParseException {
        Query query = parser.parse(queryString);
        return searcher.search(query, maxHits);
    }

    /**
     * Releases the index reader and directory opened by the constructor.
     *
     * <p>The searcher built by the constructor wraps that reader, so it must not be used
     * after this call.
     *
     * @throws IOException if closing the reader or directory fails
     */
    public void close() throws IOException {
        try {
            reader.close();
        } finally {
            directory.close();
        }
    }
}

Important Note: There is a very common mistake that people often make, so I have to mention it here. When you use Lucene, you have to specify the Analyzer twice: once when you create an IndexWriter object (for index construction) and once more when you create a QueryParser (for query parsing).
Please note that it is extremely important that you use the same analyzer for both. In our example, since we created the IndexWriter using StandardAnalyzer, we also pass StandardAnalyzer to the QueryParser. Otherwise, query terms may be tokenized differently from the indexed terms, and you will run into all sorts of unexpected problems, such as searches that silently return no results.


原创粉丝点击