IndexWriter基础使用及注意事项

来源:互联网 发布:日历提醒软件 编辑:程序博客网 时间:2024/04/30 08:34

这是关于3.0版本的原文

http://www.cnblogs.com/huangfox/archive/2010/10/15/1852371.html


照上述文章的例子,试了下Lucene 4.6版本


//注意点2:field实例在多次添加的时候可以重用,节约构造field实例的时间。

import java.io.File;import java.io.IOException;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.Field.Store;import org.apache.lucene.document.StringField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.SimpleFSDirectory;import org.apache.lucene.util.Version;public class ceshi0303 {public static void main(String[] args) {IndexWriter writer = null;FSDirectory dir = null;try {dir = SimpleFSDirectory.open(new File("d:\\20140303index"));Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);              IndexWriterConfig iwConfig = new IndexWriterConfig(                      Version.LUCENE_46, analyzer);  writer = new IndexWriter(dir, iwConfig);//Field f1 = new StringField("f1", "", Store.YES);//Field f2 = new StringField("f2", "", Store.YES);long s = System.currentTimeMillis();for (int i = 0; i < 500000; i++) {Document doc = new Document();doc.add(new StringField("f1", "\"f1 hello doc\" + i", Store.YES));doc.add(new StringField("f2", "\"f2 hello doc\" + i", Store.YES));/*f1.setStringValue("f1 hello doc" + i);doc.add(f1);f2.setStringValue("f2 world doc" + i);doc.add(f2);*/writer.addDocument(doc);}System.out.println(System.currentTimeMillis() - s + "ms");System.out.println("over");}catch (IOException e) {e.printStackTrace();}finally {try {writer.close();}catch (IOException e) {e.printStackTrace();}}}}

import java.io.File;import java.io.IOException;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.Field.Store;import org.apache.lucene.document.StringField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.SimpleFSDirectory;import org.apache.lucene.util.Version;public class ceshi0303 {public static void main(String[] args) {IndexWriter writer = null;FSDirectory dir = null;try {dir = SimpleFSDirectory.open(new File("d:\\20140303index"));Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);              IndexWriterConfig iwConfig = new IndexWriterConfig(                      Version.LUCENE_46, analyzer);  writer = new IndexWriter(dir, iwConfig);Field f1 = new StringField("f1", "", Store.YES);Field f2 = new StringField("f2", "", Store.YES);long s = System.currentTimeMillis();for (int i = 0; i < 500000; i++) {Document doc = new Document();//doc.add(new StringField("f1", "\"f1 hello doc\" + i", Store.YES));//doc.add(new StringField("f2", "\"f2 hello doc\" + i", Store.YES));f1.setStringValue("f1 hello doc" + i);doc.add(f1);f2.setStringValue("f2 world doc" + i);doc.add(f2);writer.addDocument(doc);}System.out.println(System.currentTimeMillis() - s + "ms");System.out.println("over");}catch (IOException e) {e.printStackTrace();}finally {try {writer.close();}catch (IOException e) {e.printStackTrace();}}}}

上面2份代码我分别跑了5次,取平均值,第一份的时间是4008.6ms,第二份的时间是3892.8ms。原因很简单,节省了大量的GC cost。


而对于注意点1,

//注意点1:在Windows系统中我们通常使用SimpleFSDirectory,而其他操作系统则使用NIOFSDirectory。

我测试了如下代码(就是把SimpleFSDirectory改成了FSDirectory):

import java.io.File;import java.io.IOException;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.Field.Store;import org.apache.lucene.document.StringField;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.SimpleFSDirectory;import org.apache.lucene.util.Version;public class ceshi0303 {public static void main(String[] args) {IndexWriter writer = null;FSDirectory dir = null;try {dir = FSDirectory.open(new File("d:\\20140303index"));Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);              IndexWriterConfig iwConfig = new IndexWriterConfig(                      Version.LUCENE_46, analyzer);  writer = new IndexWriter(dir, iwConfig);Field f1 = new StringField("f1", "", Store.YES);Field f2 = new StringField("f2", "", Store.YES);long s = System.currentTimeMillis();for (int i = 0; i < 500000; i++) {Document doc = new Document();//doc.add(new StringField("f1", "\"f1 hello doc\" + i", Store.YES));//doc.add(new StringField("f2", "\"f2 hello doc\" + i", Store.YES));f1.setStringValue("f1 hello doc" + i);doc.add(f1);f2.setStringValue("f2 world doc" + i);doc.add(f2);writer.addDocument(doc);}System.out.println(System.currentTimeMillis() - s + "ms");System.out.println("over");}catch (IOException e) {e.printStackTrace();}finally {try {writer.close();}catch (IOException e) {e.printStackTrace();}}}}
运行平均时间变成了3903ms。个人认为FSDirectory.open会自动根据平台来选择具体的实现(SimpleFSDirectory或NIOFSDirectory,在64位JRE上还可能是MMapDirectory),比较方便。


对于注意点3,目前没有遇到过,感触不深。


0 0
原创粉丝点击