01_java Lucene学习——创建索引

来源:互联网 发布:access如何输入数据 编辑:程序博客网 时间:2024/06/06 14:11

吐槽:

看到视频是3.5,用的jar包是4.0.源代码是4.1.。。悲哀。。

我只有4.9的源码。。。


说明:

1.用的是lucene4.0版本。

2.创建的索引是保存在硬盘上。

3.代码中对日期初始化用的是自己的工具类(common.DateUtil),该工具类没有上传。

4.lucene 4.0版本特性 http://www.cnblogs.com/ibook360/archive/2012/12/29/2839094.html


说明二:

1.搜索的过程,其实就是在索引中查找单词,进而找到包含这个单词的文档的过程。

2.查全率:搜索系统中搜索到相关文档的能力;

   查准率:搜索系统中过滤掉不相关文档的能力;


创建索引步骤:

1.创建Directory对象,指定索引类型以及索引存储位置;

2.创建IndexWriter对象,用于将Document对象写入到Directory指定的存储位置;

3.创建Document对象,包装源数据;

4.关闭IndexWriter。



导入的基础包:



代码分析:

1.源数据被封装成为document对象时,不同于3.x版本,field的类型变得更加具体了

2.之前的版本中需要同时指定field的存储域(Field.Store)和索引域(Field.Index),4.0不需要指定索引域

是因为有默认选项

如图1

// Wrap one source record in a Document. In Lucene 4.0 the concrete field
// class (StringField, TextField, IntField, LongField) is chosen instead of
// passing a Field.Index option; only Field.Store still has to be given.
doc = new Document();
doc.add(new StringField("id", ids[i], Field.Store.YES));
doc.add(new StringField("email", emails[i], Field.Store.YES));
doc.add(new TextField("content", contents[i], Field.Store.YES));
doc.add(new StringField("name", names[i], Field.Store.YES));
// Store a number.
doc.add(new IntField("attach", attachs[i], Field.Store.YES));
// Store a date as its epoch-millisecond value.
doc.add(new LongField("date", dates[i].getTime(), Field.Store.YES));

StringField为例:

源代码




***如果不需要使用默认的选项,可考虑这种写法





代码功能:创建索引,并保存到硬盘中。query方法读取已建立的索引,并输出其中的文档数量信息(numDocs、maxDoc、numDeletedDocs);

package searcher;

import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import common.DateUtil;

/**
 * Demo that builds an on-disk Lucene 4.0 index from a small set of in-memory
 * sample records and can print the document counts of that index.
 *
 * <p>Constructing an instance opens the index directory and immediately
 * (re)builds the index via {@link #index()}.
 */
public class IndexUtil {

    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "D:/test/luceneIndex";

    // Sample source data to be indexed --- begin
    private final String[] ids = { "1", "2", "3", "4", "5", "6" };
    private final String[] emails = { "aa@163.com", "bb@163.com", "cc@mit.edu",
            "dd@sina.org", "ee@mit.edu", "ff@126.com" };
    private final String[] contents = { "welcome to China,I like book",
            "hello little two b, I like tennis", "my name is cc I like game",
            "I like football", "I like football and I like basketball too",
            "I like movie and swimming" };
    // One date per record; filled in by initDates().
    private final Date[] dates = new Date[ids.length];
    private final int[] attachs = { 2, 3, 1, 4, 5, 5 };
    private final String[] names = { "adele", "betty", "kitty", "jetty", "jack",
            "rose" };
    // Sample source data to be indexed --- end

    // Per-instance on purpose: a static field assigned from the constructor
    // would let every new instance silently replace the directory of the others.
    // Stays null when the directory could not be opened.
    private Directory directory = null;

    /** Creates the index at the default location ({@code D:/test/luceneIndex}). */
    public IndexUtil() {
        this(new File(DEFAULT_INDEX_PATH));
    }

    /**
     * Opens the index directory at the given location and builds the index.
     * An {@link IOException} while opening is printed, not rethrown; in that
     * case no index is written.
     *
     * @param indexDir file-system directory that holds the index
     */
    public IndexUtil(File indexDir) {
        initDates();
        // Create the Directory object.
        try {
            directory = FSDirectory.open(indexDir);
            // Report which concrete implementation FSDirectory.open selected.
            if (directory instanceof MMapDirectory) {
                System.out.println("directory belongs to  MMapDirectory");
            } else if (directory instanceof SimpleFSDirectory) {
                System.out.println("directory belongs to SimpleFSDirectory");
            } else if (directory instanceof NIOFSDirectory) {
                System.out.println("directory belongs to NIOFSDirectory");
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Build the index.
        index();
    }

    /**
     * Initializes the sample dates: entry {@code i} is
     * {@code DateUtil.getRelativeDate(now, -(i + 1))}.
     * NOTE(review): presumably that is the date (i + 1) days before now;
     * confirm against common.DateUtil, which is not part of this file.
     */
    private void initDates() {
        Date now = new Date();
        for (int i = 0; i < dates.length; i++) {
            dates[i] = DateUtil.getRelativeDate(now, -(i + 1));
        }
    }

    /**
     * Rebuilds the index from the sample data. Basic steps:
     * <ol>
     * <li>create the Directory object (done in the constructor), which fixes
     * the index type and storage location;</li>
     * <li>create the IndexWriter that writes Documents into that Directory;</li>
     * <li>create one Document per source record and add it;</li>
     * <li>close the IndexWriter.</li>
     * </ol>
     * I/O failures are printed, not rethrown. Does nothing when the directory
     * could not be opened.
     */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = getWriter(directory);
            if (writer != null) {
                // Drop any previously indexed documents. No explicit commit()
                // is issued here; the writer is closed in the finally block.
                writer.deleteAll();
                for (int i = 0; i < ids.length; i++) {
                    writer.addDocument(buildDocument(i));
                }
            }
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are
            // IOExceptions as well and were handled identically.
            e.printStackTrace();
        } finally {
            try {
                closeWriter(writer);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /** Wraps the sample record at position {@code i} in a Lucene Document. */
    private Document buildDocument(int i) {
        Document doc = new Document();
        // Alternative when the defaults of the typed field classes are not
        // wanted: build the field from an explicit FieldType, e.g. stored but
        // not indexed:
        //   FieldType fieldType = new FieldType();
        //   fieldType.setIndexed(false);
        //   fieldType.setStored(true);
        //   doc.add(new Field("id", ids[i], fieldType));
        doc.add(new StringField("id", ids[i], Field.Store.YES));
        doc.add(new StringField("email", emails[i], Field.Store.YES));
        doc.add(new TextField("content", contents[i], Field.Store.YES));
        doc.add(new StringField("name", names[i], Field.Store.YES));
        // Store a number.
        doc.add(new IntField("attach", attachs[i], Field.Store.YES));
        // Store a date as its epoch-millisecond value.
        doc.add(new LongField("date", dates[i].getTime(), Field.Store.YES));
        return doc;
    }

    /**
     * Creates an IndexWriter for the given directory, configured with a
     * StandardAnalyzer at Version.LUCENE_40.
     *
     * @param directory target directory; may be null
     * @return a new writer, or null when {@code directory} is null
     * @throws IOException if the writer cannot be created
     */
    public IndexWriter getWriter(Directory directory) throws IOException {
        if (directory == null) {
            return null;
        }
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_40,
                new StandardAnalyzer(Version.LUCENE_40));
        return new IndexWriter(directory, config);
    }

    /**
     * Closes the writer if it is not null.
     *
     * @param writer writer to close; may be null
     * @throws IOException if closing fails
     */
    public void closeWriter(IndexWriter writer) throws IOException {
        if (writer != null) {
            writer.close();
        }
    }

    /**
     * Opens a reader on the index and prints numDocs, maxDoc and
     * numDeletedDocs. I/O failures are printed, not rethrown.
     */
    public void query() {
        if (directory == null) {
            // Opening the directory failed in the constructor; there is
            // nothing to read, and passing null on would only fail later.
            System.err.println("index directory is not available");
            return;
        }
        try {
            IndexReader reader = DirectoryReader.open(directory);
            try {
                // The reader exposes the document counts of the index.
                System.out.println("numDocs:" + reader.numDocs());
                System.out.println("maxDocs:" + reader.maxDoc());
                System.out.println("deleteDocs:" + reader.numDeletedDocs());
            } finally {
                // Close the reader even when one of the calls above throws.
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


0 0