Lucene--003索引创建、域选项

来源:互联网 发布:携程 替代知乎 编辑:程序博客网 时间:2024/04/30 22:18
package com.lk;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import java.io.File;import java.io.IOException;/** * Created by LK on 2016/12/17. */public class IndeUtil {    private String[] ids={"1","2","3","4","5"};    private String[] emails={"ddd@qq.com","kai.liu@budee.com","dd@1.com","8500@qq.com","11@qq.com"};    private String[] contents = {            "welcome to china",            "hello beijing ",            "hello world ,java",            "hello world lucene",            "where is wher,i am a boy"    };    private int[] attachs={2,3,4,5,6};    private String[] names={"zhangsan","zhaosi","john","mki","lilei"};    //先创建一个构造函数    private Directory directory =null;    public IndeUtil(){        try {            directory = FSDirectory.open(new File("d:/lucene/index02"));        } catch (IOException e) {            e.printStackTrace();        }    }    public void query() {        try {            IndexReader reader = IndexReader.open(directory);            //通过reader可以有效的获取到文档的数量            System.out.println("文档的数"+reader.numDocs());            System.out.println("文档总数"+reader.maxDoc());        } catch (IOException e) {            e.printStackTrace();        }    }    public void index(){        IndexWriter writer = null;        try {            writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));            Document doc = null;            for (int i=0;i<ids.length;i++){                doc = new Document();                doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));                doc.add(new 
Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));                doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));                doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));                writer.addDocument(doc);            }        } catch (IOException e) {            e.printStackTrace();        }finally {            if (writer!=null) try {                writer.close();            } catch (IOException e) {                e.printStackTrace();            }        }    }}


测试:

package com.lk;

import org.junit.Test;

/**
 * Created by LK on 2016/12/17.
 *
 * <p>JUnit entry points that drive {@link IndeUtil}: one builds the index,
 * the other prints its document counts.
 */
public class TestIndex {

    /** Creates an {@link IndeUtil} and writes the sample documents to the index. */
    @Test
    public void testIn() {
        new IndeUtil().index();
    }

    /** Creates an {@link IndeUtil} and prints the index document counts. */
    @Test
    public void querytest() {
        new IndeUtil().query();
    }
}


索引目录:


fnm:保存域(Field)的名称及其选项信息,如 id、name、email、content。

fdt、fdx:保存设置为 Store.YES 的域的原始数据;fdt 存放域的内容,fdx 是指向 fdt 的索引。

frq  出现的次数,做评分和排序的。

nrm 存储评分的信息。

prx:保存词在文档中出现的位置信息。

tii、tis:保存索引中的词典信息;tis 是词典本身,tii 是 tis 的索引。





0 0