Lucene简单学习Index章节

来源：互联网发布：辐射4显存低配优化补丁编辑：程序博客网时间：2024/04/29 00:39

在全文索引工具中，都是由这样三部分组成

1、索引部分（I am a boy）

2、分词部分

3、搜索部分

4、Field.Store.YES/NO:存储域选项

设置为YES，表示会把这个域中的内容完全存储到文件中，方便进行文本的还原

设置为NO，表示不把这个域的内容存储到文件中，但是可以被索引，此时内容无法完全还原（doc.get()取不到该域的值）

5、Field.Index（索引域选项）

Field.Index.ANALYZED:进行分词和索引，适用于标题、内容等

Field.Index.NOT_ANALYZED:进行索引，但是不进行分词，例如身份证号、姓名、ID等，适用于精确搜索

Field.Index.ANALYZED_NO_NORMS:进行分词但是不存储norms信息，这个norms中包括了创建索引时的加权（boost）和长度归一化等信息

Field.Index.NOT_ANALYZED_NO_NORMS:既不进行分词也不存储norms信息。

Field.Index.NO：不进行索引

其最基本的使用方法如下面的步骤介绍

public class HelloLucene {public static void main(String[] args) {}/** * 建立索引 */public void index() {IndexWriter writer = null; try {//1、创建Directory(内存、银盘等等）//Directory directory = new RAMDirectory();//创建内存的Directory directory = FSDirectory.open(new File("d:/lucene/index01"));//2、创建IndexWriterIndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));writer = new IndexWriter(directory, iwc);//3、创建Document对象Document document = null;//4、为Document添加FieldFile file = new File("d:/lucene/example");for(File f:file.listFiles()) {document = new Document();document.add(new Field("content", new FileReader(f)));document.add(new Field("filename",f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));document.add(new Field("path",f.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));//5、通过IndexWriter添加文档到索引中writer.addDocument(document);}} catch (Exception e) {} finally {if(writer!=null) {try {writer.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}}/** * 搜索 */public void searcher() {try {//1、创建DirectoryDirectory directory = FSDirectory.open(new File("d:/lucene/index01"));//2、创建IndexReaderIndexReader reader = IndexReader.open(directory);//3、根据IndexReader创建IndexSearcherIndexSearcher searcher = new IndexSearcher(reader);//4、创建搜索的Query//创建parser来确定搜索文件的内容，第二个参数表示搜索的域QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));//创建query，表示搜索域为content中包含java的文档Query query = parser.parse("address");//5、根据searcher搜索并且返回TopDocsTopDocs tds = searcher.search(query, 10);//6、根据TopDocs获取ScoreDoc对象ScoreDoc[] sds = tds.scoreDocs;for(ScoreDoc sd:sds){//7、根据Searcher和ScordDoc获取具体的Document对象Document document = searcher.doc(sd.doc);//8、根据Document对象获取需要的值System.out.println(document.get("filename")+"["+document.get("path")+"]");}//9、关闭readerreader.clone();} catch (CorruptIndexException e) {// TODO Auto-generated catch 
blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (ParseException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}

2、

package com.lxp.index;import java.io.File;import java.io.IOException;import java.util.Date;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.HashMap;import java.util.Map;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.NumericField;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.util.Version;public class IndexUtil {    private String[] ids = {"1","2","3","4","5","6"};    private String[] emails = {"aa@sina.com","bb@123.com","cc@qq.com","dd@uestc.com","ee@qq.com","ff@uestc.com"};    private String[] contents = {            "welcome to visited the space,I like book",            "hello boy, I like pingpeng ball",            "my name is cc I like game",            "I like football",            "I like football and I like basketball too",            "I like movie and swim"    };    private Date[] dates = null;    private int[] attachs = {2,3,1,4,5,5};    private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};    private Directory directory = null;    private Map<String,Float> scores = new HashMap<String,Float>();    private static IndexReader reader = null;    public IndexUtil() {        try {            setDates();            scores.put("uestc.com", 2.0f);            scores.put("sina.com", 1.0f);            directory = FSDirectory.open(new File("d:/lucene/index02"));            reader = 
IndexReader.open(directory,false);        } catch (IOException e) {            e.printStackTrace();        }    }        public IndexSearcher getSearcher() {        try {            if(reader==null) {                reader = IndexReader.open(directory,false);            } else {                IndexReader tr = IndexReader.openIfChanged(reader);                if(tr!=null) {                    reader.close();                    reader = tr;                }            }            return new IndexSearcher(reader);        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        }        return null;    }        private void setDates() {        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");        try {            dates = new Date[ids.length];            dates[0] = sdf.parse("2010-02-18");            dates[1] = sdf.parse("2012-03-24");            dates[2] = sdf.parse("2011-02-18");            dates[3] = sdf.parse("2012-02-18");            dates[4] = sdf.parse("2014-05-18");            dates[5] = sdf.parse("2013-06-30");        } catch (ParseException e) {            // TODO Auto-generated catch block            e.printStackTrace();        }    }        public void query() {        try {            IndexReader reader = IndexReader.open(directory);            System.out.println("numDocs:"+reader.numDocs());            System.out.println("maxDocs:"+reader.maxDoc());            System.out.println("deleteDocs:"+reader.numDeletedDocs());            reader.close();        } catch (CorruptIndexException e) {            // TODO Auto-generated catch block            e.printStackTrace();        } catch (IOException e) {            // TODO Auto-generated catch block            e.printStackTrace();        }    }        public void delete() {        IndexWriter writer = null;        try {            writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new 
StandardAnalyzer(Version.LUCENE_35)));            //参数是一个选项，可以是一个query（一系列）也可以是一个term（一个精确查找的值）            //此时删除的文档并不会被完全删除，而是存储在一个回收站，可以恢复            writer.deleteDocuments(new Term("id","1"));            writer.commit();        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch (LockObtainFailedException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        } finally {            try {                if(writer!=null) {                    writer.close();                }            } catch (CorruptIndexException e) {                // TODO Auto-generated catch block                e.printStackTrace();            } catch (IOException e) {                // TODO Auto-generated catch block                e.printStackTrace();            }        }            }        public void delete2() {        try {            reader.deleteDocuments(new Term("id","1"));            reader.close();        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch (LockObtainFailedException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        } finally {        }            }        public void undelete() {        //使用IndexReader进行delete后的恢复        try {            //恢复时，必须把IndexRader的只读设置为false            IndexReader reader = IndexReader.open(directory,false);            reader.undeleteAll();            reader.close();        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        }    }        public void forceDelete() {        IndexWriter writer = null;        try {            writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));            writer.forceMergeDeletes();        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch 
(LockObtainFailedException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        } finally {            try {                if(writer!=null) {                    writer.close();                }            } catch (CorruptIndexException e) {                // TODO Auto-generated catch block                e.printStackTrace();            } catch (IOException e) {                // TODO Auto-generated catch block                e.printStackTrace();            }        }    }        public void index() {        IndexWriter writer = null;        try {            writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));            writer.deleteAll();            Document document = null;            for(int i=0;i<ids.length;i++) {                document = new Document();                document.add(new Field("id", ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));                document.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));                document.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));                document.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));                //存储数字                document.add(new NumericField("attachs",Field.Store.YES,true).setIntValue(attachs[i]));                //存储日期                document.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));                String et = emails[i].substring(emails[i].lastIndexOf("@")+1);                                System.out.println(et);                if(scores.containsKey(et)) {                    document.setBoost(scores.get(et));                } else {                    document.setBoost(0.5f);                }                writer.addDocument(document);            }        } catch (CorruptIndexException e) {            
e.printStackTrace();        } catch (LockObtainFailedException e) {            // TODO Auto-generated catch block            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        } finally {            try {                if(writer!=null)                   writer.close();            } catch (CorruptIndexException e) {                // TODO Auto-generated catch block                e.printStackTrace();            } catch (IOException e) {                // TODO Auto-generated catch block                e.printStackTrace();            }        }    }        public void mergeIndex() {        IndexWriter writer = null;        try {            writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));            //会将索引合并为2段，这两段中的被删除的数据会被清空            writer.forceMerge(2);        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch (LockObtainFailedException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        } finally {            try {                if(writer!=null) {                    writer.close();                }            } catch (CorruptIndexException e) {                // TODO Auto-generated catch block                e.printStackTrace();            } catch (IOException e) {                // TODO Auto-generated catch block                e.printStackTrace();            }        }    }        public void update() {        IndexWriter writer = null;        try {            writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));            /*             * lucene并没有提供更新，其实是如下两个的合集             * 先删除后添加             */            Document document = new Document();            document.add(new Field("id", "11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));            document.add(new 
Field("email",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED));            document.add(new Field("content",contents[0],Field.Store.NO,Field.Index.ANALYZED));            document.add(new Field("name",names[0],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));            writer.addDocument(document);            writer.updateDocument(new Term("id","1"), document);        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch (LockObtainFailedException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        } finally {            try {                if(writer!=null) {                    writer.close();                }            } catch (CorruptIndexException e) {                // TODO Auto-generated catch block                e.printStackTrace();            } catch (IOException e) {                // TODO Auto-generated catch block                e.printStackTrace();            }        }    }        public void search() {        try {            IndexReader reader = IndexReader.open(directory);            IndexSearcher searcher = new IndexSearcher(reader);            TermQuery query = new TermQuery(new Term("content","like"));            TopDocs tds = searcher.search(query, 10);            for(ScoreDoc sd: tds.scoreDocs) {                Document doc =searcher.doc(sd.doc);                System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+                        doc.get("attachs")+","+doc.get("date"));            }            reader.close();        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        }    }        public void search2() {        try {            IndexSearcher searcher = getSearcher();            TermQuery query = new TermQuery(new Term("content","like"));            TopDocs tds = searcher.search(query, 10);            for(ScoreDoc 
sd: tds.scoreDocs) {                Document doc =searcher.doc(sd.doc);                System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+                        doc.get("attachs")+","+doc.get("date"));            }            searcher.close();        } catch (CorruptIndexException e) {            e.printStackTrace();        } catch (IOException e) {            e.printStackTrace();        }    }}

0 0