Lucene简单学习Index章节
来源:互联网 发布:辐射4显存低配优化补丁 编辑:程序博客网 时间:2024/04/29 00:39
在全文索引工具中,都是由这样三部分组成
1、 索引部分(I am a boy)
2、 分词部分
3、 搜索部分
4、Field.Store.YES/NO:存储域选项
设置为YES,表示会把这个域中的内容完全存储到文件中,方便进行文本的还原
设置为NO,表示不把这个域的内容存储在文件中,但是可以被索引,此时内容无法完全还原(doc.get()返回null)
5、Field.Index(索引域选项)
Field.Index.ANALYZED:进行分词和索引,适用于标题、内容等
Field.Index.NOT_ANALYZED:进行索引,但是不进行分词,如身份证号,姓名,ID等,适用于精确搜索
Field.Index.ANALYZED_NO_NORMS:进行分词但是不存储norms信息,norms中包括了域长度的归一化因子和索引时设置的权值(boost)等信息
Field.Index.NOT_ANALYZED_NO_NORMS:既不进行分词也不存储norms信息。
Field.Index.NO:不进行索引
其最基本的使用方法如下面的步骤介绍
public class HelloLucene {public static void main(String[] args) {}/** * 建立索引 */public void index() {IndexWriter writer = null; try {//1、创建Directory(内存、银盘等等)//Directory directory = new RAMDirectory();//创建内存的Directory directory = FSDirectory.open(new File("d:/lucene/index01"));//2、创建IndexWriterIndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));writer = new IndexWriter(directory, iwc);//3、创建Document对象Document document = null;//4、为Document添加FieldFile file = new File("d:/lucene/example");for(File f:file.listFiles()) {document = new Document();document.add(new Field("content", new FileReader(f)));document.add(new Field("filename",f.getName(),Field.Store.YES,Field.Index.NOT_ANALYZED));document.add(new Field("path",f.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));//5、通过IndexWriter添加文档到索引中writer.addDocument(document);}} catch (Exception e) {} finally {if(writer!=null) {try {writer.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}}/** * 搜索 */public void searcher() {try {//1、创建DirectoryDirectory directory = FSDirectory.open(new File("d:/lucene/index01"));//2、创建IndexReaderIndexReader reader = IndexReader.open(directory);//3、根据IndexReader创建IndexSearcherIndexSearcher searcher = new IndexSearcher(reader);//4、创建搜索的Query//创建parser来确定搜索文件的内容,第二个参数表示搜索的域QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));//创建query,表示搜索域为content中包含java的文档Query query = parser.parse("address");//5、根据searcher搜索并且返回TopDocsTopDocs tds = searcher.search(query, 10);//6、根据TopDocs获取ScoreDoc对象ScoreDoc[] sds = tds.scoreDocs;for(ScoreDoc sd:sds){//7、根据Searcher和ScordDoc获取具体的Document对象Document document = searcher.doc(sd.doc);//8、根据Document对象获取需要的值System.out.println(document.get("filename")+"["+document.get("path")+"]");}//9、关闭readerreader.clone();} catch (CorruptIndexException e) {// TODO Auto-generated catch 
blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (ParseException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}2、
package com.lxp.index;import java.io.File;import java.io.IOException;import java.util.Date;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.HashMap;import java.util.Map;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.NumericField;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.util.Version;public class IndexUtil { private String[] ids = {"1","2","3","4","5","6"}; private String[] emails = {"aa@sina.com","bb@123.com","cc@qq.com","dd@uestc.com","ee@qq.com","ff@uestc.com"}; private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football", "I like football and I like basketball too", "I like movie and swim" }; private Date[] dates = null; private int[] attachs = {2,3,1,4,5,5}; private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"}; private Directory directory = null; private Map<String,Float> scores = new HashMap<String,Float>(); private static IndexReader reader = null; public IndexUtil() { try { setDates(); scores.put("uestc.com", 2.0f); scores.put("sina.com", 1.0f); directory = FSDirectory.open(new File("d:/lucene/index02")); reader = IndexReader.open(directory,false); } catch (IOException e) { e.printStackTrace(); } } public IndexSearcher getSearcher() { try { if(reader==null) { reader = 
IndexReader.open(directory,false); } else { IndexReader tr = IndexReader.openIfChanged(reader); if(tr!=null) { reader.close(); reader = tr; } } return new IndexSearcher(reader); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null; } private void setDates() { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); try { dates = new Date[ids.length]; dates[0] = sdf.parse("2010-02-18"); dates[1] = sdf.parse("2012-03-24"); dates[2] = sdf.parse("2011-02-18"); dates[3] = sdf.parse("2012-02-18"); dates[4] = sdf.parse("2014-05-18"); dates[5] = sdf.parse("2013-06-30"); } catch (ParseException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public void query() { try { IndexReader reader = IndexReader.open(directory); System.out.println("numDocs:"+reader.numDocs()); System.out.println("maxDocs:"+reader.maxDoc()); System.out.println("deleteDocs:"+reader.numDeletedDocs()); reader.close(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public void delete() { IndexWriter writer = null; try { writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); //参数是一个选项,可以是一个query(一系列)也可以是一个term(一个精确查找的值) //此时删除的文档并不会被完全删除,而是存储在一个回收站,可以恢复 writer.deleteDocuments(new Term("id","1")); writer.commit(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if(writer!=null) { writer.close(); } } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public void delete2() { try { reader.deleteDocuments(new Term("id","1")); reader.close(); } catch (CorruptIndexException e) { 
e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { } } public void undelete() { //使用IndexReader进行delete后的恢复 try { //恢复时,必须把IndexRader的只读设置为false IndexReader reader = IndexReader.open(directory,false); reader.undeleteAll(); reader.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public void forceDelete() { IndexWriter writer = null; try { writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); writer.forceMergeDeletes(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if(writer!=null) { writer.close(); } } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public void index() { IndexWriter writer = null; try { writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); writer.deleteAll(); Document document = null; for(int i=0;i<ids.length;i++) { document = new Document(); document.add(new Field("id", ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); document.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED)); document.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED)); document.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); //存储数字 document.add(new NumericField("attachs",Field.Store.YES,true).setIntValue(attachs[i])); //存储日期 document.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime())); String et = emails[i].substring(emails[i].lastIndexOf("@")+1); System.out.println(et); if(scores.containsKey(et)) { 
document.setBoost(scores.get(et)); } else { document.setBoost(0.5f); } writer.addDocument(document); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if(writer!=null) writer.close(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public void mergeIndex() { IndexWriter writer = null; try { writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); //会将索引合并为2段,这两段中的被删除的数据会被清空 writer.forceMerge(2); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { try { if(writer!=null) { writer.close(); } } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public void update() { IndexWriter writer = null; try { writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35))); /* * lucene并没有提供更新,其实是如下两个的合集 * 先删除后添加 */ Document document = new Document(); document.add(new Field("id", "11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); document.add(new Field("email",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED)); document.add(new Field("content",contents[0],Field.Store.NO,Field.Index.ANALYZED)); document.add(new Field("name",names[0],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS)); writer.addDocument(document); writer.updateDocument(new Term("id","1"), document); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { 
e.printStackTrace(); } finally { try { if(writer!=null) { writer.close(); } } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public void search() { try { IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); TermQuery query = new TermQuery(new Term("content","like")); TopDocs tds = searcher.search(query, 10); for(ScoreDoc sd: tds.scoreDocs) { Document doc =searcher.doc(sd.doc); System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+ doc.get("attachs")+","+doc.get("date")); } reader.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public void search2() { try { IndexSearcher searcher = getSearcher(); TermQuery query = new TermQuery(new Term("content","like")); TopDocs tds = searcher.search(query, 10); for(ScoreDoc sd: tds.scoreDocs) { Document doc =searcher.doc(sd.doc); System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+ doc.get("attachs")+","+doc.get("date")); } searcher.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } }}
0 0
- Lucene简单学习Index章节
- Lucene学习笔记之创建Index
- lucene-Index
- lucene index
- Lucene学习笔记(4):Lucene Index 删除、更新
- Lucene 学习笔记(3) :Hello Lucene(Lucene Index的创建和查找)
- Lucene (create Index) & search Index
- JAVA学习Swing章节按钮组件JButton的简单学习
- JAVA学习Swing章节流布局管理器简单学习
- JAVA学习Swing章节按钮组件JButton的简单学习
- Lucene对index操作
- Lucene Index Format 笔记
- lucene index 锁
- lucene学习01之简单小例子初步认识lucene
- Lucene 学习(一):简单demo
- lucene RAMDirectory org.apache.lucene.index.IndexNotFoundException
- lucene源代码分析(index部分)
- Lucene构建index性能调整
- 卡卡卡的wordpress
- git配置的时候出现connect to host github.com port 22 bad file number解决办法
- 【算法】验证码识别基础方法及源码
- java在本地生成并解析二维码的两种方式
- Android性能测试工具之APT - gether
- Lucene简单学习Index章节
- ClangFormat-Xcode真是好东西( ⊙ o ⊙ )啊!
- oracle远程数据库同步
- dubbo服务笔记一
- (3.1.2)Servlet生命周期
- Android Service完全解析,关于服务你所需知道的一切
- Javascript中最常用的55个经典技巧
- 第五周项目 5 友元类
- 泡泡堂、QQ堂游戏通信架构分析