Lucene 索引

来源:互联网 发布:做淘宝什么最赚钱 编辑:程序博客网 时间:2024/05/17 23:27

1.索引部分(I am a boy)
~1.基本概念
~2.索引过程
~3.索引建立步骤
注意点:IndexReader 应该设计成单例并在多次搜索之间复用,不要在每个方法里反复打开和关闭(重要)

/**
 /**
  * 创建索引
  */
 @Test
 public void creatIndex(){
  try {
   //1.创建Directory 通过Directory的创建指定索引存放位置
      //Directory directory=new RAMDirectory();//创建到内存
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex"));//创建到硬盘
   
   //2.创建IndexWriter 通过IndexWriterConfig的创建指定索引版本和语言词汇分析器
   IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
   //IndexWriter它的作用是用来写索引文件
   IndexWriter indexWriter=new IndexWriter(directory, iwc);
   
   //3.创建Docement对象
   Document document=null;
   
   //4.为Document对象增添filed
   File f=new File("F:/Lucene_jar/mytest");
   for (File file : f.listFiles()) {//文件包下所有文件
    document=new Document();
    /**
      * 1.Filed.Store.YES或者NO(存储域选项)
     yes表示把这个域的内容完全存储到文件中,方便进行文本的还原
     no表示把这个域的内容不存储到文件中,但是可以被索引,此时的内容无法完全还原(docget(""))

     2.Filed.Index(索引选项)
     Filed.Index.ANALYZED:进行分词和索引,适应标题,内容等
     Index.NOT_ANALYZED:进行索引,但是不进行分词,如果身份证号,姓名,ID等,适用精确搜索
     Index.ANALYZED_NOT_NORMS:进行分词但是不存储norms信息,这个norms中包括了创建索引时间和权值等信息
     Index.NOT_ANALYZED_NOT_NORMS:既不分词也不存储norms信息
     Index.NO:不进行索引
     */
    document.add(new Field("content",new FileReader(file)));
    document.add(new Field("filename",file.getName(),Store.YES,Index.NOT_ANALYZED));//不分词
    document.add(new Field("path",file.getAbsolutePath(),Store.YES,Index.NOT_ANALYZED));
    
    //5.通过IndexWriter增添文档到索引中  它的作用是用来写索引文件
    indexWriter.addDocument(document);
   }
   //关闭IndexWriter
   if (indexWriter!=null) {
    indexWriter.close();
   }
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
 }
 
 /**
  * 搜索
  */
 @Test
 public void searcher(){
  try {
   //1.创建Directory
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex"));
   
   //2.创建IndexReader
   IndexReader indexReader=IndexReader.open(directory);
   
   //3.根据IndexReader创建IndexSearcher
   IndexSearcher indexSearcher=new IndexSearcher(indexReader);
   
   //4.创建搜索的Query
   //创建搜索的parser来确定要搜索的文件内容,第二个参数表示搜索的域
   QueryParser parser=new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));
   //搜索保护lmandlyp520的文件
   Query query=parser.parse("lmandlyp520");
   //5.根据searcher搜索并返回TopDocs  返回10条
   TopDocs topDocs=indexSearcher.search(query, 10);
   //6.根据TopDocs获取ScoreDoc对象获取具体的document
   ScoreDoc[] sds=topDocs.scoreDocs;
   for (ScoreDoc scoreDoc : sds) {
    //7.根据searcher和ScoreDoc对象获取具体Document对象
    Document d=indexSearcher.doc(scoreDoc.doc);
    //8.根据document对象获取需要的值
    System.out.println(d.get("filename")+"["+d.get("path")+"]");
    
   }
   System.out.println("maxDoc"+indexReader.maxDoc());
   System.out.println("numDocs"+indexReader.numDocs());
   System.out.println("numDeletedDocs"+indexReader.numDeletedDocs());
   //9.关闭reader
   indexReader.close();
  } catch (Exception e) {
   // TODO: handle exception
  }
 }
~4.索引的更新和删除和恢复(3.6以下才行)
/**
 /**
  * 删除索引
  */
 @Test
 public void deleteIndex(){
  try {
   //1.创建Directory 通过Directory的创建指定索引存放位置
      //Directory directory=new RAMDirectory();//创建到内存
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex"));//创建到硬盘
   
   //2.创建IndexWriter 通过IndexWriterConfig的创建指定索引版本和语言词汇分析器
   IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
   //IndexWriter它的作用是用来写索引文件
   IndexWriter indexWriter=new IndexWriter(directory, iwc);
   //这种删除不会完全删除,会在系统中有缓冲   可以删除所有的索引  还可以根据一个query来删除
   indexWriter.deleteDocuments(new Term("content", "java"));
  
   //完全删除,包括缓存的删除临时文件
   //indexWriter.deleteAll();
   //indexWriter.forceMergeDeletes();
   indexWriter.close();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
 }
 
 /**
  * 恢复索引
  */
 @Test
 public void NuDeleteIndex(){
  try {
   //1.创建Directory
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex"));
   
   //2.创建IndexReader  false 表示不是只读
   IndexReader indexReader=IndexReader.open(directory, false);
   //恢复删除的索引文件
   indexReader.undeleteAll();
   //关闭
   indexReader.close();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
 }
 
 /**
  * 更新索引
  */
 @Test
 public void updateIndex(){
  try {
   //1.创建Directory 通过Directory的创建指定索引存放位置
      //Directory directory=new RAMDirectory();//创建到内存
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex"));//创建到硬盘
   
   //2.创建IndexWriter 通过IndexWriterConfig的创建指定索引版本和语言词汇分析器
   IndexWriterConfig iwc=new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
   //IndexWriter它的作用是用来写索引文件
   IndexWriter indexWriter=new IndexWriter(directory, iwc);
   Document document=new Document();
   
   File f=new File("F:/Lucene_jar/mytest/java.txt");
   document.add(new Field("content",new FileReader("F:/Lucene_jar/mytest/java.txt")));
   document.add(new Field("filename",f.getName(),Store.YES,Index.NOT_ANALYZED));//不分词
   document.add(new Field("path",f.getAbsolutePath(),Store.YES,Index.NOT_ANALYZED));
   
   //更新索引  本质是先删除后在增添一个
   indexWriter.updateDocument(new Term("filename","java.txt"),document);
   //关闭
   indexWriter.close();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
 }
----------------
/**
  * Sample data for the second example, which shows:
  * 1. setting a boost on indexed documents
  * 2. indexing numeric and date values
  *
  * The arrays are parallel: createIndex() reads element i of each one to
  * build document i.
  */
 
 String[] ids={"1","2","3","4","5","6"};
 String[] names={"limao1@my","limao2@he","limao3@she","limao4@her","limao5@jj","limao6@my"};
 String[] contents={"I LIKE LYP1","I LIKE LYP2","I LIKE LYP3","I LIKE LYP4","I LIKE LYP5","I LIKE LYP6"};
 
 //Numeric values, indexed as the "age" field
 int[] ages={21,23,23,42,12,14};
 //Date values, indexed as the "date" field; filled in by addDates()
 Date[] dates=null;
 //Not referenced by any method shown in this snippet
 String[] keys=new String[10];
 @Test
 public void createIndex(){
  
  try {
   addDates();
   //1.创建Directory
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex2"));
   
   //2.创建indexWriter
   IndexWriterConfig indexWriterConfig=new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
   IndexWriter indexWriter=new IndexWriter(directory, indexWriterConfig);
   
   //先删除在建
   indexWriter.deleteAll();
   indexWriter.forceMergeDeletes();
   //3.创建Docement对象
   Document document=null;
   //4.
   for (int i = 0; i < contents.length; i++) {
    document=new Document();
    document.add(new Field("id",ids[i],Store.YES,Index.NOT_ANALYZED));
    document.add(new Field("name",names[i],Store.YES,Index.ANALYZED));
    document.add(new Field("content",contents[i],Store.NO,Index.ANALYZED));
    
    //数字格式的  true表示索引
    document.add(new NumericField("age",Store.YES,true).setIntValue(ages[i]));
    
    //时间格式的
    document.add(new NumericField("date",Store.YES,true).setLongValue(dates[i].getTime()));
    //进行权值操作。
    if(document.get("name").contains("@my")){
     document.setBoost(1.5f);
    }else{
     document.setBoost(0.5f);
    }
    indexWriter.addDocument(document);
   }
   //关闭IndexWriter
   if (indexWriter!=null) {
    indexWriter.close();
   }
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
  
 }
 
 /**
  * 操作权值问题
  */
 @Test
 public void searcherIndex(){
  try {
   //1.创建Directory
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex2"));
   
   IndexReader indexReader=IndexReader.open(directory);
   
   IndexSearcher indexSearcher=new IndexSearcher(indexReader);
   
   QueryParser parser=new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));
   //搜索保护lmandlyp520的文件
   Query query=null;
   try {
    query = parser.parse("LIKE");
   } catch (ParseException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
   }
   
   TopDocs topDocs= indexSearcher.search(query, 10);
   
   ScoreDoc[] scoreDocs=topDocs.scoreDocs;
   SimpleDateFormat simpleDateFormat=new SimpleDateFormat("yyyy-mm-dd");
   for (ScoreDoc scoreDoc : scoreDocs) {
    Document d=indexSearcher.doc(scoreDoc.doc);
    //得不到content 因为new Field("content",contents[i],Store.NO,Index.ANALYZED)
    System.out.println("name:"+d.get("name")+"---content:"+d.get("content")+"-----age:"+Integer.parseInt(d.get("age"))
      +"---------date:"+simpleDateFormat.format(new Date(Long.parseLong(d.get("date")))));
   }
   
   indexSearcher.close();
   indexReader.close();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
  
  
 }
 
 /**
  * 填充时间
  */
 public void addDates(){
  try {
   SimpleDateFormat simpleDateFormat=new SimpleDateFormat("yyyy-mm-dd");
   dates=new Date[6];
   dates[0]=simpleDateFormat.parse("2010-09-22");
   dates[1]=simpleDateFormat.parse("2011-11-24");
   dates[2]=simpleDateFormat.parse("2012-12-12");
   dates[3]=simpleDateFormat.parse("2013-14-12");
   dates[4]=simpleDateFormat.parse("2014-11-26");
   dates[5]=simpleDateFormat.parse("2015-01-09");
  } catch (java.text.ParseException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
 }
--------------------------------------


 /**
  *InderReader单例设计
  *1.IndexReader别在方法中关掉
  *2.
  */
 private static IndexReader reader=null;
 
 private IndexReader IndexReaderUtil(Directory directory){
  try {
   if(reader!=null){
    //有reader 返回新的reader
    IndexReader rr=IndexReader.openIfChanged(reader);
    if(rr!=null){
     reader.close();
     reader=rr;
    }
   }else{
    //没有reader
    reader=IndexReader.open(directory);
   }
   
   return reader;
  } catch (CorruptIndexException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
  return null;
 }
 
 //Sample data; the arrays are parallel: createIndex() reads element i of each one to build document i
 String[] ids={"1","2","3","4","5","6"};
 String[] names={"limao1@my","limao2@he","limao3@she","limao4@her","limao5@jj","limao6@my"};
 String[] contents={"I LIKE LYP1","I LIKE LYP2","I LIKE LYP3","I LIKE LYP4","I LIKE LYP5","I LIKE LYP6"};
 
 //Numeric values, indexed as the "age" field
 int[] ages={21,23,23,42,12,14};
 //Date values, indexed as the "date" field; filled in by addDates()
 Date[] dates=null;
 //Not referenced by any method shown in this snippet
 String[] keys=new String[10];
 
 //test单例
 
 @Test
 public void test1(){
  try {
   for (int i = 0; i < 5; i++) {
    searcherIndex();
    System.out.println("--------------------------------------");
    Thread.sleep(5000);
   }
   
  } catch (Exception e) {
   // TODO: handle exception
  }
 }
 
 @Test
 public void createIndex(){
  
  try {
   addDates();
   //1.创建Directory
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex3"));
   
   //2.创建indexWriter
   IndexWriterConfig indexWriterConfig=new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
   IndexWriter indexWriter=new IndexWriter(directory, indexWriterConfig);
   
   
   //3.创建Docement对象
   Document document=null;
   //4.
   for (int i = 0; i < contents.length; i++) {
    document=new Document();
    document.add(new Field("id",ids[i],Store.YES,Index.NOT_ANALYZED));
    document.add(new Field("name",names[i],Store.YES,Index.ANALYZED));
    document.add(new Field("content",contents[i],Store.NO,Index.ANALYZED));
    
    //数字格式的  true表示索引
    document.add(new NumericField("age",Store.YES,true).setIntValue(ages[i]));
    
    //时间格式的
    document.add(new NumericField("date",Store.YES,true).setLongValue(dates[i].getTime()));
    //进行权值操作。
    if(document.get("name").contains("@my")){
     document.setBoost(1.5f);
    }else{
     document.setBoost(0.5f);
    }
    indexWriter.addDocument(document);
   }
   //关闭IndexWriter
   if (indexWriter!=null) {
    indexWriter.close();
   }
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
  
 }
 
 /**
  * 操作权值问题
  */
 @Test
 public void searcherIndex(){
  try {
   //1.创建Directory
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex3"));
   
   IndexReader indexReader=IndexReaderUtil(directory);
   
   IndexSearcher indexSearcher=new IndexSearcher(indexReader);
   
   QueryParser parser=new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));
   //搜索保护lmandlyp520的文件
   Query query=null;
   try {
    query = parser.parse("LIKE");
   } catch (ParseException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
   }
   
   TopDocs topDocs= indexSearcher.search(query, 10);
   
   ScoreDoc[] scoreDocs=topDocs.scoreDocs;
   SimpleDateFormat simpleDateFormat=new SimpleDateFormat("yyyy-mm-dd");
   for (ScoreDoc scoreDoc : scoreDocs) {
    Document d=indexSearcher.doc(scoreDoc.doc);
    //得不到content 因为new Field("content",contents[i],Store.NO,Index.ANALYZED)
    System.out.println("name:"+d.get("name")+"---content:"+d.get("content")+"-----age:"+Integer.parseInt(d.get("age"))
      +"---------date:"+simpleDateFormat.format(new Date(Long.parseLong(d.get("date")))));
   }
   
   indexSearcher.close();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
  
  
 }
 
 @Test
 public void deleteIndex(){
  try {
   Directory directory=FSDirectory.open(new File("F:/Lucene_jar/mytestindex3"));
   
   //2.创建indexWriter
   IndexWriterConfig indexWriterConfig=new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
   IndexWriter indexWriter=new IndexWriter(directory, indexWriterConfig);
   
   //先删除在建
   indexWriter.deleteDocuments(new Term("id", "1"));
   //indexWriter.deleteAll();
   indexWriter.forceMergeDeletes();
   
   //这里需要提交
   indexWriter.commit();
  } catch (CorruptIndexException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  } catch (LockObtainFailedException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  } catch (IOException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
 }
 /**
  * 填充时间
  */
 public void addDates(){
  try {
   SimpleDateFormat simpleDateFormat=new SimpleDateFormat("yyyy-mm-dd");
   dates=new Date[6];
   dates[0]=simpleDateFormat.parse("2010-09-22");
   dates[1]=simpleDateFormat.parse("2011-11-24");
   dates[2]=simpleDateFormat.parse("2012-12-12");
   dates[3]=simpleDateFormat.parse("2013-14-12");
   dates[4]=simpleDateFormat.parse("2014-11-26");
   dates[5]=simpleDateFormat.parse("2015-01-09");
  } catch (java.text.ParseException e) {
   // TODO Auto-generated catch block
   e.printStackTrace();
  }
 }

0 0
原创粉丝点击