Lucene 3.6 中文分词 文件索引

来源:互联网 发布:延伫乎吾将反得乎 编辑:程序博客网 时间:2024/05/21 15:34
 import java.io.BufferedReader;import java.io.FileReader;import java.io.IOException;import java.io.File;import java.util.Date;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field.Store;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.queryParser.MultiFieldQueryParser;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.TopScoreDocCollector;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.wltea.analyzer.lucene.IKAnalyzer;public class HelloWord {   private static Analyzer luceneAnalyzer = new IKAnalyzer();  // private static Directory indexDir = new RAMDirectory();//内存索引   //private static Directory indexDir;      static String indexpath="E://index";   //创立索引的文件夹   public static void main(String[] args) throws Exception {            create();            search();        }           public static void create() throws Exception{  //createIndex                  IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,luceneAnalyzer);            //config.setOpenMode(OpenMode.CREATE);//普通索引    config.setOpenMode(OpenMode.APPEND); //增量索引  第一次创建索引 用这个会出错     IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexpath)),config);                        BufferedReader reader = null;    try {     String file_path="e://files//";     String file_name="我爱的世界.txt";     System.out.println(1);     System.out.println(file_path+file_name);          File file = new File(file_path+file_name);//需要创立索引的文件     //File file = new 
File("e://files//爱我的人.txt");//需要创立索引的文件          reader = new BufferedReader(new FileReader(file));     String tempString = null;     int line = 1 ;     System.out.println("创建索引开始.....");     Date d1 = new Date();     while((tempString = reader.readLine()) != null) {      Document doc1 = new Document();      doc1.add(new Field("id",""+line,Store.YES,Field.Index.ANALYZED));      doc1.add(new Field("content",tempString,Store.YES,Field.Index.ANALYZED));      doc1.add(new Field("file_name",file_name,Store.YES,Field.Index.ANALYZED));            indexWriter.addDocument(doc1);      System.out.println("已创建 【" + line+ "】行");      line ++;     }     Date d2 = new Date();     System.out.println("创建索引完成!\n");     System.out.println("创建索引耗时:" + (d2.getTime()-d1.getTime()) +"ms");     reader.close();    }catch (IOException e) {     System.out.println("Read Error!");    }      indexWriter.close();    }           public static void search() throws Exception{            String queryString = "妈妈";          String[] fields = {"id","content"};            QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, luceneAnalyzer);            Query query = queryParser.parse(queryString);            IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexpath)));            //IndexReader reader = IndexReader.open(indexDir);            IndexSearcher searcher = new IndexSearcher(reader);                    TopScoreDocCollector results = TopScoreDocCollector.create(10, false);     Date dt1 = new Date();    System.out.println("开始查询时间 :" +dt1.getTime());    System.out.println("查询关键字 : "+ queryString);    searcher.search(query, results);                    Date dt2 = new Date();    System.out.println("结束查询时间 :" +dt2.getTime());    System.out.println();    System.out.println("查询耗时 :" + (dt2.getTime()-dt1.getTime()) + "ms");    TopDocs topDocs = results.topDocs(0, 10);    //显示查询结果前10条记录        System.out.println("命中数: " + topDocs.totalHits);        for(int j=0 ; 
j<topDocs.scoreDocs.length; j++) {     ScoreDoc scoreDoc = topDocs.scoreDocs[j];     Document doc = searcher.doc(scoreDoc.doc);      System.out.println(doc);      System.out.println("文件名:"+ doc.get("file_name") );        System.out.println("第 "+ doc.get("id") +" 行");                 System.out.println("内容: " + doc.get("content"));    }               }   }


 

原创粉丝点击