lucene3.6 中文分词 文件索引
来源:互联网 发布:延伫乎吾将反得乎 编辑:程序博客网 时间:2024/05/21 15:34
import java.io.BufferedReader;import java.io.FileReader;import java.io.IOException;import java.io.File;import java.util.Date;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field.Store;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.document.Field;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.queryParser.MultiFieldQueryParser;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.TopScoreDocCollector;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.wltea.analyzer.lucene.IKAnalyzer;public class HelloWord { private static Analyzer luceneAnalyzer = new IKAnalyzer(); // private static Directory indexDir = new RAMDirectory();//内存索引 //private static Directory indexDir; static String indexpath="E://index"; //创立索引的文件夹 public static void main(String[] args) throws Exception { create(); search(); } public static void create() throws Exception{ //createIndex IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,luceneAnalyzer); //config.setOpenMode(OpenMode.CREATE);//普通索引 config.setOpenMode(OpenMode.APPEND); //增量索引 第一次创建索引 用这个会出错 IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexpath)),config); BufferedReader reader = null; try { String file_path="e://files//"; String file_name="我爱的世界.txt"; System.out.println(1); System.out.println(file_path+file_name); File file = new File(file_path+file_name);//需要创立索引的文件 //File file = new File("e://files//爱我的人.txt");//需要创立索引的文件 reader = new BufferedReader(new FileReader(file)); String tempString = null; int line = 1 ; System.out.println("创建索引开始....."); Date d1 = new Date(); while((tempString = reader.readLine()) != null) { Document doc1 = new Document(); doc1.add(new Field("id",""+line,Store.YES,Field.Index.ANALYZED)); doc1.add(new Field("content",tempString,Store.YES,Field.Index.ANALYZED)); doc1.add(new Field("file_name",file_name,Store.YES,Field.Index.ANALYZED)); indexWriter.addDocument(doc1); System.out.println("已创建 【" + line+ "】行"); line ++; } Date d2 = new Date(); System.out.println("创建索引完成!\n"); System.out.println("创建索引耗时:" + (d2.getTime()-d1.getTime()) +"ms"); reader.close(); }catch (IOException e) { System.out.println("Read Error!"); } indexWriter.close(); } public static void search() throws Exception{ String queryString = "妈妈"; String[] fields = {"id","content"}; QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, luceneAnalyzer); Query query = queryParser.parse(queryString); IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexpath))); //IndexReader reader = IndexReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector results = TopScoreDocCollector.create(10, false); Date dt1 = new Date(); System.out.println("开始查询时间 :" +dt1.getTime()); System.out.println("查询关键字 : "+ queryString); searcher.search(query, results); Date dt2 = new Date(); System.out.println("结束查询时间 :" +dt2.getTime()); System.out.println(); System.out.println("查询耗时 :" + (dt2.getTime()-dt1.getTime()) + "ms"); TopDocs topDocs = results.topDocs(0, 10); //显示查询结果前10条记录 System.out.println("命中数: " + topDocs.totalHits); for(int j=0 ; j<topDocs.scoreDocs.length; j++) { ScoreDoc scoreDoc = topDocs.scoreDocs[j]; Document doc = searcher.doc(scoreDoc.doc); System.out.println(doc); System.out.println("文件名:"+ doc.get("file_name") ); System.out.println("第 "+ doc.get("id") +" 行"); System.out.println("内容: " + doc.get("content")); } } }
- lucene3.6 中文分词 文件索引
- Lucene3与中文分词
- lucene3 中文IKAnalyzer分词例子
- lucene3 中文IKAnalyzer分词例子
- lucene3.0+版本中文分词测试+搜索结果+创建索引测试
- Lucene3 分词
- 【Lucene3.6.2入门系列】第14节_SolrJ操作索引和搜索文档以及整合中文分词
- 【Lucene3.6.2入门系列】第14节_SolrJ操作索引和搜索文档以及整合中文分词
- Lucene3.3、Lucene3.4中文分词——庖丁解牛分词实例
- Lucene3.6之索引CRUD
- lucene 中文分词 内存索引
- Lucene3.0分词系统
- lucene3.0入门(索引的文件写入+索引关键词查找)
- 3.Lucene3.x API分析,Director 索引操作目录,Document,分词器
- Lucene3.4索引文件创建过程(有源码)
- 【Lucene3.6.2入门系列】第04节_中文分词器
- 【Lucene3.6.2入门系列】第04节_中文分词器
- lucene3.0分词结果显示
- Antenna Impedance
- test
- C++中通过溢出覆盖虚函数指针列表执行代码
- 梦境
- Arrays的toString()方法
- lucene3.6 中文分词 文件索引
- hdu 2222
- replace/replace_if详解
- 改变apache的端口
- Android游戏开发系统控件-TabSpec与TabHost
- replace_copy/replace_copy_if
- 【内存池系列】内存池设计与应用
- lucene 中文分词 内存索引
- Android中实现滑动翻页—使用ViewFlipper