lucene之全文检索

来源:互联网 发布:刘义军 知乎 编辑:程序博客网 时间:2024/04/28 17:41

1.简介:

lucene是一个设计非常优秀的软件,优秀在于简单易用:它屏蔽了复杂的实现过程,只要使用它提供的一些类和相应的api就能进行全文检索,并提供了hits分页功能。是不是很激动?现在我们就一起走进lucene吧。😊

2.下面看一个集成lucene经典案例图

3.有人到这时会问lucene到底会做些什么呢:

其实使用lucene可以在应用程序中添加索引和搜索功能(类似百度这样的搜索引擎所提供的检索能力),这样可以提高文件检索效率

4.现在就做一个lucene的创建索引和搜索的示例

a.首先建一个普通的javaproject

b.其次在官网下载lucene包,把lucene-analyzers-common-4.4.0.jar、lucene-core-4.4.0.jar、lucene-queryparser-4.4.0.jar三个jar导入项目

c.最后献上示例代码

package luence;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal Lucene 4.x example: indexes map-shaped records into a file-system
 * index and runs a fuzzy query against it.
 *
 * <p>Each indexed record becomes one {@link Document} holding one stored text
 * field per map entry, plus a combined {@code "content"} field that
 * concatenates all values and serves as the default search field.
 */
public class CreateIndex {

    /**
     * Directory used for writing. Opened lazily on first use and then reused,
     * so repeated {@link #createIndex(Map)} calls do not each open (and leak)
     * a new directory instance.
     */
    private static Directory dir;

    /** Location of the index files on disk. */
    private static final String  pathFile = "E:\\luence";

    /**
     * Creates an {@link IndexWriter} over the index directory, analysing text
     * with a {@link StandardAnalyzer}.
     *
     * <p>The caller owns the returned writer and must close it.
     *
     * @return a new writer for the index at {@code pathFile}
     * @throws Exception if the directory or the writer cannot be opened
     */
    protected static IndexWriter getWriter() throws Exception {
        if (dir == null) {
            // Open once and keep: the original re-opened the directory on every
            // createIndex call and never closed the previous instance.
            dir = FSDirectory.open(new File(pathFile));
        }
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        return new IndexWriter(dir, iwc);
    }

    /**
     * Adds one document to the index.
     *
     * <p>Every entry of {@code map} is stored as a text field named after its
     * key; all values are additionally concatenated (space separated) into the
     * {@code "content"} field. Entries with a {@code null} key or value are
     * skipped, because Lucene fields need both a name and a value. Failures
     * are reported on stderr and not propagated.
     *
     * @param map field name to field value; if {@code null}, no document is added
     */
    public static void createIndex(Map<String, Object> map) {
        System.out.println("createIndex进");
        System.out.println("createIndexpath=="+pathFile);
        StringBuilder text = new StringBuilder();
        IndexWriter writer = null;
        try {
            File file = new File(pathFile);
            if (!file.isDirectory()) {
                file.mkdirs();
            }
            writer = getWriter();
            // Build the document to be indexed.
            if (map != null) {
                Document doc = new Document();
                for (Entry<String, Object> e : map.entrySet()) {
                    String key = e.getKey();
                    Object value = e.getValue();
                    if (key == null || value == null) {
                        // A null name or value would abort the whole document.
                        continue;
                    }
                    text.append(value).append(' ');
                    doc.add(new TextField(key, value.toString(), Store.YES));
                }
                // "content" is the combined search field.
                doc.add(new TextField("content", text.toString(), Store.YES));
                writer.addDocument(doc);
            }
            System.out.println("content"+"=" + text);
            System.out.println("init ok?");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Closing the writer commits the pending document.
            closeAndReport(writer);
        }
    }

    /**
     * Runs a fuzzy query and returns the stored fields of the matching documents.
     *
     * <p>Note the parameter naming: {@code content} is the <em>field name</em>
     * to search and {@code searchIndex} is the <em>term text</em> to look for.
     * The term is not analysed, so it should be given in the form the analyzer
     * produced at index time (StandardAnalyzer lower-cases tokens).
     *
     * @param content     name of the field to search, e.g. {@code "content"}
     * @param searchIndex term text to match approximately
     * @param strArr      names of the stored fields to copy into each result
     *                    map; if {@code null}, the result maps are empty
     * @return one map per hit (at most 100), never {@code null}; empty if an
     *         argument is {@code null} or the index cannot be read
     */
    public static List<Map<String, Object>> createSearch(String content,String searchIndex,String[] strArr) {
        System.out.println("createSearch 进");
        // Index location.
        System.out.println("createSearchpath="+pathFile);
        List<Map<String, Object>> retuList = new ArrayList<Map<String, Object>>();
        if (content == null || searchIndex == null) {
            // A Term cannot be built from null; nothing can match.
            return retuList;
        }
        Directory searchDir = null;
        IndexReader reader = null;
        try {
            searchDir = FSDirectory.open(new File(pathFile));
            reader = DirectoryReader.open(searchDir);
            IndexSearcher searcher = new IndexSearcher(reader);
            Term term = new Term(content, searchIndex);
            System.out.println("sarchContent=" + content);
            // Fuzzy (edit-distance) query; Lucene offers many other query
            // types, see "Lucene in Action".
            Query query = new FuzzyQuery(term);
            TopDocs topdocs = searcher.search(query, 100); // keep the top 100 hits
            ScoreDoc[] scoreDocs = topdocs.scoreDocs;
            System.out.println("查询结果总数:" + topdocs.totalHits);
            for (int i = 0; i < scoreDocs.length; i++) {
                Map<String, Object> retuMap = new HashMap<String, Object>();
                Document document = searcher.doc(scoreDocs[i].doc);
                if (strArr != null) {
                    for (String para : strArr) {
                        String stored = document.get(para);
                        retuMap.put(para, stored);
                        // Echo the hit for manual testing.
                        System.out.println(para + "=" + stored);
                    }
                }
                retuList.add(retuMap);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Reader first, then the directory it reads from.
            closeAndReport(reader);
            closeAndReport(searchDir);
        }
        return retuList;
    }

    /** Closes {@code resource} if non-null, reporting (not propagating) an I/O failure. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: indexes six sample records and then searches the
     * {@code "content"} field for terms close to {@code "code"}.
     */
    public static void main(String[] args) {
        // Sample records to index; each record is modelled as a map.
        List<Map<String,Object>> listMap = new ArrayList<Map<String,Object>>();
        for (int i = 0; i < 6; i++) {
            Map<String,Object> map = new HashMap<String, Object>();
            map.put("code", "code" + i);
            map.put("city", "city" + i);
            listMap.add(map);
        }
        // Index every record.
        for (Map<String, Object> map2 : listMap) {
            createIndex(map2);
        }
        // Search.
        String[] strArr = {"code","city"};
        System.out.println(createSearch("content", "code", strArr));
    }
}

5.把上面的代码贴到项目中,运行main方法测试即可