Lucene搜索实例
来源:互联网 发布:淘客程序源码一键部署 编辑:程序博客网 时间:2024/06/06 12:49
废话不多说,直接上代码。下面的工具类基于 Lucene 3.6 和 IK 分词器实现,对一般企业级的综合搜索、多条件搜索来说完全够用。
添加个效果图
- package com.lucene.util;
- import java.io.File;
- import java.io.IOException;
- import java.util.ArrayList;
- import org.apache.log4j.Logger;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field.Index;
- import org.apache.lucene.document.Field.Store;
- import org.apache.lucene.index.CorruptIndexException;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.index.LogByteSizeMergePolicy;
- import org.apache.lucene.index.LogMergePolicy;
- import org.apache.lucene.index.Term;
- import org.apache.lucene.search.BooleanQuery;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.NumericRangeQuery;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopScoreDocCollector;
- import org.apache.lucene.search.BooleanClause.Occur;
- import org.apache.lucene.search.highlight.Highlighter;
- import org.apache.lucene.search.highlight.QueryScorer;
- import org.apache.lucene.search.highlight.SimpleFragmenter;
- import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
- import org.apache.lucene.search.highlight.TokenSources;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.LockObtainFailedException;
- import org.apache.lucene.util.Version;
- import org.apache.lucene.document.Field;
- import org.springframework.context.ApplicationContext;
- import org.springframework.context.support.ClassPathXmlApplicationContext;
- import org.wltea.analyzer.lucene.IKQueryParser;
- import org.wltea.analyzer.lucene.IKSimilarity;
- import com.lucene.LuceneConfig;
- import com.lucene.data.LuceneData;
- import com.model.Model;
- import com.model.Novel;
- import com.service.NovelService;
- /**
- * lucene工具类
- *
- * @author Administrator
- *
- */
- public class LuceneUtil {
- /**
- * 日志
- */
- static Logger logger = Logger.getLogger(LuceneUtil.class);
- public static Integer totalNum=0;
- /**
- * 创建索引
- * @param data 要放入索引的一条记录
- * @return
- */
- public static synchronized boolean createIndex(LuceneData data) {
- IndexWriter indexWriter = null;
- Directory d = null;
- try {
- d = FSDirectory.open(new File(LuceneConfig.INDEX_PATH));
- IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36,
- AnalyzerUtil.getIkAnalyzer());
- // 3.6以后不推荐用optimize,使用LogMergePolicy优化策略
- conf.setMergePolicy(optimizeIndex());
- // 创建索引模式:CREATE,覆盖模式; APPEND,追加模式
- File file = new File(LuceneConfig.INDEX_PATH);
- File[] f = file .listFiles();
- if(f.length==0)
- conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
- else
- conf.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
- indexWriter = new IndexWriter(d, conf);
- //因为id是唯一的,如果之前存在就先删除原来的,在创建新的
- Term term = new Term("id", data.getId());
- indexWriter.deleteDocuments(term);
- Document doc = getDocument(data);
- indexWriter.addDocument(doc);
- logger.debug("索引结束,共有索引{}个" + indexWriter.numDocs());
- //System.out.println("索引结束,共有索引{}个" + indexWriter.numDocs()+":"+doc.get("id")+":"+doc.get("author"));
- // 自动优化合并索引文件,3.6以后不推荐用optimize,使用LogMergePolicy优化策略
- // indexWriter.optimize();
- indexWriter.commit();
- return true;
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- logger.error("索引添加异常", e);
- } catch (LockObtainFailedException e) {
- e.printStackTrace();
- logger.error("索引添加异常", e);
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("索引不存在", e);
- } catch (Exception e) {
- e.printStackTrace();
- logger.error("索引添加异常", e);
- } finally {
- if (indexWriter != null) {
- try {
- indexWriter.close();
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- logger.error("索引关闭异常", e);
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("索引关闭异常", e);
- } finally {
- try {
- if (d != null && IndexWriter.isLocked(d)) {
- IndexWriter.unlock(d);
- }
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("解锁异常", e);
- }
- }
- }
- }
- return false;
- }
- /**
- * 更新索引
- *
- * @param data
- * @return
- */
- public static boolean updateIndex(LuceneData data) {
- IndexWriter indexWriter = null;
- Directory d = null;
- try {
- d = FSDirectory.open(new File(LuceneConfig.INDEX_PATH));
- while (d != null && IndexWriter.isLocked(d)) {// 如果文件锁住,等待解锁
- Thread.sleep(1000);
- logger.error("索引已经锁住,正在等待....");
- }
- IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31,
- AnalyzerUtil.getIkAnalyzer());
- // 3.6以后不推荐用optimize,使用LogMergePolicy优化策略
- conf.setMergePolicy(optimizeIndex());
- indexWriter = new IndexWriter(d, conf);
- Term term = new Term("id", data.getId());
- // 不管更新与否,先删除原来的
- indexWriter.deleteDocuments(term);
- Document doc = getDocument(data);
- indexWriter.addDocument(doc);
- // indexWriter.optimize();
- indexWriter.commit();
- logger.debug("更新索引,文章ID为{}" + data.getId());
- logger.debug("共有索引{}个" + indexWriter.numDocs());
- return true;
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- logger.error("索引添加异常", e);
- } catch (LockObtainFailedException e) {
- e.printStackTrace();
- logger.error("索引添加异常", e);
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("索引不存在", e);
- } catch (Exception e) {
- e.printStackTrace();
- logger.error("索引添加异常", e);
- } finally {
- if (indexWriter != null) {
- try {
- indexWriter.close();
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- logger.error("索引关闭异常", e);
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("索引关闭异常", e);
- } finally {
- try {
- if (d != null && IndexWriter.isLocked(d)) {
- IndexWriter.unlock(d);
- }
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("解锁异常", e);
- }
- }
- }
- }
- return false;
- }
- /**
- * 根据id删除索引(id对应的那条document)
- *
- * @param id
- * document的id
- * @return
- */
- public static boolean deleteIndex(String id) {
- IndexWriter indexWriter = null;
- Directory d = null;
- try {
- d = FSDirectory.open(new File(LuceneConfig.INDEX_PATH));
- while (d != null && IndexWriter.isLocked(d)) {// 如果文件锁住,等待解锁
- Thread.sleep(1000);
- logger.error("索引已经锁住,正在等待....");
- }
- IndexWriterConfig indexWriterConfig = new IndexWriterConfig(
- Version.LUCENE_36, AnalyzerUtil.getIkAnalyzer());
- indexWriter = new IndexWriter(d, indexWriterConfig);
- Term term = new Term("id", id);
- indexWriter.deleteDocuments(term);
- indexWriter.optimize();
- indexWriter.commit();
- logger.debug("删除文章ID:{}的索引..." + id);
- logger.debug("共有索引{}个" + indexWriter.numDocs());
- indexWriter.close();
- return true;
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- logger.error("索引删除异常", e);
- } catch (LockObtainFailedException e) {
- e.printStackTrace();
- logger.error("索引删除异常", e);
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("索引不存在", e);
- } catch (Exception e) {
- e.printStackTrace();
- logger.error("索引删除异常", e);
- } finally {
- if (indexWriter != null) {
- try {
- indexWriter.close();
- } catch (CorruptIndexException e) {
- e.printStackTrace();
- logger.error("索引关闭异常", e);
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("索引关闭异常", e);
- } finally {
- try {
- if (d != null && IndexWriter.isLocked(d)) {
- IndexWriter.unlock(d);
- }
- } catch (IOException e) {
- e.printStackTrace();
- logger.error("解锁异常", e);
- }
- }
- }
- }
- return false;
- }
- /**
- * @param fileds 要查询的综合字段 ex【 new String[]{ "contentTitle", "contentContext","keywords"};】
- * @param occurs 要查询的字段出现可能 ex【new Occur[] { Occur.SHOULD, Occur.SHOULD,Occur.SHOULD };】
- * @param keyWord 要查询的关键字
- * @param page 当前页
- * @param pageSize 分页数
- * @return
- */
- public static ArrayList<LuceneData> search(String[] fileds, Occur[] occurs,String keyWord,Integer page,Integer pageSize) {
- return search(fileds, occurs, keyWord,"","", page, pageSize);
- }
- /**
- * @param fileds 要查询的综合字段 ex【 new String[]{ "contentTitle", "contentContext","keywords"};】
- * @param occurs 要查询的字段出现可能 ex【new Occur[] { Occur.SHOULD, Occur.SHOULD,Occur.SHOULD };】
- * @param keyWord 要查询的关键字
- * @param subType 主类型
- * @param type 主类型下的子类型
- * @param page 当前页
- * @param pageSize 分页数
- * @return
- */
- public static ArrayList<LuceneData> search(String[] fileds, Occur[] occurs,String keyWord,String bigtype,String subType,Integer page,Integer pageSize) {
- try {
- // ---------初始化---------------------------------------------------
- IndexReader reader = IndexReader.open(FSDirectory.open(new File(LuceneConfig.INDEX_PATH)));
- IndexSearcher searcher = new IndexSearcher(reader);
- // 在索引器中使用IKSimilarity相似度评估器
- searcher.setSimilarity(new IKSimilarity());
- // ----------设置过滤器------------------------------------------------
- BooleanQuery booleanquery = new BooleanQuery();
- // 综合查询 (查询条件1)
- Query likequery = IKQueryParser.parseMultiField(fileds, keyWord,occurs);
- booleanquery.add(likequery, Occur.MUST);
- //主类型过滤 (查询条件2)
- if(bigtype.length()>0)
- {
- Query subquery = IKQueryParser.parse("bigtype", bigtype);
- booleanquery.add(subquery, Occur.MUST);
- }
- //从类型过滤 (查询条件3)
- if(subType.length()>0)
- {
- Query subquery = IKQueryParser.parse("type", subType);
- booleanquery.add(subquery, Occur.MUST);
- }
- //过滤数字区间
- //NumericRangeQuery<Integer> spanquery = NumericRangeQuery.newIntRange("id", begin, end, true, true);
- //booleanquery.add(spanquery, Occur.MUST);
- //过滤时间区间(时间的getTime比大小)
- //NumericRangeQuery<Integer> spanquery = NumericRangeQuery.newLongRange("id", begin, end, true, true);
- //booleanquery.add(spanquery, Occur.MUST);
- //-------------过滤filter--------------------------------------------------
- //-------------设置权值(其中一个方法在doc创建Field时field.setBoost)--------------------
- //-------------排序--------------------------------------------------------
- /*多字段排序,设置在前面的会优先排序 //true:降序 false:升序
- * SortField[] sortFields = new SortField[3];
- * SortField top = new SortField("isTop", SortField.INT, true);
- * SortField hits = new SortField("contentHits", SortField.INT,true);
- * SortField pubtime = new SortField("publishTime",SortField.LONG, true);
- * sortFields[0] = top;
- * sortFields[1] = hits;
- * sortFields[2] = pubtime;
- * Sort sort = new Sort(sortFields);
- */
- //-------------搜索--------------------------------------------------------
- //分页查询,lucene不支持分页查询,因为查询速度很快,所以我们就设置查询上限
- TopScoreDocCollector topCollector = TopScoreDocCollector.create(page*pageSize, false);//上限
- searcher.search(booleanquery, topCollector);
- //查询结果的总数量
- totalNum=topCollector.getTotalHits();
- ScoreDoc[] docs = topCollector.topDocs((page - 1) * pageSize, pageSize).scoreDocs;//返回所需数据
- //高亮显示
- SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
- Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(booleanquery));
- highlighter.setTextFragmenter(new SimpleFragmenter(100));
- ArrayList<LuceneData> list = new ArrayList<LuceneData>();
- LuceneData data=null;
- for (ScoreDoc scdoc : docs) {
- Document document = searcher.doc(scdoc.doc);
- data=new LuceneData();
- //设置高壳
- TokenStream tokenStream=null;
- String name = document.get("name");
- tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "name", AnalyzerUtil.getIkAnalyzer());
- name = highlighter.getBestFragment(tokenStream, name);
- if(name==null)
- name=document.get("name");
- String author = document.get("author");
- tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "author", AnalyzerUtil.getIkAnalyzer());
- author = highlighter.getBestFragment(tokenStream, author);
- if(author==null)
- author=document.get("author");
- String outline = document.get("outline");
- tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "outline", AnalyzerUtil.getIkAnalyzer());
- outline = highlighter.getBestFragment(tokenStream, outline);
- if(outline==null)
- outline=document.get("outline");
- String type = document.get("type");
- tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "type", AnalyzerUtil.getIkAnalyzer());
- type = highlighter.getBestFragment(tokenStream, type);
- if(type==null)
- type=document.get("type");
- data.setId(document.get("id"));
- data.setName(name);
- data.setAuthor(author);
- data.setOutline(outline);
- data.setType(type);
- data.setTypeid(document.get("typeid")) ;
- data.setBigtype(document.get("bigtype"));
- data.setUpdateTime(document.get("updateTime"));
- data.setImgPath(document.get("imgPath"));
- data.setImgUrlPath(document.get("imgUrlPath"));
- data.setContent(document.get("content"));
- data.setLink_url(document.get("link_url"));
- data.setHot(Long.parseLong(document.get("hot")));
- data.setClickPoint(Long.parseLong(document.get("clickPoint")));
- list.add(data);
- }
- return list;
- } catch (Exception e) {
- e.printStackTrace();
- logger.error("搜索异常", e);
- return new ArrayList<LuceneData>();
- }
- }
- /**
- * 把传入的数据类型转换成Document
- *
- * @param data
- * @return
- */
- private static Document getDocument(LuceneData data) {
- Document doc = new Document();
- doc.add(new Field("id", data.getId(), Store.YES, Index.NOT_ANALYZED));
- doc.add(new Field("name", data.getName(), Store.YES, Index.ANALYZED));
- doc.add(new Field("author", data.getAuthor(), Store.YES,Index.ANALYZED));
- doc.add(new Field("outline", data.getOutline(), Store.YES,Index.ANALYZED));
- doc.add(new Field("type", data.getType(), Store.YES, Index.ANALYZED));
- doc.add(new Field("updateTime", data.getUpdateTime(), Store.YES,Index.NOT_ANALYZED));
- doc.add(new Field("imgPath", data.getImgPath(), Store.YES,Index.NOT_ANALYZED));
- doc.add(new Field("imgUrlPath", data.getImgUrlPath()==null?"":data.getImgUrlPath(), Store.YES,Index.NOT_ANALYZED));
- doc.add(new Field("content", data.getContent()==null?"":data.getContent(), Store.YES,Index.ANALYZED));
- doc.add(new Field("link_url", data.getLink_url(), Store.YES,Index.NOT_ANALYZED));
- doc.add(new Field("hot", Long.toString(data.getHot()), Store.YES,Index.NOT_ANALYZED));
- doc.add(new Field("clickPoint", Long.toString(data.getClickPoint()),Store.YES, Index.NOT_ANALYZED));
- doc.add(new Field("bigtype", data.getBigtype(), Store.YES,Index.NOT_ANALYZED));
- doc.add(new Field("typeid", data.getTypeid(), Store.YES,Index.NOT_ANALYZED));
- return doc;
- }
- /**
- * 优化索引,返回优化策略
- *
- * @return
- */
- private static LogMergePolicy optimizeIndex() {
- LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
- // 设置segment添加文档(Document)时的合并频率
- // 值较小,建立索引的速度就较慢
- // 值较大,建立索引的速度就较快,>10适合批量建立索引
- // 达到50个文件时就和合并
- mergePolicy.setMergeFactor(50);
- // 设置segment最大合并文档(Document)数
- // 值较小有利于追加索引的速度
- // 值较大,适合批量建立索引和更快的搜索
- mergePolicy.setMaxMergeDocs(5000);
- // 启用复合式索引文件格式,合并多个segment
- mergePolicy.setUseCompoundFile(true);
- return mergePolicy;
- }
- /**
- * 转换类型成lucene的data类型
- * @param list
- * @return
- */
- public static ArrayList<LuceneData> transformation_Novel(ArrayList<Novel> list){
- ArrayList<LuceneData> transforlist=new ArrayList<LuceneData>();
- LuceneData data=new LuceneData();
- for(Model model : list)
- {
- if(model instanceof Novel)
- {
- data=new LuceneData();
- Novel novel=(Novel)model;
- data.setId(novel.getId()+"");
- data.setName(novel.getName());
- data.setAuthor(novel.getAuthor());
- data.setOutline(novel.getOutline());
- data.setType(novel.getNovelType().getName());
- data.setTypeid(novel.getNovelType().getId()+"");
- data.setBigtype("小说");
- data.setUpdateTime(novel.getUpdateTime()+"");
- data.setImgPath(novel.getImgPath());
- data.setImgUrlPath(novel.getImgUrlPath());
- data.setContent(novel.getContent());
- data.setLink_url(novel.getLink_url());
- data.setHot(novel.getHot());
- data.setClickPoint(novel.getClickPoint());
- transforlist.add(data);
- }
- }
- return transforlist;
- }
- /**
- * 测试
- * @param args
- */
- public static void main(String[] args)
- {
- //---------------------创建
- // ApplicationContext springContext = new ClassPathXmlApplicationContext(new String[]{"classpath:com/springResource/*.xml"});
- // NovelService novelService = (NovelService)springContext.getBean("novelService");
- // System.out.println("novelService"+novelService);
- //
- // ArrayList<Novel> list=novelService.getNovelList(21, 100);
- // ArrayList<LuceneData> transforlist=LuceneService.transformation(list);
- // for(LuceneData data : transforlist)
- // {
- // System.out.println("in"+LuceneService.createIndex(data));
- // }
- //---------------------搜索
- String[] fileds=new String[]{ "name", "author","outline","type"};
- Occur[] occurs=new Occur[] { Occur.SHOULD, Occur.SHOULD,Occur.SHOULD ,Occur.SHOULD };
- ArrayList<LuceneData> list=LuceneUtil.search(fileds, occurs, "初雪", 1, 10);
- for(LuceneData data:list)
- {
- System.out.println(data);
- System.out.println(data.getId()+":"+data.getAuthor());
- }
- System.out.println(list.size());
- }
- }
- package com.lucene.util;
- import org.apache.lucene.analysis.Analyzer;
- import org.wltea.analyzer.lucene.IKAnalyzer;
- /**
- * 分词器工具,设定分词器
- * @author Administrator
- *
- */
- public class AnalyzerUtil {
- private static Analyzer analyzer;
- public static Analyzer getIkAnalyzer() {
- if (analyzer == null) {
- // 当为true时,分词器迚行最大词长切分 ;当为false时,分词器迚行最细粒度切
- analyzer = new IKAnalyzer(true);
- }
- return analyzer;
- }
- }
- package com.lucene.data;
/**
 * Plain data holder for one indexed record, used both as input when indexing
 * and as output of a search.
 *
 * @author Administrator
 */
public class LuceneData {
    private String id;
    private String name;
    private String author;
    private String imgPath;
    /** Short description. */
    private String outline;
    /** Sub type name. */
    private String type;
    /** Sub type id. */
    private String typeid;
    /** Main type. */
    private String bigtype;
    private String updateTime;
    private String imgUrlPath;
    private String content;
    private String link_url;
    private Long hot = 0L;
    private Long clickPoint = 0L;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getImgPath() {
        return imgPath;
    }

    public void setImgPath(String imgPath) {
        this.imgPath = imgPath;
    }

    public String getOutline() {
        return outline;
    }

    public void setOutline(String outline) {
        this.outline = outline;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getTypeid() {
        return typeid;
    }

    public void setTypeid(String typeid) {
        this.typeid = typeid;
    }

    public String getBigtype() {
        return bigtype;
    }

    public void setBigtype(String bigtype) {
        this.bigtype = bigtype;
    }

    public String getUpdateTime() {
        return updateTime;
    }

    public void setUpdateTime(String updateTime) {
        this.updateTime = updateTime;
    }

    public String getImgUrlPath() {
        return imgUrlPath;
    }

    public void setImgUrlPath(String imgUrlPath) {
        this.imgUrlPath = imgUrlPath;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getLink_url() {
        return link_url;
    }

    public void setLink_url(String linkUrl) {
        link_url = linkUrl;
    }

    public Long getHot() {
        return hot;
    }

    public void setHot(Long hot) {
        this.hot = hot;
    }

    public Long getClickPoint() {
        return clickPoint;
    }

    public void setClickPoint(Long clickPoint) {
        this.clickPoint = clickPoint;
    }

    /**
     * Lists every field, in alphabetical order, for debugging output.
     */
    @Override
    public String toString() {
        return "LuceneData [author=" + author + ", bigtype=" + bigtype
                + ", clickPoint=" + clickPoint + ", content=" + content
                + ", hot=" + hot + ", id=" + id + ", imgPath=" + imgPath
                + ", imgUrlPath=" + imgUrlPath + ", link_url=" + link_url
                + ", name=" + name + ", outline=" + outline + ", type=" + type
                + ", typeid=" + typeid + ", updateTime=" + updateTime + "]";
    }
}
0 0
- lucene 搜索入门实例
- lucene搜索实例解析
- Lucene搜索实例
- 搜索篇:lucene简单实例
- 搜索篇:lucene简单实例
- lucene搜索简单的实例
- lucene 近实时搜索实例
- Lucene之完整搜索实例
- 搜索篇:lucene的简单实例
- Lucene之——搜索实例
- Lucene入门项目实例---比较Lucene方式搜索与传统String方式搜索的效率
- lucene(三) lucene搜索
- lucene搜索
- lucene 搜索
- Lucene搜索
- lucene 搜索
- Lucene 搜索
- lucene搜索
- Flume TailDir 基本流程
- .NET 4.0下使用 SignalR
- UIDocumentInteractionController之程序间文档共享
- 从myeclipse导入工程到eclipse中报错
- JAVA IO操作
- Lucene搜索实例
- 什么是0长数组?
- iOS内存优化常用方法
- springMVC实现多文件的上传
- Winodws删除目录失败--查找文件句柄没有关闭
- 为什么到北上广深
- QTcpSocket使用过程中的一些问题记录
- 浅谈Dialog样式Activty
- Java数组