lucene 4.6 为数据库建立增量索引

来源:互联网 发布:mac chrome 网银插件 编辑:程序博客网 时间:2024/05/17 07:20
lucene 4.6 为数据库建立增量索引

首先去官网下载lucene,地址:http://www.apache.org/dyn/closer.cgi/lucene/java/4.6.0

下载IK分词源码,地址 :  http://code.google.com/p/ik-analyzer/downloads/list

添加lucene jar包:


导入分词源码:



这里以一个商品表为例,对该表建立索引,并进行查询

商品表对应的bean:
 
public class Goods implements java.io.Serializable {// Fieldsprivate Integer id;private String name;private String describe;private Timestamp uploadTime;private Double price;private Integer newOld;private String imageName;private Integer userId;private Integer specialGoodsId;private Boolean state;private Integer needSpecialGoodsId;private String needName;        //....省略getter和setter


为方便以后为数据库其他表建索引,采用模板模式,建一个抽象类,把建索引的方法和
转化为bean的方法写成抽象方法,并使用泛型,方便子类继承,代码如下:
package com.sms.web.lucene;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Template for indexing and searching beans of type {@code T} with Lucene.
 *
 * <p>Subclasses supply the bean-to-document mapping ({@link #getDoc(List)}) and the
 * hit-to-bean mapping ({@link #toBean(IndexSearcher, Query, ScoreDoc[])}); this class
 * handles opening and closing the index, query parsing and highlighting.
 *
 * @param <T> type of the bean stored in the index
 */
public abstract class LuceneSearch<T> {

    /** Maximum number of hits requested from the searcher per query. */
    private static final int MAX_HITS = 100;

    /** Directory on disk that holds the index files. */
    public File indexDir;

    /** Analyzer used for indexing, query parsing and highlighting. */
    protected static Analyzer analyzer = new IKAnalyzer();

    /**
     * @param indexDir directory on disk that holds the index files
     */
    public LuceneSearch(File indexDir) {
        this.indexDir = indexDir;
    }

    /**
     * Adds the given items to the index, creating the index if it does not exist yet.
     *
     * <p>The writer is opened in {@code CREATE_OR_APPEND} mode and documents are only
     * added, never replaced, so passing an item that was indexed before adds it again.
     * An {@link IOException} is reported on stderr and not rethrown.
     *
     * @param items beans to index; converted to documents by {@link #getDoc(List)}
     */
    public void createIndex(List<T> items) {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = FSDirectory.open(indexDir);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
            // Create the index on first use, append to it afterwards.
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, config);

            List<Document> docs = getDoc(items);
            if (docs != null) {
                for (Document doc : docs) {
                    indexWriter.addDocument(doc);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The writer must be closed before the directory it writes into.
            closeAndReport(indexWriter);
            closeAndReport(directory);
        }
    }

    /**
     * Runs a query against one field of the index.
     *
     * <p>Any failure (missing index, unparsable query, error in {@code toBean}) is
     * reported on stderr and yields an empty list; this method never returns
     * {@code null}.
     *
     * @param queryStr   query text in Lucene classic query-parser syntax
     * @param queryField default field the query is matched against
     * @return the beans built from at most {@value #MAX_HITS} hits; empty when nothing
     *         matched or the search failed
     */
    public List<T> search(String queryStr, String queryField) {
        List<T> hitItem = new ArrayList<T>();
        Directory directory = null;
        IndexReader reader = null;
        try {
            // Keep a handle on the directory: closing the reader does not close it.
            directory = FSDirectory.open(indexDir);
            reader = DirectoryReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(reader);

            QueryParser parser = new QueryParser(Version.LUCENE_46, queryField, analyzer);
            Query query = parser.parse(queryStr);
            ScoreDoc[] hits = indexSearcher.search(query, MAX_HITS).scoreDocs;

            List<T> beans = toBean(indexSearcher, query, hits);
            if (beans != null) {
                hitItem = beans;
            }
        } catch (Exception e) {
            // Deliberately broad: the search stays best-effort, including runtime
            // failures raised by a subclass's toBean implementation.
            e.printStackTrace();
        } finally {
            closeAndReport(reader);
            closeAndReport(directory);
        }
        return hitItem;
    }

    /**
     * Converts beans into the Lucene documents to be indexed.
     *
     * @param items beans to convert
     * @return the documents to add to the index
     */
    public abstract List<Document> getDoc(List<T> items);

    /**
     * Converts search hits back into beans.
     *
     * @param indexSearcher searcher used to load the stored documents of the hits
     * @param query         the executed query, available for highlighting
     * @param hits          the scored hits returned by the search
     * @return the beans rebuilt from the hits
     */
    public abstract List<T> toBean(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits);

    /**
     * Wraps the query terms found in a stored field value in
     * {@code <font color="blue">...</font>} tags.
     *
     * <p>NOTE(review): the stored text is not HTML-escaped before the tags are added;
     * confirm that callers render the result safely.
     *
     * @param query query whose terms are highlighted
     * @param doc   document holding the stored field
     * @param field name of the stored field to highlight
     * @return the best highlighted fragment; the unmodified field value when no term
     *         matched; {@code null} when the document has no such field or
     *         highlighting failed
     */
    protected String toHighlighter(Query query, Document doc, String field) {
        String text = doc.get(field);
        if (text == null) {
            // Nothing stored under this field; StringReader would reject null.
            return null;
        }
        try {
            SimpleHTMLFormatter simpleHtmlFormatter =
                    new SimpleHTMLFormatter("<font color=\"blue\">", "</font>");
            Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
            // Analyze under the real field name so a per-field analyzer would apply.
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
            String fragment = highlighter.getBestFragment(tokenStream, text);
            return fragment == null ? text : fragment;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
        return null;
    }

    /** Closes the resource if it was opened, reporting a failure on stderr. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

构建一个子类,继承上面的抽象类,实现其抽象方法:
package com.sms.web.lucene;

import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;

import com.sms.web.model.Goods;

/**
 * Lucene index mapping for {@link Goods}.
 *
 * <p>{@code name} and {@code needName} are indexed, tokenized and stored; the remaining
 * properties are stored only. Both names are also indexed under the shared field
 * {@code mixName} so that a single query can match either of them.
 */
public class SearchGood extends LuceneSearch<Goods> {

    /**
     * @param indexDir directory on disk that holds the index files
     */
    public SearchGood(File indexDir) {
        super(indexDir);
    }

    /**
     * Builds one document per good.
     *
     * <p>Properties that are {@code null} are left out of the document instead of
     * aborting the whole batch; {@code null} list entries are skipped.
     *
     * @param goods goods to convert; may be {@code null}
     * @return one document per non-null good, in input order
     */
    @Override
    public List<Document> getDoc(List<Goods> goods) {
        List<Document> docs = new ArrayList<Document>();
        if (goods == null) {
            return docs;
        }

        // Field type for searchable text: indexed, tokenized and stored.
        FieldType ftIndex = new FieldType();
        ftIndex.setIndexed(true);
        ftIndex.setStored(true);
        ftIndex.setTokenized(true);

        for (Goods good : goods) {
            if (good == null) {
                continue;
            }
            Document doc = new Document();

            addText(doc, "name", good.getName(), ftIndex);
            addText(doc, "needName", good.getNeedName(), ftIndex);

            addStored(doc, "id", good.getId());
            addStored(doc, "describe", good.getDescribe());
            addStored(doc, "uploadTime", textOrNull(good.getUploadTime()));
            addStored(doc, "price", good.getPrice());
            addStored(doc, "newOld", good.getNewOld());
            addStored(doc, "imageName", good.getImageName());
            addStored(doc, "userId", good.getUserId());
            addStored(doc, "specialGoodsId", good.getSpecialGoodsId());
            addStored(doc, "state", textOrNull(good.getState()));
            addStored(doc, "needSpecialGoodsId", good.getNeedSpecialGoodsId());

            // Combined name field: both values go into the SAME document as a
            // multi-valued field, so a "mixName" hit still carries the id and the
            // other stored properties that toBean needs.
            addText(doc, "mixName", good.getName(), ftIndex);
            addText(doc, "mixName", good.getNeedName(), ftIndex);

            docs.add(doc);
        }
        return docs;
    }

    /**
     * Rebuilds goods from search hits, with the query terms in {@code name} and
     * {@code needName} highlighted.
     *
     * <p>A property whose stored field is missing from the hit is left unset on the
     * bean. If loading a hit fails with an {@link IOException}, the error is reported
     * on stderr and the goods converted so far are returned.
     *
     * @param indexSearcher searcher used to load the stored documents
     * @param query         the executed query, used for highlighting
     * @param hits          the scored hits to convert
     * @return the goods rebuilt from the hits, in hit order
     */
    @Override
    public List<Goods> toBean(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits) {
        List<Goods> hitGoods = new ArrayList<Goods>();
        try {
            for (int i = 0; i < hits.length; i++) {
                Document hitDoc = indexSearcher.doc(hits[i].doc);
                Goods good = new Goods();

                String id = hitDoc.get("id");
                if (id != null) {
                    good.setId(Integer.parseInt(id));
                }
                // Names are returned with the matched keywords highlighted.
                good.setName(highlightIfPresent(query, hitDoc, "name"));
                good.setNeedName(highlightIfPresent(query, hitDoc, "needName"));
                good.setDescribe(hitDoc.get("describe"));
                good.setImageName(hitDoc.get("imageName"));

                String uploadTime = hitDoc.get("uploadTime");
                if (uploadTime != null) {
                    good.setUploadTime(Timestamp.valueOf(uploadTime));
                }
                String price = hitDoc.get("price");
                if (price != null) {
                    good.setPrice(Double.parseDouble(price));
                }
                String newOld = hitDoc.get("newOld");
                if (newOld != null) {
                    good.setNewOld(Integer.parseInt(newOld));
                }
                String userId = hitDoc.get("userId");
                if (userId != null) {
                    good.setUserId(Integer.parseInt(userId));
                }
                String specialGoodsId = hitDoc.get("specialGoodsId");
                if (specialGoodsId != null) {
                    good.setSpecialGoodsId(Integer.parseInt(specialGoodsId));
                }
                String state = hitDoc.get("state");
                if (state != null) {
                    good.setState(Boolean.parseBoolean(state));
                }
                String needSpecialGoodsId = hitDoc.get("needSpecialGoodsId");
                if (needSpecialGoodsId != null) {
                    good.setNeedSpecialGoodsId(Integer.parseInt(needSpecialGoodsId));
                }

                hitGoods.add(good);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return hitGoods;
    }

    /** Highlights a stored field, or returns {@code null} when the hit lacks it. */
    private String highlightIfPresent(Query query, Document doc, String field) {
        return doc.get(field) == null ? null : toHighlighter(query, doc, field);
    }

    /** Returns {@code value.toString()}, or {@code null} for a {@code null} value. */
    private static String textOrNull(Object value) {
        return value == null ? null : value.toString();
    }

    /** Adds a field of the given type unless the value is {@code null}. */
    private static void addText(Document doc, String fieldName, String value, FieldType type) {
        if (value != null) {
            doc.add(new Field(fieldName, value, type));
        }
    }

    /** Adds a stored-only string field unless the value is {@code null}. */
    private static void addStored(Document doc, String fieldName, String value) {
        if (value != null) {
            doc.add(new StoredField(fieldName, value));
        }
    }

    /** Adds a stored-only int field unless the value is {@code null}. */
    private static void addStored(Document doc, String fieldName, Integer value) {
        if (value != null) {
            doc.add(new StoredField(fieldName, value.intValue()));
        }
    }

    /** Adds a stored-only double field unless the value is {@code null}. */
    private static void addStored(Document doc, String fieldName, Double value) {
        if (value != null) {
            doc.add(new StoredField(fieldName, value.doubleValue()));
        }
    }
}

对数据库建立索引:
从数据库中查找出所有的商品记录,调用上面的createIndex方法进行建索引,
建完索引后如何维护,也就是说以后数据库中记录发生变化后,索引如何进行更新?
对于数据库记录只有增加的情况来说,我们可以在表中加1个flag字段,标志是否已
为其建立了索引,建立索引时把其置为1,再把建索引的方法写成spring的定时任务
下次建索引时,只为flag为0的建增量索引。
但是对于数据库的更新,删除记录,如何使索引与其一致,我现在还没想到合适的
方法(菜鸟一个,功力不够啊!),
希望广大网友能够提点建议。。。这也是我写这篇博客最主要的目的。



1 0
原创粉丝点击