15-使用NRTManager和SearcherManager实现近实时搜索

来源:互联网 发布:html怎么用js设置条件 编辑:程序博客网 时间:2024/06/09 19:43

TestIndex.java


package org.itat.lucene.test;import org.itat.lucene.util.IndexUtil;import org.junit.Test;/** * @ProjectName:lucene_index * @ClassName:TestIndex * @Description:索引 * @date: 2015-4-20下午06:07:37 * @author: 半仙儿 * @version: V1.0 * @date:2015-4-20下午06:07:37 */public class TestIndex {/** *@MethodName:testIndex *@Description:创建索引 *@author:半仙儿 *@return void *@date:2015-4-27下午01:56:51 */@Testpublic void testIndex() {IndexUtil iu = new IndexUtil();iu.index();}/** *@MethodName:testQuery *@Description:查询索引有多少条 numDocs\maxDocs\deleteDocs *@author:半仙儿 *@return void *@date:2015-4-27下午01:56:44 */@Testpublic void testQuery() {IndexUtil iu = new IndexUtil();iu.query();}/** *@MethodName:testSearch02 *@Description:NRT搜索 *@author:半仙儿 *@return void *@date:2015-4-27下午02:13:11 */@Testpublic void testSearch02() {IndexUtil iu = new IndexUtil();for (int i = 0; i < 5; i++) {// 进行查询iu.searcher02();System.out.println("------------------------------------");// 删除id为11的iu.delete();if (i == 2) {// 更新iu.update();}try {Thread.sleep(2000);} catch (Exception e) {e.printStackTrace();}}// 提交iu.commit();}}
IndexUtil.java

package org.itat.lucene.util;import java.io.File;import java.io.IOException;import java.text.SimpleDateFormat;import java.util.Date;import java.util.GregorianCalendar;import java.util.HashMap;import java.util.Map;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.NumericField;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.NRTManager;import org.apache.lucene.search.NRTManagerReopenThread;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.SearcherManager;import org.apache.lucene.search.SearcherWarmer;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class IndexUtil {private String[] ids = { "1", "2", "3", "4", "5", "6" };private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org","dd@sina.org", "ee@zttc.edu", "ff@itat.org" };private String[] contents = { "welcome to visited the space,I like book","hello boy,I like pingpeng ball", "my name is cc I like game","I like football", "I like football and I like basketball too","I like movie and swim" };// 邮件日期private Date[] dates = null;private int[] attachs = { 2, 3, 1, 4, 5, 5 };private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike","jake" };private Directory directory = null;// NRT搜索private SearcherManager mgr = null;private NRTManager nrtMgr = null;private IndexWriter writer = null;// 加权private Map<String, Float> scores = new HashMap<String, Float>();// 构造方法实例化Directorypublic IndexUtil() {try {setDates();scores.put("itat.org", 2.0f);scores.put("zttc.edu", 1.5f);directory = FSDirectory.open(new 
File("d:/lucene/index02"));// mgr = new SearcherManager(directory, new SearcherWarmer() {// @Override// public void warm(IndexSearcher s) throws IOException {// System.out.println("has changed");// }// }, Executors.newCachedThreadPool());writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));nrtMgr = new NRTManager(writer, new SearcherWarmer() {@Overridepublic void warm(IndexSearcher arg0) throws IOException {System.out.println("reopen");}});// 0.0025*10000=25秒重新打开一次searcherNRTManagerReopenThread reopen = new NRTManagerReopenThread(nrtMgr,5.0, 0.025);// 后台进程reopen.setDaemon(true);reopen.setName("Nrt Manager Reopen Thread");// 启动NRTManager的reopen线程reopen.start();// true表示引起索引的更新,该提交的进行提交mgr = nrtMgr.getSearcherManager(true);} catch (Exception e) {e.printStackTrace();}}/** *@MethodName:delete *@Description:删除索引 *@author:半仙儿 *@return void *@date:2015-4-14上午09:58:24 */public void delete() {try {// 删除全部索引// writer.deleteAll();nrtMgr.deleteDocuments(new Term("id", "11"));} catch (Exception e) {e.printStackTrace();}}/** *@MethodName:update *@Description:更新索引 *@author:半仙儿 *@return void *@date:2015-4-14上午09:58:42 */public void update() {try {// lucene的更新是先删除再添加Document doc = new Document();doc.add(new Field("id", "11", Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));doc.add(new Field("email", emails[0], Field.Store.YES,Field.Index.NOT_ANALYZED));doc.add(new Field("content", contents[0], Field.Store.NO,Field.Index.ANALYZED));doc.add(new Field("name", names[0], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));nrtMgr.updateDocument(new Term("id", "1"), doc);} catch (Exception e) {e.printStackTrace();}}/** *@MethodName:query *@Description:查询索引 *@author:半仙儿 *@return void *@date:2015-4-14上午09:58:52 */public void query() {try {IndexReader reader = IndexReader.open(directory);// 通过reader可以有效的获取到文档的数量System.out.println("numDocs:" + reader.numDocs());System.out.println("maxDocs:" + 
reader.maxDoc());System.out.println("deleteDocs:" + reader.numDeletedDocs());reader.close();} catch (Exception e) {e.printStackTrace();}}/** *@MethodName:index *@Description:创建索引 *@author:半仙儿 *@return void *@date:2015-4-14上午09:59:01 */public void index() {IndexWriter writer = null;try {writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));Document doc = null;// 清空writer.deleteAll();for (int i = 0; i < ids.length; i++) {doc = new Document();doc.add(new Field("id", ids[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));doc.add(new Field("email", emails[i], Field.Store.YES,Field.Index.NOT_ANALYZED));doc.add(new Field("content", contents[i], Field.Store.NO,Field.Index.ANALYZED));doc.add(new Field("name", names[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));doc.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attachs[i]));doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));// 获取邮箱后缀String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);System.out.println(et);if (scores.containsKey(et)) {doc.setBoost(scores.get(et));} else {doc.setBoost(0.5f);}writer.addDocument(doc);}} catch (Exception e) {e.printStackTrace();} finally {if (writer != null) {try {writer.close();} catch (Exception e) {e.printStackTrace();}}}}public void setDates() {SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");dates = new Date[ids.length];try {dates[0] = sdf.parse("2010-02-19");dates[1] = sdf.parse("2012-01-11");dates[2] = sdf.parse("2010-09-19");dates[3] = sdf.parse("2010-12-22");dates[4] = sdf.parse("2012-01-01");dates[5] = sdf.parse("2011-05-19");} catch (Exception e) {e.printStackTrace();}}/** *@MethodName:formateNumToDateString *@Description:将字符串的数字格式的日期转换为日期格式 *@param num *@author:半仙儿 *@return String *@date:2015-4-14下午04:03:18 */public String formateNumToDateString(Long num) {Date dat = new Date(num);GregorianCalendar gc = new 
GregorianCalendar();gc.setTime(dat);java.text.SimpleDateFormat format = new java.text.SimpleDateFormat("yyyy-MM-dd");String sb = format.format(gc.getTime());return sb;}/** *@MethodName:searcher02 *@Description:NTR实时搜索 *@author:半仙儿 *@return void *@date:2015-4-27下午03:04:17 */public void searcher02() {// 使用searcherManager进行获取IndexSearcher------NRTIndexSearcher searcher = mgr.acquire();try {// 是否要重新打开一个searcher// mgr.maybeReopen();TermQuery query = new TermQuery(new Term("content", "like"));TopDocs tds = searcher.search(query, 10);for (ScoreDoc sd : tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println(doc.get("id") + "---->" + doc.get("name")+ "[" + doc.get("email") + "]-->" + doc.get("id") + ","+ doc.get("attach") + "," + doc.get("date"));}} catch (Exception e) {e.printStackTrace();} finally {try {// 释放mgr.release(searcher);} catch (IOException e) {e.printStackTrace();}}}/** *@MethodName:commit *@Description:提交操作 *@author:半仙儿 *@return void *@date:2015-4-27下午02:55:56 */public void commit() {try {writer.commit();} catch (Exception e) {e.printStackTrace();}}}



0 0