分享下自己的lucene工具类

来源:互联网 发布:下列网络系统安全原则 编辑:程序博客网 时间:2024/05/20 09:43

分享下自己的 Lucene 工具类(基于 Lucene 3.6.1),不足之处欢迎吐槽!

0.2版


package com.jiuxing.qa.util.lucene;import java.io.File;import java.io.IOException;import java.io.StringReader;import java.lang.reflect.InvocationTargetException;import java.lang.reflect.Method;import java.text.SimpleDateFormat;import java.util.ArrayList;import java.util.Arrays;import java.util.Date;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Map.Entry;import java.util.regex.Matcher;import java.util.regex.Pattern;import java.util.regex.PatternSyntaxException;import net.paoding.analysis.analyzer.PaodingAnalyzer;import net.paoding.analysis.knife.Paoding;import net.paoding.analysis.knife.PaodingMaker;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.queryParser.MultiFieldQueryParser;import org.apache.lucene.queryParser.ParseException;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.Sort;import org.apache.lucene.search.SortField;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleFragmenter;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.FSDirectory;import 
org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.util.Version;import com.jiuxing.qa.util.PropertyUtil;/** * luence操作工具类 提供索引创建、查询功能 lucene vsrsion 3.6.1 *  * @author jiaojun [junjiao.j@gmail.com] * @version v0.0.2 * @param <T> * @date 2012-08-20 */public class LuceneUtil<T> {private static Log log = LogFactory.getLog(LuceneUtil.class);/** * 索引优化后文件段的数量,数量越大,优化效率越大 */private static final int DEFAULT_MAX_NUM_SEGMENTS = 3;/** * 低版本的查询索引存活周期 */private static final long STALE_INDEXREADER_SURVIVAL_TIME = 60000;private static Map<String, IndexWriter> writerPool = new HashMap<String, IndexWriter>();private static Map<String, IndexReader> readerPool = new HashMap<String, IndexReader>();/** * 存放IndexReader的Map,Map里存放的都是已经实例化好的IndexReader */private static Map<Long, IndexReader> stalereaderPool = new HashMap<Long, IndexReader>();private static LuceneUtil util = null;private LuceneUtil() {}public synchronized static LuceneUtil getInstance() {if (util == null) {util = new LuceneUtil();}return util;}static {init();}/** * 始化索引池初 */public static void init() {log.info("索引池初始化开始");String indexDir = PropertyUtil.getPropertiesByKey("lucene.properties","lucene.index.dir");String pool = PropertyUtil.getPropertiesByKey("lucene.properties","lucene.index.pool");for (String poolDir : pool.split(",")) {synchronized (writerPool) {try {IndexWriter iw = createIndexWriter(indexDir + poolDir);if (iw != null)writerPool.put(poolDir, iw);} catch (IOException e) {log.error("writerPool初始化失败,原因:" + e.getMessage());}}synchronized (readerPool) {try {IndexReader ir = IndexReader.open(FSDirectory.open(getIndexFile(indexDir + poolDir)),false);if (ir != null)readerPool.put(poolDir, ir);} catch (Exception e) {log.error("readerPool初始化失败,原因:" + e.getMessage());}}}log.info("索引池初始化完成");}/** * 创建索引池初始化 */public static void initIndexWriter() {log.info("【创建索引池】初始化开始");String indexDir = PropertyUtil.getPropertiesByKey("lucene.properties","lucene.index.dir");String pool = 
PropertyUtil.getPropertiesByKey("lucene.properties","lucene.index.pool");for (String poolDir : pool.split(",")) {synchronized (writerPool) {try {IndexWriter iw = createIndexWriter(indexDir + poolDir);if (iw != null)writerPool.put(poolDir, iw);} catch (IOException e) {log.error("writerPool初始化失败,原因:" + e.getMessage());}}}log.info("【创建索引池】初始化完成");}/** * 创建索引,建议定时更新即可 *  * @param <T> * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param list *            需要创建索引的数据 * @param clz *            数据绑定的对象 * @param fields *            须创建索引的属性(小写) * @throws IOException * @throws NoSuchMethodException * @throws SecurityException * @throws InvocationTargetException * @throws IllegalAccessException * @throws IllegalArgumentException */public static <T> void createIndex(String indexDir, String poolDir,List<?> list, Class<T> clz, String[] fields) throws IOException,SecurityException, NoSuchMethodException, IllegalArgumentException,IllegalAccessException, InvocationTargetException {createIndex(indexDir,poolDir,list,clz, fields,false);}/** * 创建索引,建议定时更新即可 *  * @param <T> * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param list *            需要创建索引的数据 * @param clz *            数据绑定的对象 * @param fields *            须创建索引的属性(小写) * @param isDel  *   是否删除原索引重新创建 * @throws IOException * @throws NoSuchMethodException * @throws SecurityException * @throws InvocationTargetException * @throws IllegalAccessException * @throws IllegalArgumentException */public static <T> void createIndex(String indexDir, String poolDir,List<?> list, Class<T> clz, String[] fields,boolean isDel) throws IOException,SecurityException, NoSuchMethodException, IllegalArgumentException,IllegalAccessException, InvocationTargetException {log.info("索引开始创建,服务于 " + clz + " | " + fields.toString());long start = new Date().getTime();IndexWriter writer = getIndexWriter(indexDir, poolDir);if (null == writer) {log.error("IndexWriter获取失败");return;}// 
删除全部索引if(isDel){//writer.deleteAll();}SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");if (null != list && list.size() > 0) {for (int i = 0; i < list.size(); i++) {Document doc = new Document();java.lang.reflect.Field[] cfs = clz.getDeclaredFields();for (java.lang.reflect.Field cf : cfs) {String fieldName = cf.getName();String stringLetter = fieldName.substring(0, 1).toUpperCase();String getName = "get" + stringLetter+ fieldName.substring(1);// String setName="set"+stringLetter+fieldName.substring(1);Method getMethod = clz.getMethod(getName);// Method setMethod=clz.getMethod(setName, new// Class[]{cf.getType()});Object value = getMethod.invoke((T) list.get(i));if (Arrays.asList(fields).contains(fieldName)) {if (value != null && !"".equals(value.toString())) {String tmp = "";if (cf.getGenericType().toString().equals("class java.util.Date")) {tmp = simpleDateFormat.format(value);} else {tmp = value.toString();}doc.add(new Field(fieldName, tmp, Field.Store.YES,Field.Index.ANALYZED));}}}if(!isDel){/** * 先将fields[0]的索引查找到,然后再删除,最后将新的索引添加到索引文件中 */if(null !=  doc.get(fields[0])){writer.updateDocument(new Term(fields[0], doc.get(fields[0])),doc);}}}log.info("索引创建完成,保存目录:" + indexDir + poolDir + ",索引创建/记录:"+ writer.maxDoc() + "/" + list.size() + "条,花费时间:"+ (new Date().getTime() - start) / 1000 + "秒!" 
+ writer);list.clear();}writer.forceMerge(DEFAULT_MAX_NUM_SEGMENTS);writer.commit();}/** * 分页查询索引 排序就默认按传入的fields属性的第一个元素的匹配度降序排列 *  * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param keyWords *            关键词 * @param fields *            属性 * @param pageSize *            每页记录数 * @param currentPage *            当前页数 * @throws IOException * @return SearchResult 查询结果集 * @throws IOException * @throws InvalidTokenOffsetsException */public static SearchResult searchPage(String indexDir, String poolDir,String[] keyWords, String[] fields, int pageSize, int currentPage)throws IOException, InvalidTokenOffsetsException {return searchPage(indexDir, poolDir, keyWords, fields, true, pageSize,currentPage);}/** * 分页查询索引 排序就默认按传入的fields属性的第一个元素的匹配度降序排列 *  * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param keyWords *            关键词 * @param fields *            属性 * @param isHighlighter *            是否高亮显示 * @param pageSize *            每页记录数 * @param currentPage *            当前页数 * @throws IOException * @return SearchResult 查询结果集 * @throws IOException * @throws InvalidTokenOffsetsException */public static SearchResult searchPage(String indexDir, String poolDir,String[] keyWords, String[] fields, boolean isHighlighter,int pageSize, int currentPage) throws IOException,InvalidTokenOffsetsException {return searchPage(indexDir, poolDir, keyWords, fields, true, pageSize,currentPage,true);}/** * 分页查询索引 排序就默认按传入的fields属性的第一个元素的匹配度降序排列 *  * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param keyWords *            关键词 * @param fields *            属性 * @param isHighlighter *            是否高亮显示 * @param pageSize *            每页记录数 * @param currentPage *            当前页数 * @param isPage *            是否分页,如无需分页只查条数的话,直接传入条数即可,大大优化索引查询效率 * @throws IOException * @return SearchResult 查询结果集 * @throws IOException * @throws InvalidTokenOffsetsException */public static SearchResult searchPage(String 
indexDir, String poolDir,String[] keyWords, String[] fields, boolean isHighlighter,int pageSize, int currentPage,boolean isPage) throws IOException,InvalidTokenOffsetsException {//将关键字中的特殊符号过滤if(null != keyWords && keyWords.length>0){String[] tmp = new String[keyWords.length];for(int i = 0;i<keyWords.length;i++){tmp[i] = stringFilter(keyWords[i]);}keyWords = tmp;}SearchResult searchResult = new SearchResult();IndexSearcher searcher = null;try {IndexReader reader = getIndexReader(indexDir, poolDir);if (null != reader) {reader = refreshIndexReader(poolDir, reader);}if(null == reader){log.error("索引文件为空,请检查!");return null;}searcher = new IndexSearcher(reader);searcher.setDefaultFieldSortScoring(true, false);Analyzer analyzer = getAnalyzer();if (keyWords[0].length() < 2) {analyzer = new StandardAnalyzer(Version.LUCENE_36);}Query query = MultiFieldQueryParser.parse(Version.LUCENE_36,keyWords, fields, analyzer);// query.setBoost(0.1f);/* * 1.被排序的字段必须被索引过(Indexecd),在索引时不能 用 Field.Index.TOKENIZED * (用UN_TOKENIZED可以正常实现.用NO时查询正常,但排序不能正常设置升降序) 2.SortField类型 * SCORE、DOC、AUTO、STRING、INT、FLOAT、CUSTOM 此类型主要是根据字段的类型选择 * 3.SortField的第三个参数代表是否是降序true:降序 false:升序 */Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE,new SortField(fields[0], SortField.STRING, true) });TopDocs topDocs = null;if(isPage){topDocs = searcher.search(query, searcher.maxDoc(), sort);}else{int searchNum = pageSize<searcher.maxDoc()?pageSize:searcher.maxDoc();topDocs = searcher.search(query, searchNum, sort);}ScoreDoc[] hits = topDocs.scoreDocs;int begin = pageSize * (currentPage - 1);int end = Math.min(begin + pageSize, hits.length);List<Document> documents = new ArrayList<Document>();for (int i = begin; i < end; i++) {Document document = searcher.doc(hits[i].doc);if (isHighlighter) {document.getField(fields[0]).setValue(toHighlighter(query, document, fields[0],analyzer));}documents.add(document);// hits[i].score 
匹配度分值}searchResult.setPageSize(pageSize);searchResult.setCurrentPage(currentPage);searchResult.setDocuments(documents);searchResult.setTotalCount(hits.length);} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} catch (ParseException e) {e.printStackTrace();}return searchResult;}/** * 使用Field信息来批量删除文档 * @description:   <br> * @author:jiaojun * @param indexDir * @param poolDir * @param field * @param keyWord * @throws IOException  * @throws CorruptIndexException  */public static void deleteIndex(String indexDir, String poolDir,String field,String keyWord) {IndexWriter writer = null;try {writer = getIndexWriter(indexDir, poolDir);} catch (CorruptIndexException e) {e.printStackTrace();} catch (LockObtainFailedException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}if (null == writer) {log.error("IndexWriter获取失败");return;}Term term=new Term(field,keyWord); //分别代表FieldName,和field的值。try {writer.deleteDocuments(term);writer.forceMerge(DEFAULT_MAX_NUM_SEGMENTS);writer.commit();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}//reader.close();//在调用close方法前的删除只是标记删除,只有调用了writer.optimize后才//是真正的在物理上删除,否则是可以使用reader.undeleteAll(),方法进行恢复的}/** * 释放索引 */public static void destroy() {synchronized (writerPool) {Iterator<Entry<String, IndexWriter>> iterator = writerPool.entrySet().iterator();while (iterator.hasNext()) {Entry<String, IndexWriter> entry = iterator.next();IndexWriter indexWriter = entry.getValue();try {indexWriter.commit();indexWriter.close();} catch (Exception e) {log.error("writerPool销毁失败,原因:" + e.getMessage());}}writerPool.clear();}synchronized (readerPool) {Iterator<Entry<String, IndexReader>> iterator = readerPool.entrySet().iterator();while (iterator.hasNext()) {Entry<String, IndexReader> entry = iterator.next();IndexReader indexReader = entry.getValue();try {indexReader.close();} catch (Exception e) {log.error("readerPool销毁失败,原因:" + 
e.getMessage());}}readerPool.clear();}}/** * 释放创建索引 */public static void destroyIndexWriter() {synchronized (writerPool) {Iterator<Entry<String, IndexWriter>> iterator = writerPool.entrySet().iterator();while (iterator.hasNext()) {Entry<String, IndexWriter> entry = iterator.next();IndexWriter indexWriter = entry.getValue();try {indexWriter.close();} catch (Exception e) {log.error("writerPool销毁失败,原因:" + e.getMessage());}}writerPool.clear();}log.info("【创建索引池】完成销毁");}/** * 释放旧查询索引 */public static void destroyIndexReader(Map<Long, IndexReader> readerPool) {synchronized (readerPool) {Iterator<Entry<Long, IndexReader>> iterator = readerPool.entrySet().iterator();while (iterator.hasNext()) {Entry<Long, IndexReader> entry = iterator.next();if ((System.currentTimeMillis() - entry.getKey()) >= STALE_INDEXREADER_SURVIVAL_TIME) {IndexReader indexReader = entry.getValue();try {indexReader.close();log.info("【查询索引池】完成销毁" + entry.getValue());} catch (Exception e) {log.error("readerPool销毁失败,原因:" + e.getMessage());}}}readerPool.clear();}}/** * 刷新指定的indexReader--加载新的索引数据,若产生新的indexReader, * 则在indexReaderMap里替换旧的indexReader *  * @param indexDirName * @param indexReader * @return {@link IndexReader} */private synchronized static IndexReader refreshIndexReader(String poolDir,IndexReader indexReader) {try {destroyIndexReader(stalereaderPool);IndexReader newIndexReader = indexReader.reopen(false);if (newIndexReader != indexReader) {IndexReader oldIndexReader = indexReader;stalereaderPool.put(System.currentTimeMillis(), oldIndexReader);readerPool.put(poolDir, newIndexReader);}} catch (Exception e) {log.error("刷新索引失败" + e.getMessage());}// return newest IndexReaderreturn readerPool.get(poolDir);}/** * 过滤特殊符号 *  * @param str * @return * @throws PatternSyntaxException */public static String stringFilter(String str) throws PatternSyntaxException {String regEx = "[`~!@#$%^&*()+=|{ }':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?·\'\"\\-\t\n\r]";Pattern p = Pattern.compile(regEx);Matcher m = 
p.matcher(str);return m.replaceAll("").trim();}/** * 高亮设置 *  * @param query * @param doc * @param field * @return */private static String toHighlighter(Query query, Document doc,String field, Analyzer analyzer) {try {SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));highlighter.setTextFragmenter(new SimpleFragmenter(doc.get(field).length() + 100));TokenStream tokenStream = analyzer.tokenStream(field,new StringReader(doc.get(field)));String highlighterStr = highlighter.getBestFragment(tokenStream,doc.get(field));return highlighterStr == null ? doc.get(field) : highlighterStr;} catch (IOException e) {log.error(e.getMessage());} catch (InvalidTokenOffsetsException e) {log.error(e.getMessage());}return null;}@SuppressWarnings("static-access")private static IndexWriter getIndexWriter(String indexDir, String poolDir)throws CorruptIndexException, LockObtainFailedException,IOException {IndexWriter writer = writerPool.get(poolDir);if (writer == null) {synchronized (writerPool) {if (!writerPool.containsKey(poolDir)) {try {writer = createIndexWriter(indexDir + poolDir);if (writer != null)writerPool.put(poolDir, writer);} catch (IOException e) {if (IndexWriter.isLocked(FSDirectory.open(getIndexFile(indexDir + poolDir)))) {IndexWriter.unlock(FSDirectory.open(getIndexFile(indexDir + poolDir)));}log.error(e.getMessage());e.printStackTrace();destroy();}}}}return writer;}private static IndexReader getIndexReader(String indexDir, String poolDir)throws CorruptIndexException, IOException {IndexReader reader = readerPool.get(poolDir);synchronized (readerPool) {if (!readerPool.containsKey(poolDir)) {try {reader = IndexReader.open(FSDirectory.open(getIndexFile(indexDir + poolDir)),false);if (reader != null)readerPool.put(poolDir, reader);} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}return 
reader;}private static IndexWriter createIndexWriter(String dir)throws CorruptIndexException, LockObtainFailedException,IOException {/* * mmseg4j:ComplexAnalyzer 适用于高匹配度的中文 lucene标准:StandardAnalyzer */IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36,getAnalyzer());/* * 创建索引模式:CREATE,覆盖模式; conf.setOpenMode(OpenMode.CREATE); *  * APPEND,追加模式 conf.setOpenMode(OpenMode.CREATE_OR_APPEND); */conf.setOpenMode(OpenMode.CREATE_OR_APPEND);if (IndexWriter.isLocked(FSDirectory.open(getIndexFile(dir)))) {IndexWriter.unlock(FSDirectory.open(getIndexFile(dir)));}IndexWriter writer = new IndexWriter(FSDirectory.open(getIndexFile(dir)), conf);return writer;}/** * 获取分词模式 paodingAnalyer Paoding paoding = PaodingMaker.make(); return * PaodingAnalyzer.writerMode(paoding); //writer mode意味要同时支持最大和最小切词 * lucene标准:StandardAnalyzer new StandardAnalyzer(Version.LUCENE_36); *  * @return */private static Analyzer getAnalyzer() {Paoding paoding = PaodingMaker.make();return PaodingAnalyzer.writerMode(paoding);}private static File getIndexFile(String dir) {return new File(new StringBuilder(new File(LuceneUtil.class.getResource("/").getPath()).getParentFile().getParentFile().getPath().replace('\\', '/').toString()).append(dir).toString());}public  static void main(String[] args) {System.out.println(stringFilter("[不懂就要问]请问 H6不能插u盘听歌吗  知道的说下  谢谢!"));// init();//for (int i = 0; i < 50; i++) {//new Thread(new Runnable() {////@Override//public void run() {//try {//Thread.currentThread().sleep(500);//} catch (InterruptedException e) {//e.printStackTrace();//}////IndexWriter close = null;//IndexWriter noClose = null;//IndexWriter searchLog = null;//try {//close = getIndexWriter("/WEB-INF/index/", "close");//noClose = getIndexWriter("/WEB-INF/index/", "noClose");//searchLog = getIndexWriter("/WEB-INF/index/",//"searchLog");////IndexReader readerc = getIndexReader("/WEB-INF/index/",//"close");//IndexReader readern = getIndexReader("/WEB-INF/index/",//"noClose");//IndexReader readers = 
getIndexReader("/WEB-INF/index/",//"searchLog");////// System.out.println(readerc);//// System.out.println(readern);//// System.out.println(readers);////} catch (CorruptIndexException e) {//e.printStackTrace();//} catch (LockObtainFailedException e) {//e.printStackTrace();//} catch (IOException e) {//e.printStackTrace();//}////if (close == null || noClose == null) {//System.out.println("-----------");//}//// System.out.println(close);//// System.out.println(noClose);//// System.out.println(searchLog);////}//}).start();//}////// destroy();}}





0.1版


package com.junjiao.util.search;import java.io.File;import java.io.IOException;import java.io.StringReader;import java.lang.reflect.InvocationTargetException;import java.lang.reflect.Method;import java.text.SimpleDateFormat;import java.util.ArrayList;import java.util.Arrays;import java.util.Date;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Map.Entry;import java.util.regex.Matcher;import java.util.regex.Pattern;import java.util.regex.PatternSyntaxException;import net.paoding.analysis.analyzer.PaodingAnalyzer;import net.paoding.analysis.knife.Paoding;import net.paoding.analysis.knife.PaodingMaker;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.index.IndexWriterConfig.OpenMode;import org.apache.lucene.queryParser.MultiFieldQueryParser;import org.apache.lucene.queryParser.ParseException;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.Sort;import org.apache.lucene.search.SortField;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleFragmenter;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.FSDirectory;import 
org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.util.Version;import com.junjiao.util.java.PropertyUtil;/** * luence操作工具类 提供索引创建、查询功能 lucene vsrsion 3.6.1 *  * 【索引的检查与修复】 * CheckIndex在lucene-core jar包的org.apache.lucene.index目录下。它的功能是检查索引的的健康情况和修复索引。<br/> * 如果检查出某些segments有错误, 可以通过-fix参数执行修复操作,修复的过程就是创建一个新的segments,把所有引 <br/> * 用错误segments的索引数据删除。 * *cd /var/www/virtualhost/qa.51auto.cn/WEB-INF/lib *java -cp /var/www/virtualhost/qa.51auto.cn/WEB-INF/lib/lucene-core-3.6.1.jar -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex /var/www/virtualhost/qa.51auto.cn/WEB-INF/index/all *检查 *java -cp lucene-core-3.6.1.jar -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex /var/www/virtualhost/qa.51auto.cn/WEB-INF/index/all *修复 *java -cp lucene-core-3.6.1.jar -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex /var/www/virtualhost/qa.51auto.cn/WEB-INF/index/tag -fix * @author jiaojun [junjiao.j@gmail.com] * @version v0.0.1 * @param <T> * @date 2012-08-20 */public class LuceneUtil<T> {private static Log log = LogFactory.getLog(LuceneUtil.class);/** * 索引优化后文件段的数量,数量越大,优化效率越大 */private static final int DEFAULT_MAX_NUM_SEGMENTS = 3;/** * 低版本的查询索引存活周期 */private static final long STALE_INDEXREADER_SURVIVAL_TIME = 60000;private static Map<String, IndexWriter> writerPool = new HashMap<String, IndexWriter>();private static Map<String, IndexReader> readerPool = new HashMap<String, IndexReader>();/** * 存放IndexReader的Map,Map里存放的都是已经实例化好的IndexReader */private static Map<Long, IndexReader> stalereaderPool = new HashMap<Long, IndexReader>();private static LuceneUtil util = null;private LuceneUtil() {}public synchronized static LuceneUtil getInstance() {if (util == null) {util = new LuceneUtil();}return util;}static {init();}/** * 始化索引池初 */public static void init() {log.info("索引池初始化开始");String indexDir = PropertyUtil.getPropertiesByKey("lucene.properties","lucene.index.dir");String pool = 
PropertyUtil.getPropertiesByKey("lucene.properties","lucene.index.pool");for (String poolDir : pool.split(",")) {synchronized (writerPool) {try {IndexWriter iw = createIndexWriter(indexDir + poolDir);if (iw != null)writerPool.put(poolDir, iw);} catch (IOException e) {log.error("writerPool初始化失败,原因:" + e.getMessage());}}synchronized (writerPool) {try {IndexReader ir = IndexReader.open(FSDirectory.open(getIndexFile(indexDir + poolDir)));if (ir != null)readerPool.put(poolDir, ir);} catch (Exception e) {log.error("readerPool初始化失败,原因:" + e.getMessage());}}}log.info("索引池初始化完成");}/** * 创建索引池初始化 */public static void initIndexWriter() {log.info("【创建索引池】初始化开始");String indexDir = PropertyUtil.getPropertiesByKey("lucene.properties","lucene.index.dir");String pool = PropertyUtil.getPropertiesByKey("lucene.properties","lucene.index.pool");for (String poolDir : pool.split(",")) {synchronized (writerPool) {try {IndexWriter iw = createIndexWriter(indexDir + poolDir);if (iw != null)writerPool.put(poolDir, iw);} catch (IOException e) {log.error("writerPool初始化失败,原因:" + e.getMessage());}}}log.info("【创建索引池】初始化完成");}/** * 创建索引,建议定时更新即可 *  * @param <T> * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param list *            需要创建索引的数据 * @param clz *            数据绑定的对象 * @param fields *            须创建索引的属性(小写) * @throws IOException * @throws NoSuchMethodException * @throws SecurityException * @throws InvocationTargetException * @throws IllegalAccessException * @throws IllegalArgumentException */public static <T> void createIndex(String indexDir, String poolDir,List<?> list, Class<T> clz, String[] fields) throws IOException,SecurityException, NoSuchMethodException, IllegalArgumentException,IllegalAccessException, InvocationTargetException {createIndex(indexDir,poolDir,list,clz, fields,false);}/** * 创建索引,建议定时更新即可 *  * @param <T> * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param list *            需要创建索引的数据 * @param clz *         
   数据绑定的对象 * @param fields *            须创建索引的属性(小写) * @param isDel  *   是否删除原索引重新创建 * @throws IOException * @throws NoSuchMethodException * @throws SecurityException * @throws InvocationTargetException * @throws IllegalAccessException * @throws IllegalArgumentException */public static <T> void createIndex(String indexDir, String poolDir,List<?> list, Class<T> clz, String[] fields,boolean isDel) throws IOException,SecurityException, NoSuchMethodException, IllegalArgumentException,IllegalAccessException, InvocationTargetException {log.info("索引开始创建,服务于 " + clz + " | " + fields.toString());long start = new Date().getTime();IndexWriter writer = getIndexWriter(indexDir, poolDir);if (null == writer) {log.error("IndexWriter获取失败");return;}// 删除全部索引if(isDel){writer.deleteAll();}SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");if (null != list && list.size() > 0) {for (int i = 0; i < list.size(); i++) {Document doc = new Document();java.lang.reflect.Field[] cfs = clz.getDeclaredFields();for (java.lang.reflect.Field cf : cfs) {String fieldName = cf.getName();String stringLetter = fieldName.substring(0, 1).toUpperCase();String getName = "get" + stringLetter+ fieldName.substring(1);// String setName="set"+stringLetter+fieldName.substring(1);Method getMethod = clz.getMethod(getName);// Method setMethod=clz.getMethod(setName, new// Class[]{cf.getType()});Object value = getMethod.invoke((T) list.get(i));if (Arrays.asList(fields).contains(fieldName)) {if (value != null && !"".equals(value.toString())) {String tmp = "";if (cf.getGenericType().toString().equals("class java.util.Date")) {tmp = simpleDateFormat.format(value);} else {tmp = value.toString();}doc.add(new Field(fieldName, tmp, Field.Store.YES,Field.Index.ANALYZED));}}}if(!isDel){/** * 先将fields[0]的索引查找到,然后再删除,最后将新的索引添加到索引文件中 */if(null !=  doc.get(fields[0])){writer.updateDocument(new Term(fields[0], doc.get(fields[0])),doc);}}}log.info("索引创建完成,保存目录:" + indexDir + poolDir + ",索引创建/记录:"+ 
writer.maxDoc() + "/" + list.size() + "条,花费时间:"+ (new Date().getTime() - start) / 1000 + "秒!" + writer);list.clear();}writer.forceMerge(DEFAULT_MAX_NUM_SEGMENTS);writer.commit();}/** * 分页查询索引 排序就默认按传入的fields属性的第一个元素的匹配度降序排列 *  * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param keyWords *            关键词 * @param fields *            属性 * @param pageSize *            每页记录数 * @param currentPage *            当前页数 * @throws IOException * @return SearchResult 查询结果集 * @throws IOException * @throws InvalidTokenOffsetsException */public static SearchResult searchPage(String indexDir, String poolDir,String[] keyWords, String[] fields, int pageSize, int currentPage)throws IOException, InvalidTokenOffsetsException {return searchPage(indexDir, poolDir, keyWords, fields, true, pageSize,currentPage);}/** * 分页查询索引 排序就默认按传入的fields属性的第一个元素的匹配度降序排列 *  * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param keyWords *            关键词 * @param fields *            属性 * @param isHighlighter *            是否高亮显示 * @param pageSize *            每页记录数 * @param currentPage *            当前页数 * @throws IOException * @return SearchResult 查询结果集 * @throws IOException * @throws InvalidTokenOffsetsException */public static SearchResult searchPage(String indexDir, String poolDir,String[] keyWords, String[] fields, boolean isHighlighter,int pageSize, int currentPage) throws IOException,InvalidTokenOffsetsException {return searchPage(indexDir, poolDir, keyWords, fields, true, pageSize,currentPage,true);}/** * 分页查询索引 排序就默认按传入的fields属性的第一个元素的匹配度降序排列 *  * @param indexDir *            索引根保存位置 * @param poolDir *            索引池保存位置 * @param keyWords *            关键词 * @param fields *            属性 * @param isHighlighter *            是否高亮显示 * @param pageSize *            每页记录数 * @param currentPage *            当前页数 * @param isPage *            是否分页,如无需分页只查条数的话,直接传入条数即可,大大优化索引查询效率 * @throws IOException * @return SearchResult 查询结果集 * @throws 
IOException * @throws InvalidTokenOffsetsException */public static SearchResult searchPage(String indexDir, String poolDir,String[] keyWords, String[] fields, boolean isHighlighter,int pageSize, int currentPage,boolean isPage) throws IOException,InvalidTokenOffsetsException {//将关键字中的特殊符号过滤if(null != keyWords && keyWords.length>0){String[] tmp = new String[keyWords.length];for(int i = 0;i<keyWords.length;i++){tmp[i] = stringFilter(keyWords[i]);}keyWords = tmp;}SearchResult searchResult = new SearchResult();IndexSearcher searcher = null;try {IndexReader reader = getIndexReader(indexDir, poolDir);if (null != reader) {reader = refreshIndexReader(poolDir, reader);}if(null == reader){log.error("索引文件为空,请检查!");return null;}searcher = new IndexSearcher(reader);searcher.setDefaultFieldSortScoring(true, false);Analyzer analyzer = getAnalyzer();if (keyWords[0].length() < 2) {analyzer = new StandardAnalyzer(Version.LUCENE_36);}Query query = MultiFieldQueryParser.parse(Version.LUCENE_36,keyWords, fields, analyzer);// query.setBoost(0.1f);/* * 1.被排序的字段必须被索引过(Indexecd),在索引时不能 用 Field.Index.TOKENIZED * (用UN_TOKENIZED可以正常实现.用NO时查询正常,但排序不能正常设置升降序) 2.SortField类型 * SCORE、DOC、AUTO、STRING、INT、FLOAT、CUSTOM 此类型主要是根据字段的类型选择 * 3.SortField的第三个参数代表是否是降序true:降序 false:升序 */Sort sort = new Sort(new SortField[] { SortField.FIELD_SCORE,new SortField(fields[0], SortField.STRING, true) });TopDocs topDocs = null;if(isPage){topDocs = searcher.search(query, searcher.maxDoc(), sort);}else{int searchNum = pageSize<searcher.maxDoc()?pageSize:searcher.maxDoc();topDocs = searcher.search(query, searchNum, sort);}ScoreDoc[] hits = topDocs.scoreDocs;int begin = pageSize * (currentPage - 1);int end = Math.min(begin + pageSize, hits.length);List<Document> documents = new ArrayList<Document>();for (int i = begin; i < end; i++) {Document document = searcher.doc(hits[i].doc);if (isHighlighter) {document.getField(fields[0]).setValue(toHighlighter(query, document, fields[0],analyzer));}documents.add(document);// 
hits[i].score 匹配度分值}searchResult.setPageSize(pageSize);searchResult.setCurrentPage(currentPage);searchResult.setDocuments(documents);searchResult.setTotalCount(hits.length);} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} catch (ParseException e) {e.printStackTrace();}return searchResult;}/** * 释放索引 */public static void destroy() {synchronized (writerPool) {Iterator<Entry<String, IndexWriter>> iterator = writerPool.entrySet().iterator();while (iterator.hasNext()) {Entry<String, IndexWriter> entry = iterator.next();IndexWriter indexWriter = entry.getValue();try {indexWriter.commit();indexWriter.close();} catch (Exception e) {log.error("writerPool销毁失败,原因:" + e.getMessage());}}writerPool.clear();}synchronized (readerPool) {Iterator<Entry<String, IndexReader>> iterator = readerPool.entrySet().iterator();while (iterator.hasNext()) {Entry<String, IndexReader> entry = iterator.next();IndexReader indexReader = entry.getValue();try {indexReader.close();} catch (Exception e) {log.error("readerPool销毁失败,原因:" + e.getMessage());}}readerPool.clear();}}/** * 释放创建索引 */public static void destroyIndexWriter() {synchronized (writerPool) {Iterator<Entry<String, IndexWriter>> iterator = writerPool.entrySet().iterator();while (iterator.hasNext()) {Entry<String, IndexWriter> entry = iterator.next();IndexWriter indexWriter = entry.getValue();try {indexWriter.close();} catch (Exception e) {log.error("writerPool销毁失败,原因:" + e.getMessage());}}writerPool.clear();}log.info("【创建索引池】完成销毁");}/** * 释放旧查询索引 */public static void destroyIndexReader(Map<Long, IndexReader> readerPool) {synchronized (readerPool) {Iterator<Entry<Long, IndexReader>> iterator = readerPool.entrySet().iterator();while (iterator.hasNext()) {Entry<Long, IndexReader> entry = iterator.next();if ((System.currentTimeMillis() - entry.getKey()) >= STALE_INDEXREADER_SURVIVAL_TIME) {IndexReader indexReader = entry.getValue();try {indexReader.close();log.info("【查询索引池】完成销毁" + 
entry.getValue());} catch (Exception e) {log.error("readerPool销毁失败,原因:" + e.getMessage());}}}readerPool.clear();}}/** * 刷新指定的indexReader--加载新的索引数据,若产生新的indexReader, * 则在indexReaderMap里替换旧的indexReader *  * @param indexDirName * @param indexReader * @return {@link IndexReader} */private synchronized static IndexReader refreshIndexReader(String poolDir,IndexReader indexReader) {try {destroyIndexReader(stalereaderPool);IndexReader newIndexReader = indexReader.reopen();if (newIndexReader != indexReader) {IndexReader oldIndexReader = indexReader;stalereaderPool.put(System.currentTimeMillis(), oldIndexReader);readerPool.put(poolDir, newIndexReader);}} catch (Exception e) {log.error("刷新索引失败" + e.getMessage());}// return newest IndexReaderreturn readerPool.get(poolDir);}/** * 过滤特殊符号 *  * @param str * @return * @throws PatternSyntaxException */public static String stringFilter(String str) throws PatternSyntaxException {String regEx = "[`~!@#$%^&*()+=|{ }':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?·\'\"\\-\t\n\r]";Pattern p = Pattern.compile(regEx);Matcher m = p.matcher(str);return m.replaceAll("").trim();}/** * 高亮设置 *  * @param query * @param doc * @param field * @return */private static String toHighlighter(Query query, Document doc,String field, Analyzer analyzer) {try {SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"red\">", "</font>");Highlighter highlighter = new Highlighter(simpleHtmlFormatter,new QueryScorer(query));highlighter.setTextFragmenter(new SimpleFragmenter(doc.get(field).length() + 100));TokenStream tokenStream = analyzer.tokenStream(field,new StringReader(doc.get(field)));String highlighterStr = highlighter.getBestFragment(tokenStream,doc.get(field));return highlighterStr == null ? 
doc.get(field) : highlighterStr;} catch (IOException e) {log.error(e.getMessage());} catch (InvalidTokenOffsetsException e) {log.error(e.getMessage());}return null;}@SuppressWarnings("static-access")private static IndexWriter getIndexWriter(String indexDir, String poolDir)throws CorruptIndexException, LockObtainFailedException,IOException {IndexWriter writer = writerPool.get(poolDir);if (writer == null) {synchronized (writerPool) {if (!writerPool.containsKey(poolDir)) {try {writer = createIndexWriter(indexDir + poolDir);if (writer != null)writerPool.put(poolDir, writer);} catch (IOException e) {if (IndexWriter.isLocked(FSDirectory.open(getIndexFile(indexDir + poolDir)))) {IndexWriter.unlock(FSDirectory.open(getIndexFile(indexDir + poolDir)));}log.error(e.getMessage());e.printStackTrace();destroy();}}}}return writer;}private static IndexReader getIndexReader(String indexDir, String poolDir)throws CorruptIndexException, IOException {IndexReader reader = readerPool.get(poolDir);synchronized (readerPool) {if (!readerPool.containsKey(poolDir)) {try {reader = IndexReader.open(FSDirectory.open(getIndexFile(indexDir + poolDir)));if (reader != null)readerPool.put(poolDir, reader);} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}return reader;}private static IndexWriter createIndexWriter(String dir)throws CorruptIndexException, LockObtainFailedException,IOException {/* * mmseg4j:ComplexAnalyzer 适用于高匹配度的中文 lucene标准:StandardAnalyzer */IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36,getAnalyzer());/* * 创建索引模式:CREATE,覆盖模式; conf.setOpenMode(OpenMode.CREATE); *  * APPEND,追加模式 conf.setOpenMode(OpenMode.CREATE_OR_APPEND); */conf.setOpenMode(OpenMode.CREATE_OR_APPEND);if (IndexWriter.isLocked(FSDirectory.open(getIndexFile(dir)))) {IndexWriter.unlock(FSDirectory.open(getIndexFile(dir)));}IndexWriter writer = new IndexWriter(FSDirectory.open(getIndexFile(dir)), conf);return writer;}/** * 获取分词模式 paodingAnalyer 
Paoding paoding = PaodingMaker.make(); return * PaodingAnalyzer.writerMode(paoding); //writer mode意味要同时支持最大和最小切词 * lucene标准:StandardAnalyzer new StandardAnalyzer(Version.LUCENE_36); *  * @return */private static Analyzer getAnalyzer() {Paoding paoding = PaodingMaker.make();return PaodingAnalyzer.writerMode(paoding);}private static File getIndexFile(String dir) {return new File(new StringBuilder(new File(LuceneUtil.class.getResource("/").getPath()).getParentFile().getParentFile().getPath().replace('\\', '/').toString()).append(dir).toString());}public  static void main(String[] args) {System.out.println(stringFilter("[不懂就要问]请问 H6不能插u盘听歌吗  知道的说下  谢谢!"));// init();//for (int i = 0; i < 50; i++) {//new Thread(new Runnable() {////@Override//public void run() {//try {//Thread.currentThread().sleep(500);//} catch (InterruptedException e) {//e.printStackTrace();//}////IndexWriter close = null;//IndexWriter noClose = null;//IndexWriter searchLog = null;//try {//close = getIndexWriter("/WEB-INF/index/", "close");//noClose = getIndexWriter("/WEB-INF/index/", "noClose");//searchLog = getIndexWriter("/WEB-INF/index/",//"searchLog");////IndexReader readerc = getIndexReader("/WEB-INF/index/",//"close");//IndexReader readern = getIndexReader("/WEB-INF/index/",//"noClose");//IndexReader readers = getIndexReader("/WEB-INF/index/",//"searchLog");////// System.out.println(readerc);//// System.out.println(readern);//// System.out.println(readers);////} catch (CorruptIndexException e) {//e.printStackTrace();//} catch (LockObtainFailedException e) {//e.printStackTrace();//} catch (IOException e) {//e.printStackTrace();//}////if (close == null || noClose == null) {//System.out.println("-----------");//}//// System.out.println(close);//// System.out.println(noClose);//// System.out.println(searchLog);////}//}).start();//}////// destroy();}}


原创粉丝点击