lucene管理IndexReader和IndexWriter的最佳实践

来源:互联网 发布:淘宝最好的推广方法 编辑:程序博客网 时间:2024/06/07 03:38
实例化IndexReader需要加载索引文件,所以实例化它是非常耗资源的。 

IndexReader是线程安全的,通常一个索引目录,我们只实例化一个IndexReader就够了。 

当索引数据足够大(GB 的数量级)的时候,一般把索引资源按照某种规则散列到多个文件目录里(如:index-0、index-1、index-2……或者 blog、posts……),当然这些文件目录应该放在同一个根目录下——这时,最好的方式就是用一个 Pool 去维护这些 IndexReader:保证一个文件目录只有一个实例,且不同的 IndexReader 可以根据名字动态地组合。 


StandardIndexReaderPool.java 
Java代码  收藏代码
  1. package com.qiu.search.pool.impl;  
  2.   
  3. import java.io.File;  
  4. import java.io.IOException;  
  5. import java.util.Iterator;  
  6. import java.util.List;  
  7. import java.util.Map;  
  8. import java.util.Map.Entry;  
  9. import java.util.concurrent.ConcurrentHashMap;  
  10.   
  11. import org.apache.commons.lang.builder.ToStringBuilder;  
  12. import org.apache.lucene.index.CorruptIndexException;  
  13. import org.apache.lucene.index.IndexReader;  
  14. import org.apache.lucene.store.FSDirectory;  
  15. import org.springframework.util.Assert;  
  16.   
  17. import com.dukuai.search.exception.PoolException;  
  18. import com.dukuai.search.pool.IndexReaderPool;  
  19. import com.dukuai.search.util.IndexResourceUtil;  
  20. import com.spinn3r.log5j.Logger;  
  21.   
  22. /** 
  23.  * {@link IndexReaderPool}的实现类。{@link StandardIndexReaderPool}确保一个目录至多只有一个{@link IndexReader},它是线程安全的, 
  24.  * {@link IndexReader}也是线程安全 
  25.  *  
  26.  */  
  27.   
  28. public class StandardIndexReaderPool implements IndexReaderPool {  
  29.     private static final Logger LOG = Logger.getLogger(StandardIndexReaderPool.class);  
  30.     /** 低版本的IndexReader的存活时间 5s */  
  31.     private static final int STALE_INDEXREADER_SURVIVAL_TIME = 5000;  
  32.   
  33.     private String name = null;  
  34.     // 索引文件的根目录的路径  
  35.     private String indexRootDirectory = null;  
  36.     // 索引文件目录名列表,目录名不包含路径  
  37.     private List<String> indexDirNameList = null;  
  38.     /** 
  39.      * 存放IndexReader的Map,Map里存放的都是已经实例化好的IndexReader 
  40.      */  
  41.     private final Map<String, IndexReader> indexReaderMap = new ConcurrentHashMap<String, IndexReader>();  
  42.     /** 
  43.      * 待关闭的IndexReader。indexReader.reopen()之后,会产生新的IndexReader。但是旧的IndexReader有可能还被其他线程调用着。 
  44.      * 旧的IndexReader都要放置到staleIndexReadersMap里,5秒之后再释放资源。 
  45.      */  
  46.     private final Map<Long, IndexReader> staleIndexReadersMap = new ConcurrentHashMap<Long, IndexReader>();  
  47.   
  48.     @Override  
  49.     public void setIndexDirNameList(List<String> indexDirNameList) {  
  50.         this.indexDirNameList = indexDirNameList;  
  51.     }  
  52.   
  53.     public void init() {  
  54.         LOG.info("%s begin initialize", getName());  
  55.         for (String indexDirName : indexDirNameList) {  
  56.             try {  
  57.                 IndexReader indexReader = createIndexReader(indexDirName);  
  58.                 if (indexReader != null)  
  59.                     indexReaderMap.put(indexDirName, indexReader);  
  60.             } catch (IOException e) {// 若初始化时出错,就直接抛错,终止程序再执行下去  
  61.                 throw new PoolException(e);  
  62.             }  
  63.         }  
  64.         LOG.info("%s initialization complete", getName());  
  65.     }  
  66.   
  67.     /** 
  68.      * 根据indexDirPath,返回IndexReader。 
  69.      *  
  70.      * @param indexDirName 文件目录名 
  71.      * @return IndexReader 
  72.      */  
  73.     public IndexReader getIndexReader(String indexDirName) {  
  74.         Assert.hasText(indexDirName, "this indexDirName must not be empty");  
  75.   
  76.         IndexReader indexReader = indexReaderMap.get(indexDirName);  
  77.         if (indexReader != null)  
  78.             return refreshIndexReader(indexDirName, indexReader);  
  79.   
  80.         synchronized (indexReaderMap) {  
  81.             if (!indexReaderMap.containsKey(indexDirName)) {  
  82.                 try {  
  83.                     indexReader = createIndexReader(indexDirName);  
  84.                 } catch (CorruptIndexException e) {  
  85.                     LOG.error("CorruptIndexException while creating IndexReader of %s,the root cause is %s",  
  86.                             indexDirName, e.getMessage());  
  87.                 } catch (IOException e) {  
  88.                     LOG.error("IOException while creating IndexReader of %s,%s", indexDirName, e.getMessage());  
  89.                 }  
  90.                 if (indexReader != null)  
  91.                     indexReaderMap.put(indexDirName, indexReader);  
  92.             }  
  93.         }  
  94.         return indexReaderMap.get(indexDirName);  
  95.     }  
  96.   
  97.     /** 
  98.      * 刷新指定的indexReader--加载新的索引数据,若产生新的indexReader,则在indexReaderMap里替换旧的indexReader 
  99.      *  
  100.      * @param indexDirName 
  101.      * @param indexReader 
  102.      * @return {@link IndexReader} 
  103.      */  
  104.     private synchronized IndexReader refreshIndexReader(String indexDirName, IndexReader indexReader) {  
  105.         try {  
  106.             closeStaleIndexReaders(staleIndexReadersMap);  
  107.             LOG.debug("hashCode of indexReader is %s", indexReader.hashCode());  
  108.             IndexReader newIndexReader = indexReader.reopen();  
  109.             if (newIndexReader != indexReader) {  
  110.                 // this indexReader are old version  
  111.                 IndexReader oldIndexReader = indexReader;  
  112.                 /** 
  113.                  * may be this oldIndexReader was invoke by other thread,so put 
  114.                  * oldIndexReader to staleIndexReadersMap,closing it after 5s; 
  115.                  */  
  116.                 staleIndexReadersMap.put(System.currentTimeMillis(), oldIndexReader);  
  117.                 LOG.debug("hashCode of oldIndexReader is %s", oldIndexReader.hashCode());  
  118.                 // replace old version IndexReader with newIndexReader  
  119.                 indexReaderMap.put(indexDirName, newIndexReader);  
  120.                 LOG.debug("hashCode of newIndexReader is %s", newIndexReader.hashCode());  
  121.             }  
  122.         } catch (Exception e) {  
  123.             LOG.error("Exception while getting IndexReader of %s,the root cause is %s", indexDirName, e.getMessage());  
  124.         }  
  125.         // return newest IndexReader  
  126.         return indexReaderMap.get(indexDirName);  
  127.     }  
  128.   
  129.     /** 
  130.      * 关闭所有低版本的IndexReaders 
  131.      *  
  132.      * @param staleIndexReadersMap 
  133.      */  
  134.     private void closeStaleIndexReaders(Map<Long, IndexReader> staleIndexReadersMap) {  
  135.         Iterator<Entry<Long, IndexReader>> entryIterator = staleIndexReadersMap.entrySet().iterator();  
  136.         while (entryIterator.hasNext()) {  
  137.             Entry<Long, IndexReader> entry = entryIterator.next();  
  138.             if ((System.currentTimeMillis() - entry.getKey()) >= STALE_INDEXREADER_SURVIVAL_TIME) {  
  139.                 try {  
  140.                     entry.getValue().close();  
  141.                     LOG.debug("a stale IndexReader whose hashCode is %s has bean closed", entry.getValue().hashCode());  
  142.                 } catch (IOException e) {  
  143.                     LOG.error("IOException while colsing IndexReader,%s", e.getMessage());  
  144.                 } finally {  
  145.                     entryIterator.remove();  
  146.                     LOG.debug("delete a stale IndexReader from pool,hashCode:" + entry.getValue().hashCode());  
  147.                 }  
  148.             }  
  149.         }  
  150.     }  
  151.   
  152.     public void destroy() {  
  153.         Iterator<Entry<String, IndexReader>> iterator = indexReaderMap.entrySet().iterator();  
  154.         while (iterator.hasNext()) {  
  155.             Entry<String, IndexReader> entry = iterator.next();  
  156.             IndexReader indexReader = entry.getValue();  
  157.             try {  
  158.                 indexReader.close();  
  159.                 indexReader = null;  
  160.             } catch (IOException e) {  
  161.                 LOG.info("IOException while closing IndexReader whose indexDirName is %s", entry.getKey());  
  162.             }  
  163.         }  
  164.         indexReaderMap.clear();  
  165.         LOG.info("%s destroyed", getName());  
  166.     }  
  167.     /** 
  168.      * 根据索引目录名实例化{@link IndexReader},有可能返回null,调用者需要判断返回的{@link IndexReader}是否为null 
  169.      *  
  170.      * @param indexDirName 
  171.      * @return {@link IndexReader} 
  172.      *         返回indexDirName对应的IndexReader,如果对应的目录不存在就返回null, 
  173.      */  
  174.     private IndexReader createIndexReader(String indexDirName) throws CorruptIndexException, IOException {  
  175.         File indexFile = new File(IndexResourceUtil.getDirPath(indexRootDirectory, indexDirName));  
  176.         if (IndexResourceUtil.isEmptyIndexDir(indexFile)) {  
  177.             LOG.warn("%s is empty,no index resource", indexDirName);  
  178.             return null;  
  179.         }  
  180.         if (indexFile.exists() && indexFile.isDirectory()) {// 判断索引目录是否存在。  
  181.             return IndexReader.open(FSDirectory.getDirectory(indexFile));  
  182.         }  
  183.         return null;  
  184.     }  
  185.   
  186.     public int size() {  
  187.         return indexReaderMap.size();  
  188.     }  
  189.   
  190.     @Override  
  191.     public String toString() {  
  192.         return (new ToStringBuilder(this).append("name", getName()).append("indexRootDirectory", indexRootDirectory)  
  193.                 .append("size", size()).append("indexReader Set", indexReaderMap.keySet())).toString();  
  194.     }  
  195.   
  196.     public String getName() {  
  197.         return name;  
  198.     }  
  199.   
  200.     /** spring inject */  
  201.     public void setIndexRootDirectory(String indexRootDirectory) {  
  202.         this.indexRootDirectory = indexRootDirectory;  
  203.     }  
  204.     public void setName(String name) {  
  205.         this.name = name;  
  206.     }  
  207.   
  208. }  


IndexWriter也需要Pool来管理 

StandardIndexWriterPool.java 
Java代码  收藏代码
  1. package com.dukuai.search.pool.impl;  
  2.   
  3. import java.io.File;  
  4. import java.io.IOException;  
  5. import java.util.ArrayList;  
  6. import java.util.Iterator;  
  7. import java.util.List;  
  8. import java.util.Map;  
  9. import java.util.Map.Entry;  
  10. import java.util.concurrent.ConcurrentHashMap;  
  11.   
  12. import org.apache.commons.lang.builder.ToStringBuilder;  
  13. import org.apache.lucene.analysis.standard.StandardAnalyzer;  
  14. import org.apache.lucene.index.CorruptIndexException;  
  15. import org.apache.lucene.index.IndexWriter;  
  16.   
  17. import com.dukuai.search.exception.PoolException;  
  18. import com.dukuai.search.pool.IndexWriterPool;  
  19. import com.dukuai.search.util.IndexResourceUtil;  
  20. import com.dukuai.search.util.MetisUtil;  
  21. import com.spinn3r.log5j.Logger;  
  22.   
  23. /** 
  24.  * <code>IndexWriterPool</code>的实现类。<code>StandardIndexWriterPool</code>是线程安全的 
  25.  
  26.  
  27. public class StandardIndexWriterPool implements IndexWriterPool { 
  28.     private static final Logger LOG = Logger.getLogger(); 
  29.     /** 
  30.      * 索引优化后文件段的数量,数量越大,优化效率月到 
  31.      */  
  32.     private static final int DEFAULT_MAX_NUM_SEGMENTS = 2;  
  33.       
  34.     private String indexRootDirectory = null;  
  35.     private String name = null;  
  36.     /** 
  37.      * 索引优化后块的数量,数字越大优化速度越快、优化效果越不显著。 
  38.      */  
  39.     private int maxNumSegments = DEFAULT_MAX_NUM_SEGMENTS;  
  40.     /** 
  41.      * 存放IndexWriter的map 
  42.      */  
  43.     private Map<String, IndexWriter> indexWriterMap = new ConcurrentHashMap<String, IndexWriter>();  
  44.   
  45.     private List<String> indexDirNameList = null;  
  46.   
  47.     @Override  
  48.     public void setIndexDirNameList(List<String> indexDirNameList) {  
  49.         this.indexDirNameList = indexDirNameList;  
  50.     }  
  51.     /** 
  52.      * <code>StandardIndexWriterPool</code>的初始化,预加载<code>IndexWriter</code>。 
  53.      */  
  54.     public void init() {  
  55.         LOG.info("%s begin initialize", getName());  
  56.         synchronized (indexWriterMap) {  
  57.             for (String indexDirName : indexDirNameList) {  
  58.                 indexWriterMap.put(indexDirName, createIndexWriter(indexDirName));  
  59.             }  
  60.         }  
  61.         LOG.info("%s initialization complete", getName());  
  62.     }  
  63.   
  64.     /** 
  65.      * 返回一个indexWriter,indexWriter是线程安全的,允许多个线程同时使用IndexWriter。但一个索引目录只能初始化一个IndexWriter 
  66.      *  
  67.      * @param indexDirName 
  68.      * @return IndexWriter 
  69.      */  
  70.     public IndexWriter getIndexWriter(String indexDirName) {  
  71.         if (!indexWriterMap.containsKey(indexDirName)) {  
  72.             synchronized (indexWriterMap) {  
  73.                 if (!indexWriterMap.containsKey(indexDirName)) {  
  74.                     indexWriterMap.put(indexDirName, createIndexWriter(indexDirName));  
  75.                     LOG.info("added a new IndexWriter whose name is %s to pool,the pool size:%s", indexDirName, size());  
  76.                 }  
  77.             }  
  78.         }  
  79.         return indexWriterMap.get(indexDirName);  
  80.     }  
  81.   
  82.     /** 
  83.      * 创建一个新的IndexWriter,不允许多个线程同时调用,因为方法是私有的,能确保不会同时被调用,所以就免去锁了。 
  84.      *  
  85.      * @param indexDirName 
  86.      * @return {@link IndexWriter} 
  87.      */  
  88.     private IndexWriter createIndexWriter(String indexDirName) {  
  89.         final String indexDirPath = getIndexDirPath(indexDirName);  
  90.         boolean create = IndexResourceUtil.isEmptyIndexDir(indexDirPath);  
  91.         try {  
  92.             return new IndexWriter(indexDirPath, new StandardAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);  
  93.         } catch (Exception e) {  
  94.             throw new PoolException(e.getMessage());  
  95.         }  
  96.     }  
  97.     /** 
  98.      * 提交索引,只有提交的索引才能被检索的到。 见{@link IndexWriter#commit()} 
  99.      */  
  100.     public void commit() {  
  101.         LOG.info("begin to commit all IndexWiters of pool,the pool size:%s", size());  
  102.         synchronized (indexWriterMap) {  
  103.             Iterator<Entry<String, IndexWriter>> iterator = indexWriterMap.entrySet().iterator();  
  104.             while (iterator.hasNext()) {  
  105.                 Entry<String, IndexWriter> entry = iterator.next();  
  106.                 IndexWriter indexWriter = entry.getValue();  
  107.                 try {  
  108.                     indexWriter.commit();  
  109.                 } catch (Exception e) {  
  110.                     LOG.error("exception while commiting pending updates,indexDir:%s,exception:%s", entry.getKey(), e  
  111.                             .getMessage());  
  112.                     destoryIndexWriter(iterator, indexWriter);  
  113.                 }  
  114.             }  
  115.         }  
  116.         LOG.info("%s IndexWiters had committed pending updates", size());  
  117.     }  
  118.   
  119.     /** 
  120.      * 优化索引,提升检索速度。另注意事项见{@link IndexWriter#optimize} 
  121.      */  
  122.     public void optimize() {  
  123.         LOG.info("begin to optimize at %s", MetisUtil.getCurrentDisplayFormatTime());  
  124.         synchronized (indexWriterMap) {  
  125.             Iterator<Entry<String, IndexWriter>> iterator = indexWriterMap.entrySet().iterator();  
  126.             while (iterator.hasNext()) {  
  127.                 Entry<String, IndexWriter> entry = iterator.next();  
  128.                 IndexWriter indexWriter = entry.getValue();  
  129.                 try {  
  130.                     indexWriter.commit();  
  131.                     indexWriter.optimize(maxNumSegments);  
  132.                 } catch (Exception e) {  
  133.                     LOG.error("Exception while optimizing %s,the root cause:%s", entry.getKey(), e.getMessage());  
  134.                     destoryIndexWriter(iterator, indexWriter);  
  135.                 }  
  136.             }  
  137.         }  
  138.         LOG.info("end optimize at %s", MetisUtil.getCurrentDisplayFormatTime());  
  139.     }  
  140.   
  141.     /** 
  142.      * 重新加载所有的{@link IndexWriter},{@link IndexWriter}不会及时释放哪些在创建索引过程中产生的索引文件碎片,哪怕哪些索引文件已经消失。 
  143.      * {@link #reload()}就是为了释放哪些文件句柄,防止进程持有过多的文件句柄。 
  144.      */  
  145.     public void reload() {  
  146.         LOG.info("begin to reload %s at %s", name, MetisUtil.getCurrentDisplayFormatTime());  
  147.         // 需要重新加载的索引目录列表  
  148.         List<String> indexDirNameList = new ArrayList<String>();  
  149.         synchronized (indexWriterMap) {  
  150.             Iterator<Entry<String, IndexWriter>> iterator = indexWriterMap.entrySet().iterator();  
  151.             while (iterator.hasNext()) {  
  152.                 Entry<String, IndexWriter> entry = iterator.next();  
  153.                 indexDirNameList.add(entry.getKey());  
  154.                 IndexWriter indexWriter = entry.getValue();  
  155.                 try {  
  156.                     indexWriter.commit();  
  157.                 } catch (Exception e) {  
  158.                     LOG.error("Exception while commiting %s,the root cause:%s", entry.getKey(), e.getMessage());  
  159.                 } finally {  
  160.                     destoryIndexWriter(iterator, indexWriter);  
  161.                 }  
  162.             }  
  163.   
  164.             for (String indexDirName : indexDirNameList) {  
  165.                 indexWriterMap.put(indexDirName, createIndexWriter(indexDirName));  
  166.             }  
  167.         }  
  168.         LOG.info("%s reload end at %s", name, MetisUtil.getCurrentDisplayFormatTime());  
  169.     }  
  170.     /** 
  171.      * 销毁指定的{@link IndexWriter} 
  172.      */  
  173.     private void destoryIndexWriter(Iterator<Entry<String, IndexWriter>> iterator, IndexWriter indexWriter) {  
  174.         try {  
  175.             indexWriter.close();  
  176.         } catch (CorruptIndexException e) {  
  177.             LOG.error("CorruptIndexException while closing indexWriter,the root cause:%s", e.getMessage());  
  178.         } catch (IOException e) {  
  179.             LOG.error("IOException while closing indexWriter,the root cause:%s", e.getMessage());  
  180.         }  
  181.         iterator.remove();  
  182.         LOG.info("destory a indexWriter,current pool's size:%s", size());  
  183.     }  
  184.     /** 
  185.      * 销毁{@link StandardIndexWriterPool},释放持有的资源。 
  186.      */  
  187.     public void destroy() {  
  188.         synchronized (indexWriterMap) {  
  189.             Iterator<Entry<String, IndexWriter>> iterator = indexWriterMap.entrySet().iterator();  
  190.             while (iterator.hasNext()) {  
  191.                 Entry<String, IndexWriter> entry = iterator.next();  
  192.                 IndexWriter indexWriter = entry.getValue();  
  193.                 try {  
  194.                     indexWriter.commit();  
  195.                     indexWriter.close();  
  196.                 } catch (Exception e) {  
  197.                     LOG.error("Exception while closing %s,the root cause:%s", entry.getKey(), e.getMessage());  
  198.                     destoryIndexWriter(iterator, indexWriter);  
  199.                 }  
  200.             }  
  201.             indexWriterMap = null;  
  202.             LOG.info("%s destoryed", getName());  
  203.         }  
  204.     }  
  205.   
  206.     private String getIndexDirPath(String indexDirName) {  
  207.         return (new StringBuffer(indexRootDirectory).append(File.separatorChar).append(indexDirName)).toString();  
  208.     }  
  209.   
  210.     public int size() {  
  211.         return this.indexWriterMap.size();  
  212.     }  
  213.   
  214.     public String getName() {  
  215.         return name;  
  216.     }  
  217.   
  218.     @Override  
  219.     public String toString() {  
  220.         ToStringBuilder builder = new ToStringBuilder(this);  
  221.         builder.append("name"this.name);  
  222.         builder.append("indexRootDirectory"this.indexRootDirectory);  
  223.         builder.append("size"this.size());  
  224.         builder.append("IndexWriter Set", indexWriterMap.keySet());  
  225.         return builder.toString();  
  226.     }  
  227.   
  228.     /** spring inject */  
  229.     public void setName(String name) {  
  230.         this.name = name;  
  231.     }  
  232.     public void setIndexRootDirectory(String indexRootDirectory) {  
  233.         this.indexRootDirectory = indexRootDirectory;  
  234.     }  
  235.   
  236.     public void setMaxNumSegments(int maxNumSegments) {  
  237.         this.maxNumSegments = maxNumSegments;  
  238.     }  
  239. }  
原创粉丝点击