lucene + spring

来源：互联网　发布：粉底液推荐知乎　编辑：程序博客网　时间：2024/05/16 02:48

目录结构

一，lucene的索引工具类

[java] view plaincopy
package com.hwt.lucene.index;  
  
import java.io.File;  
import java.io.IOException;  
import java.util.List;  
  
import net.paoding.analysis.analyzer.PaodingAnalyzer;  
  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.index.CorruptIndexException;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.IndexWriter;  
import org.apache.lucene.index.IndexWriterConfig;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.FSDirectory;  
import org.apache.lucene.store.LockObtainFailedException;  
import org.apache.lucene.util.Version;  
  
/** 
 * lucene的索引工具类 
 *  
 * @author 黄文韬 
 *  
 */  
public class IndexUtils {  
    // 庖丁解牛分词器（单例）  
    private static Analyzer ANALYZER = new PaodingAnalyzer();  
    // 索引的路径  
    private static final String indexPath = "WebRoot/lucene/index";  
  
    /** 
     * 得到庖丁解牛分词器 
     *  
     * @return 
     */  
    public static Analyzer getAnalyzer() {  
        return ANALYZER;  
    }  
  
    /** 
     * 得到路径对象 
     *  
     * @param path 相对路径 
     * @return 
     */  
    public static Directory getDirectory(String path) {  
        Directory directory = null;  
        try {  
            directory = FSDirectory.open(new File(path));  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
        return directory;  
    }  
  
    /** 
     * 得到读索引类 
     * @return 
     */  
    public static IndexReader getIndexReader() {  
        IndexReader reader = null;  
        try {  
            reader = IndexReader.open(getDirectory(indexPath));  
        } catch (CorruptIndexException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
        return reader;  
    }  
  
    /** 
     * 得到些索引类 
     * @return 
     */  
    public static IndexWriter getIndexWriter() {  
        IndexWriter writer = null;  
        try {  
            writer = new IndexWriter(getDirectory(indexPath),  
                    new IndexWriterConfig(Version.LUCENE_36, ANALYZER));  
        } catch (CorruptIndexException e) {  
            e.printStackTrace();  
        } catch (LockObtainFailedException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        }  
        return writer;  
    }  
  
    /** 
     * 得到索引搜索类 
     * @return 
     */  
    public static IndexSearcher getIndexSearcher() {  
        IndexSearcher searcher = null;  
        try {  
            searcher = new IndexSearcher(getIndexReader());  
        } catch (Exception e) {  
            e.printStackTrace();  
        }  
        return searcher;  
    }  
  
    /** 
     * 创建索引 
     *  
     * @param result 
     */  
    public static void createIndex(List<IndexField> result) {  
        // 得到输出索引类  
        IndexWriter indexWriter = null;  
        // 索引类  
        try {  
            indexWriter = getIndexWriter();  
            Document doc = new Document();  
            for (IndexField findx : result) {  
                // 是否存储：Store.YES/Store.NO  
                // 是否分词：  
                // Index.ANALYZED/Index.NOT_ANALYZED/Index.NO/Index.ANALYZED_NO_NORMS  
                doc.add(new Field(findx.getFieldName(), findx.getFieldValue(),  
                        findx.getFieldStore(), findx.getFieldAnalyzed()));  
            }  
            indexWriter.addDocument(doc);  
        } catch (CorruptIndexException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        } finally {  
            try {  
                // 关闭writer  
                indexWriter.close();  
            } catch (CorruptIndexException e) {  
                e.printStackTrace();  
            } catch (IOException e) {  
                e.printStackTrace();  
            }  
        }  
    }  
  
    /** 
     * 优化索引 
     */  
    public static void mergeIndex() {  
        IndexWriter indexWriter = null;  
        // 强制优化索引  
        try {  
            indexWriter = getIndexWriter();  
            indexWriter.forceMerge(1);  
        } catch (CorruptIndexException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        } finally {  
            try {  
                indexWriter.close();  
            } catch (CorruptIndexException e) {  
                e.printStackTrace();  
            } catch (IOException e) {  
                e.printStackTrace();  
            }  
        }  
    }  
  
    /** 
     * 更新所有 
     * @param fields 新的document字段信息 
     * @param term 需要替换的查找条件 
     */  
    public static void updateIndex(List<IndexField> fields, Term term) {  
        // 得到输出索引类  
        IndexWriter indexWriter = null;  
        // 索引类  
        try {  
            indexWriter = getIndexWriter();  
            Document doc = new Document();  
            // 是否存储：Store.YES/Store.NO  
            // 是否分词：  
            // Index.ANALYZED/Index.NOT_ANALYZED/Index.NO/Index.ANALYZED_NO_NORMS  
            for (IndexField field : fields) {  
                doc.add(new Field(field.getFieldName(), field.getFieldValue(),  
                        field.getFieldStore(), field.getFieldAnalyzed()));  
            }  
            indexWriter.updateDocument(term, doc, ANALYZER);  
            indexWriter.forceMerge(1);  
        } catch (CorruptIndexException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        } finally {  
            try {  
                // 关闭writer  
                indexWriter.close();  
            } catch (CorruptIndexException e) {  
                e.printStackTrace();  
            } catch (IOException e) {  
                e.printStackTrace();  
            }  
        }  
    }  
  
    /** 
     * 删除全部索引文件 
     */  
    public static void deleteAll() {  
        IndexWriter writer = null;  
        try {  
            writer = getIndexWriter();  
            writer.deleteAll();  
        } catch (IOException e) {  
            e.printStackTrace();  
        } finally {  
            try {  
                writer.close();  
            } catch (CorruptIndexException e) {  
                e.printStackTrace();  
            } catch (IOException e) {  
                e.printStackTrace();  
            }  
        }  
    }  
  
    /** 
     * 根据条件删除索引 
     * @param term 条件 
     */  
    public static void delete(Term term) {  
        IndexWriter writer = null;  
        IndexReader reader = getIndexReader();  
        try {  
            writer = getIndexWriter();  
            writer.deleteDocuments(term);  
            writer.forceMerge(1);  
        } catch (IOException e) {  
            e.printStackTrace();  
        } finally {  
            try {  
                writer.close();  
            } catch (CorruptIndexException e) {  
                e.printStackTrace();  
            } catch (IOException e) {  
                e.printStackTrace();  
            }  
        }  
    }  
  
}  

二，文件类型的搜索

[java] view plaincopy
package com.hwt.lucene.index;  
  
import java.io.BufferedReader;  
import java.io.File;  
import java.io.FileInputStream;  
import java.io.FileNotFoundException;  
import java.io.IOException;  
import java.io.InputStreamReader;  
  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field;  
import org.apache.lucene.document.Field.Index;  
import org.apache.lucene.document.Field.Store;  
  
/** 
 * 文件类型的搜索 
 * @author 黄文韬 
 * 
 */  
public class FileDocument {  
      
    /** 
     * 将文件转换为一个document对象 
     * @param file 文件 
     * @return 
     */  
    public Document fileToDocument(File file){  
        Document document=new Document();  
        document.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));  
        document.add(new Field("content", this.readFileRetStr(file), Store.YES, Index.ANALYZED));  
        return document;  
    }  
      
    /** 
     * 将名字、内容字段转为document 
     * @param content  内容 
     * @param name 文件名字 
     * @return 
     */  
    public Document stringToDocumet(String name,String content){  
        Document document=new Document();  
        document.add(new Field("name",name, Store.YES, Index.ANALYZED));  
        document.add(new Field("content", content, Store.YES, Index.ANALYZED));  
        return document;  
    }  
      
    /** 
     * 将文件内容转为string类型 
     * @param file 文件 
     * @return 
     */  
    public String readFileRetStr(File file){  
        FileInputStream fStream = null;  
        String tempStr = "";  
        StringBuffer sBuffer = new StringBuffer();  
        try {  
            fStream = new FileInputStream(file);  
            BufferedReader bReader=new BufferedReader(new InputStreamReader(fStream,"UTF-8"));  
            while((tempStr=bReader.readLine())!=null){  
                sBuffer.append(tempStr);  
            }  
        } catch (FileNotFoundException e) {  
            e.printStackTrace();  
        } catch (IOException e) {  
            e.printStackTrace();  
        } finally {  
            try {  
                fStream.close();  
            } catch (IOException e) {  
                e.printStackTrace();  
            }  
        }  
        return sBuffer.toString();  
    }  
}  

三，封装索引字段类

[java] view plaincopy
package com.hwt.lucene.index;  
  
import java.io.Serializable;  
  
import org.apache.lucene.document.Field.Index;  
import org.apache.lucene.document.Field.Store;  
  
/** 
 * 封装索引字段类 
 * @author hwt 
 * 
 */  
public class IndexField implements Serializable{  
    private String fieldName;  
    private String fieldValue;  
    private Store fieldStore;//是否存储：Store.YES/Store.NO   
    private Index fieldAnalyzed;//是否分词： Index.ANALYZED/Index.NOT_ANALYZED/Index.NO/Index.ANALYZED_NO_NORMS  
      
    public String getFieldName() {  
        return fieldName;  
    }  
    public void setFieldName(String fieldName) {  
        this.fieldName = fieldName;  
    }  
    public String getFieldValue() {  
        return fieldValue;  
    }  
    public void setFieldValue(String fieldValue) {  
        this.fieldValue = fieldValue;  
    }  
    public Store getFieldStore() {  
        return fieldStore;  
    }  
    public void setFieldStore(Store fieldStore) {  
        this.fieldStore = fieldStore;  
    }  
    public Index getFieldAnalyzed() {  
        return fieldAnalyzed;  
    }  
    public void setFieldAnalyzed(Index fieldAnalyzed) {  
        this.fieldAnalyzed = fieldAnalyzed;  
    }  
      
}  

四，分页缓存类

[java] view plaincopy
package com.hwt.lucene.index;  
  
import java.io.IOException;  
import java.util.ArrayList;  
import java.util.HashMap;  
import java.util.List;  
import java.util.Map;  
  
import org.apache.log4j.Logger;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.Sort;  
import org.apache.lucene.search.TopDocs;  
  
public class CachePage {  
    private static final Logger LOGGER = Logger.getLogger(CachePage.class);  
    private int pageStart = 1; // 页码  
    private int pageSize = 15; // 每页显示的大小  
    private int pageNum = 0;  //总页数  
    private int totalNum = 0; //总记录条数  
    private int cacheSize = 100; // 缓存大小  
    private List<Document> cacheList = new ArrayList<Document>(); // 缓存列表  
      
    /** 
     * 构造方法 
     * @param pageSize 每页大小 
     * @param cacheSize 缓存大小 
     */  
    public CachePage(Integer pageSize, Integer cacheSize) {  
        this.pageSize = pageSize;  
        if (cacheSize != null) {  
            this.cacheSize = cacheSize;  
        }  
    }  
  
    /** 
     * 判断是否存在缓存中 
     *  
     * @param page 
     *            页码 
     * @return 
     */  
    public boolean inCache(int page) {  
        // 当前缓存对象的个数  
       int cacheNum = cacheList.size();  
       if (cacheNum > 0) {  
            if (page <= 0) {  
                page = 1;  
            }  
            // 判断当前页是不是在缓存中  
            if (page >= pageStart && (page - pageStart) * pageSize <= cacheNum) {  
                return true;  
            } else {  
                return false;  
            }  
        }else {  
            return false;  
        }  
    }  
  
    /** 
     * 清空缓存 
     * @param pageNum 起始页 
     */  
    public void refleshCache() {  
//      this.isFirst = true;  
        for (int i = cacheList.size() -1 ; i  >= 0; i--) {  
            cacheList.remove(i);  
        }  
    }  
  
    /** 
     * 新增缓存 
     *  
     * @param doc 
     */  
    public void addCache(Document doc) {  
        if (this.cacheList.size() < cacheSize) {  
            this.cacheList.add(doc);  
        } else {  
            LOGGER.info("缓存池已满");  
        }  
    }  
      
    /** 
     * 读缓存中的数据 
     * @param page 
     * @return 
     */  
    public Map readCache(int page) {  
        // 判断是否存在于缓存池中  
        int start = (page - pageStart) * pageSize;  
        int end = start + pageSize > cacheList.size() ? cacheList.size()  
                : start + pageSize;  
        //缓存中的结果集  
        List<Document> cacheRs = new ArrayList<Document>();  
        for (int i = start; i < end; i++) {  
            cacheRs.add(cacheList.get(i));  
        }  
          
        //缓存结果集  
        Map resultMap = new HashMap();  
        resultMap.put("currentPage", page); //当前页  
        resultMap.put("totalNum", totalNum); //总记录条数  
        resultMap.put("pageNum", pageNum); //总页数  
        resultMap.put("list", cacheRs);  
          
        return resultMap;  
    }  
      
    /** 
     * 搜索 
     * @param query query对象 
     * @param sort 排序对象 
     * @param page 页码 
     * @return 
     */  
    public Map search(Query query,Sort sort,int page){  
        if (page < 0) {  
            page = 1;  
        }  
        //如果存在缓存中  
        if (inCache(page)) {  
            return readCache(page);  
        }else {//如果不在缓存中  
            IndexSearcher searcher = IndexUtils.getIndexSearcher();  
            try {  
                //显示条数  
                int querySize = (page*pageSize / cacheSize + 1 )*100;  
                //设置查询、查询显示的条数、排序对象  
                TopDocs topDocs = searcher.search(query, querySize , sort);  
                  
                //总共记录条数  
                int totalNum = topDocs.totalHits;  
                int pageNum = totalNum % pageSize == 0 ? totalNum / pageSize : totalNum / pageSize + 1;  
                  
                if (page > pageNum) {  
                    page = pageNum;  
                }  
                  
                //得到记录集  
                ScoreDoc[] docs = topDocs.scoreDocs;  
                  
                //保存当前页的前后两页放入缓存中  
                int startPage = 1;  
                int endPage   = 1;  
                if (page < 3) { //前五页  
                    startPage = 1;  
                    endPage = startPage + 4 > pageNum ? pageNum : startPage + 4;  
                }else if(page > pageNum - 2){ //后五页  
                    endPage = pageNum ;  
                    startPage = endPage - 4 < 0 ? 1 : endPage - 4;  
                } else { //中间页  
                    startPage = page - 2 <= 0 ? 1 : page - 2;  
                    endPage   = page + 2 > pageNum ? pageNum : page + 2;  
                }  
                  
                //清空缓存  
                refleshCache();  
                  
                int startSize = (startPage - 1)*pageSize ;  
                int endSize = startSize + cacheSize > totalNum ? totalNum : startSize + cacheSize ;  
                  
                //将对象加入缓存中  
                for (int i = startSize ; i < endSize; i++) {  
                    Document doc = searcher.doc(docs[i].doc);  
                    addCache(doc);  
                }  
                  
                //替换缓存集合  
                this.pageNum = pageNum;  
                this.totalNum = totalNum;  
                this.pageStart = startPage;  
                  
                return readCache(page);  
                  
            } catch (IOException e) {  
                e.printStackTrace();  
                return null;  
            }  
        }  
    }  
  
    public Integer getPageSize() {  
        return pageSize;  
    }  
  
    public void setPageSize(Integer pageSize) {  
        this.pageSize = pageSize;  
    }  
  
    public Integer getPageStart() {  
        return pageStart;  
    }  
  
    public void setPageStart(Integer pageStart) {  
        this.pageStart = pageStart;  
    }  
  
    public Integer getCacheSize() {  
        return cacheSize;  
    }  
  
    public void setCacheSize(Integer cacheSize) {  
        this.cacheSize = cacheSize;  
    }  
  
    public List<Document> getCacheList() {  
        return cacheList;  
    }  
  
    public void setCacheList(List<Document> cacheList) {  
        this.cacheList = cacheList;  
    }  
      
//  public boolean isFirst() {  
//      return isFirst;  
//  }  
//  
//  public void setFirst(boolean isFirst) {  
//      this.isFirst = isFirst;  
//  }  
}  

测试类：

[java] view plaincopy
package test;  
  
import java.io.File;  
import java.io.IOException;  
import java.util.ArrayList;  
import java.util.List;  
import java.util.Map;  
  
import javax.print.Doc;  
  
import org.apache.lucene.analysis.Analyzer;  
import org.apache.lucene.analysis.cjk.CJKAnalyzer;  
import org.apache.lucene.document.Document;  
import org.apache.lucene.document.Field.Index;  
import org.apache.lucene.document.Field.Store;  
import org.apache.lucene.index.IndexReader;  
import org.apache.lucene.index.Term;  
import org.apache.lucene.queryParser.ParseException;  
import org.apache.lucene.queryParser.QueryParser;  
import org.apache.lucene.search.BooleanClause.Occur;  
import org.apache.lucene.search.BooleanQuery;  
import org.apache.lucene.search.IndexSearcher;  
import org.apache.lucene.search.Query;  
import org.apache.lucene.search.ScoreDoc;  
import org.apache.lucene.search.Searcher;  
import org.apache.lucene.search.Sort;  
import org.apache.lucene.search.SortField;  
import org.apache.lucene.search.TermQuery;  
import org.apache.lucene.search.TopDocs;  
import org.apache.lucene.store.Directory;  
import org.apache.lucene.store.FSDirectory;  
import org.apache.lucene.util.Version;  
import org.springframework.context.ApplicationContext;  
import org.springframework.context.support.ClassPathXmlApplicationContext;  
  
import com.hwt.lucene.index.CachePage;  
import com.hwt.lucene.index.IndexField;  
import com.hwt.lucene.index.IndexUtils;  
  
public class Test {  
      
    public static void main(String[] args) throws IOException, ParseException {  
        List<IndexField> fieldIndexs2 = new ArrayList<IndexField>();  
        IndexField  ind3 = new IndexField();  
        ind3.setFieldName("title");  
        ind3.setFieldValue("美国攻打伊朗");  
        ind3.setFieldStore(Store.YES);  
        ind3.setFieldAnalyzed(Index.ANALYZED);  
        fieldIndexs2.add(ind3);  
  
        IndexField  ind = new IndexField();  
        ind.setFieldName("content");  
        ind.setFieldValue("美国派兵3333，航母出发了，中国航公出发");  
        ind.setFieldStore(Store.YES);  
        ind.setFieldAnalyzed(Index.ANALYZED);  
        fieldIndexs2.add(ind);  
          
        IndexField  ind2 = new IndexField();  
        ind2.setFieldName("Id");  
        ind2.setFieldValue("12");  
        ind2.setFieldStore(Store.YES);  
        ind2.setFieldAnalyzed(Index.NOT_ANALYZED);  
        fieldIndexs2.add(ind2);  
          
        //创建索引  
//      IndexUtils.createIndex(fieldIndexs2);  
        //删除索引  
//      IndexUtils.delete(new Term("Id","2"));  
        //修改索引  
//      IndexUtils.updateIndex(fieldIndexs2, new Term("Id","2"));  
          
          
        Analyzer analyzer = IndexUtils.getAnalyzer();  
        QueryParser titleParser = new QueryParser(Version.LUCENE_36,"title",analyzer);  
        QueryParser contentParser = new QueryParser(Version.LUCENE_36,"content",analyzer);  
          
//      Query contentQuery = new TermQuery(new Term("title","美国"));  
          
        Query titleQuery = titleParser.parse("美国");  
        Query contentQuery = contentParser.parse("美国");  
          
        BooleanQuery query = new BooleanQuery();  
        query.add(titleQuery, Occur.MUST);  
        query.add(contentQuery,Occur.SHOULD);  
          
        IndexSearcher searcher = IndexUtils.getIndexSearcher();  
          
        //排序对象:排序字段，排序字段类型，是否降序（默认false升序）  
        Sort sort = new Sort(new SortField("Id",SortField.INT, true));  
        //对多个字段进行排序  
//      Sort sort = new Sort(new SortField[]{new SortField("Id",SortField.INT, true),  
//      new SortField("title",SortField.INT, true)});  
          
        CachePage cachePage = new CachePage(1, 100);  
        Map map = cachePage.search(query, sort, 1);  
        System.out.println("起始页："+ cachePage.getPageStart());  
        System.out.println("总页数："+map.get("pageNum"));  
        System.out.println("总条数："+map.get("totalNum"));  
        List<Document> docs = (List<Document>) map.get("list");  
        for (Document document : docs) {  
            System.out.println(document.get("Id"));  
            System.out.println(document.get("title"));  
            System.out.println(document.get("content"));  
        }  
          
        System.out.println("+++++++++++++++++++");  
        cachePage.refleshCache();  
//        
        //查询缓存的  
        Map map2 = cachePage.search(query, sort, 4);  
        System.out.println("起始页："+ cachePage.getPageStart());  
        System.out.println("总页数："+map2.get("pageNum"));  
        System.out.println("总条数："+map2.get("totalNum"));  
        List<Document> docs2 = (List<Document>) map2.get("list");  
        for (Document document : docs2) {  
            System.out.println(document.get("Id"));  
            System.out.println(document.get("title"));  
            System.out.println(document.get("content"));  
        }  
//        
//      System.out.println("+++++++++++++++++++");  
//      Map map3 = cachePage.search(query, sort, 5);  
//      System.out.println("总页数："+map3.get("pageNum"));  
//      System.out.println("总条数："+map3.get("totalNum"));  
//      List<Document> docs4 = (List<Document>) map3.get("list");  
//      for (Document document : docs4) {  
//          System.out.println(document.get("Id"));  
//          System.out.println(document.get("title"));  
//          System.out.println(document.get("content"));  
//      }  
          
        //对对个字段进行排序  
//      Sort sort = new Sort(new SortField[]{new SortField("Id",SortField.STRING, true),  
//                    new SortField("title",SortField.STRING, true)});  
//      TopDocs docs = searcher.search(query,100,sort); //返回前100条记录  
  
//      docs.totalHits是所有的记录条数，与上面设置的100无关  
          
//      System.out.println("共找到"+docs.totalHits+"条记录");  
//        
//      ScoreDoc[] scoreDocs = docs.scoreDocs;  
//  
//        for (int i = 0,len = scoreDocs.length ; i < len; i++) {  
//          System.out.println(scoreDocs[i].doc);  
//      }  
//      for (ScoreDoc scoreDoc : scoreDocs) {  
//          int docid = scoreDoc.doc;  
//          Document document = searcher.doc(docid);  
//          System.out.println(document.get("Id"));  
//          System.out.println(document.get("title"));  
//          System.out.println(document.get("content"));  
//          System.out.println("===============================");  
//      }  
          
//      IndexSearcher indexSearcher = IndexUtils.getIndexSearcher();  
//  
//      TopDocs topDocs = indexSearcher.search(query, 10);  
//  
//      ScoreDoc[] docs = topDocs.scoreDocs;  
//      System.out.println("共找到:"+docs.length);  
  
//      for (ScoreDoc scoreDoc : docs) {  
//          int docid = scoreDoc.doc;  
//          Document document = indexSearcher.doc(docid);  
//          System.out.println(document.get("Id"));  
//          System.out.println(document.get("title"));  
//          System.out.println(document.get("content"));  
//          System.out.println("===============================");  
//      }  
//        
//  }  
          
          
          
//      IndexUtils.deleteAll();  
    }  
}  

0 0