Lucene 初试

来源:互联网 发布:中文域名好吗 编辑:程序博客网 时间:2024/04/19 22:40

项目数据量较大,直接从数据库查询效率较低,所以用到了 Lucene。


针对项目的需求,写了一个工具类,后续还需要进一步修改和完善。


为了支持按日期排序,建立索引时将日期转换成 long 类型(毫秒时间戳)保存。


Lucene版本3.6.2

IKAnalyzer2012_u6

package t.util;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import t.Constants;
import t.model.News;

/**
 * Lucene index utility: builds a file-system index from {@link News} objects
 * and runs keyword searches against the indexed news content.
 *
 * <p>All public methods report I/O and parse failures by printing the stack
 * trace; they do not propagate checked exceptions to the caller.
 *
 * @author tangjing
 * @date 2013-2-4
 */
public class LuceneUtil {

    /**
     * Lucene index directory path.
     * NOTE(review): {@link #getIndexFile()} reads {@code Constants.LUCENE_INDEX_DIR},
     * not this constant - confirm which one is meant to be authoritative.
     */
    public static final String LUCENE_INDEX_DIR = "c://luceneTest";

    /** Index field name for the news ID. */
    public static final String FIELDNAME_NEWS_ID = "id";

    /** Index field name for the news content. */
    public static final String FIELDNAME_NEWS_CONTENT = "content";

    /** Index field name for the news publication time. */
    public static final String FIELDNAME_NEWS_DATE = "date";

    /** Index field name for the news source. */
    public static final String FIELDNAME_NEWS_SOURCE = "source";

    /**
     * Adds a single news object to the index. A {@code null} argument is ignored.
     *
     * @param news the news object to index; may be {@code null}
     * @author tangjing
     * @date 2013-2-1
     */
    public static void createIndexByNews(News news) {
        if (news == null) {
            return;
        }
        IndexWriter indexWriter = null;
        try {
            indexWriter = getIndexWriter();
            indexWriter.addDocument(getDocumentByNews(news));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always close the writer so it is not left open when indexing fails.
            closeIndexWriter(indexWriter);
        }
    }

    /**
     * Adds every news object of the given list to the index. A {@code null}
     * list is ignored, and so are {@code null} elements inside the list.
     *
     * <p>If indexing fails part-way through, the writer is still closed, so
     * documents added before the failure may already be in the index.
     *
     * @param newsList the news objects to index; may be {@code null}
     * @author tangjing
     * @date 2013-2-1
     */
    public static void createIndexByNewsList(List<News> newsList) {
        if (newsList == null) {
            return;
        }
        IndexWriter indexWriter = null;
        try {
            indexWriter = getIndexWriter();
            for (News news : newsList) {
                // Skip null entries, mirroring createIndexByNews(null).
                if (news != null) {
                    indexWriter.addDocument(getDocumentByNews(news));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeIndexWriter(indexWriter);
        }
    }

    /**
     * Searches the news content with the default sort order.
     *
     * @param keywords the query string, parsed against the content field
     * @param size     the maximum number of hits to return
     * @return the matching news, or {@code null} if the search failed before
     *         any result could be collected
     * @author tangjing
     * @date 2013-2-1
     */
    public static List<News> searchNewsIndex(String keywords, int size) {
        return searchNewsIndex(keywords, size, new Sort());
    }

    /**
     * Searches the news content and orders the hits by the date field as a
     * long value, in reverse order (largest timestamp first).
     *
     * @param keywords the query string, parsed against the content field
     * @param size     the maximum number of hits to return
     * @return the matching news, or {@code null} if the search failed before
     *         any result could be collected
     * @author tangjing
     * @date 2013-2-1
     */
    public static List<News> searchNewsIndexOrderByDate(String keywords,
            int size) {
        Sort sort = new Sort(new SortField(FIELDNAME_NEWS_DATE, SortField.LONG,
                true));
        return searchNewsIndex(keywords, size, sort);
    }

    /**
     * Runs the query and converts every hit back into a {@link News} object.
     *
     * @param keywords the query string
     * @param size     the maximum number of hits to return
     * @param sort     the sort order; {@code null} falls back to {@code new Sort()}
     * @return the matching news; {@code null} if opening the index, parsing the
     *         query or executing the search failed; possibly a partial list if
     *         loading an individual hit failed
     * @author tangjing
     * @date 2013-2-1
     */
    private static List<News> searchNewsIndex(String keywords, int size,
            Sort sort) {
        List<News> list = null;
        Directory directory = null;
        IndexReader indexReader = null;
        IndexSearcher searcher = null;
        try {
            directory = FSDirectory.open(getIndexFile());
            indexReader = IndexReader.open(directory);
            searcher = new IndexSearcher(indexReader);
            Analyzer analyzer = new IKAnalyzer();
            QueryParser parser = new QueryParser(Version.LUCENE_36,
                    FIELDNAME_NEWS_CONTENT, analyzer);
            // Terms are combined with the parser's default operator here; call
            // parser.setDefaultOperator(QueryParser.AND_OPERATOR) to require all terms.
            Query query = parser.parse(keywords);
            // Honour the documented contract: a missing sort uses the default Sort.
            Sort effectiveSort = (sort != null) ? sort : new Sort();
            TopDocs topDocs = searcher.search(query, size, effectiveSort);
            list = new ArrayList<News>();
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document d = searcher.doc(hit.doc);
                list.add(getNewsByDocument(d));
            }
        } catch (NumberFormatException e) {
            // Thrown when a stored id/date value cannot be parsed back to a number.
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // Release searcher, reader and directory on every path.
            closeSearchResources(searcher, indexReader, directory);
        }
        return list;
    }

    /**
     * Converts a news object into a Lucene document.
     *
     * <p>NOTE(review): content, create date and source are dereferenced or
     * passed on without null checks - presumably callers guarantee they are
     * set; confirm.
     *
     * @param news the news object to convert; must not be {@code null}
     * @return the document holding the id, content, date and source fields
     * @author tangjing
     * @date 2013-2-4
     */
    private static Document getDocumentByNews(News news) {
        Document document = new Document();
        // The ID is stored but not indexed.
        document.add(new Field(FIELDNAME_NEWS_ID, news.getId() + "",
                Field.Store.YES, Field.Index.NO));
        document.add(new Field(FIELDNAME_NEWS_CONTENT, news.getContent(),
                Field.Store.YES, Field.Index.ANALYZED, TermVector.YES));
        // The date is stored as Date.getTime() rendered as a string, so it can
        // be sorted as a long.
        document.add(new Field(FIELDNAME_NEWS_DATE, news.getCreateDate()
                .getTime() + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
        // The source site is indexed as a single term, without tokenizing.
        document.add(new Field(FIELDNAME_NEWS_SOURCE, news.getNetsite(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return document;
    }

    /**
     * Converts an index document back into a news object.
     *
     * @param document the document read from the index
     * @return the reconstructed news object
     * @throws NumberFormatException if the stored id or date is not numeric
     * @author tangjing
     * @date 2013-2-4
     */
    private static News getNewsByDocument(Document document) {
        News news = new News();
        news.setId(Integer.parseInt(document.get(FIELDNAME_NEWS_ID)));
        news.setContent(document.get(FIELDNAME_NEWS_CONTENT));
        news.setNetsite(document.get(FIELDNAME_NEWS_SOURCE));
        Date date = new Date(Long.parseLong(document.get(FIELDNAME_NEWS_DATE)));
        news.setCreateDate(date);
        return news;
    }

    /**
     * Opens an {@link IndexWriter} on the index directory using the IK analyzer.
     *
     * @return a new, open index writer; never {@code null}
     * @throws IOException if the index directory cannot be prepared or the
     *                     writer cannot be opened
     * @author tangjing
     * @date 2013-2-4
     */
    private static IndexWriter getIndexWriter() throws IOException {
        // IK tokenizer
        Analyzer analyzer = new IKAnalyzer();
        Directory directory = FSDirectory.open(getIndexFile());
        IndexWriterConfig writerConfig = new IndexWriterConfig(
                Version.LUCENE_36, analyzer);
        // Failures propagate to the caller instead of yielding a null writer.
        return new IndexWriter(directory, writerConfig);
    }

    /**
     * Closes the given writer, printing (not propagating) any failure.
     *
     * @param indexWriter the writer to close; may be {@code null}
     */
    private static void closeIndexWriter(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the search-side resources in reverse order of creation. Each
     * close is attempted independently so one failure does not skip the rest.
     *
     * @param searcher  the searcher to close; may be {@code null}
     * @param reader    the reader to close; may be {@code null}
     * @param directory the directory to close; may be {@code null}
     */
    private static void closeSearchResources(IndexSearcher searcher,
            IndexReader reader, Directory directory) {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Returns the index directory, creating it (and missing parents) if absent.
     *
     * @return the index directory
     * @throws IOException if the path exists but is not a directory, or the
     *                     directory cannot be created
     * @author tangjing
     * @date 2013-2-4
     */
    private static File getIndexFile() throws IOException {
        File indexFile = new File(Constants.LUCENE_INDEX_DIR);
        if (indexFile.exists()) {
            if (!indexFile.isDirectory()) {
                throw new IOException("Lucene index path exists but is not a directory: "
                        + indexFile.getAbsolutePath());
            }
            return indexFile;
        }
        // The index location must be a directory, so create it with mkdirs().
        // Re-check isDirectory() in case it was created concurrently.
        if (!indexFile.mkdirs() && !indexFile.isDirectory()) {
            throw new IOException("Unable to create Lucene index directory: "
                    + indexFile.getAbsolutePath());
        }
        return indexFile;
    }
}




原创粉丝点击