Lucene 初试
来源:互联网 发布:中文域名好吗 编辑:程序博客网 时间:2024/04/19 22:40
项目数据量较大,如果从数据库查询,效率较低,所以用到了lucene。
针对项目的需求,写了一个工具类,后续还需要进一步修改。
按日期排序:将日期转换成 long 类型(毫秒时间戳)存入索引,再按该字段排序。
Lucene版本3.6.2
IKAnalyzer2012_u6
package t.util;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import t.Constants;
import t.model.News;

/**
 * Lucene index utility for {@link News} objects: builds the full-text index
 * and runs keyword searches against it (Lucene 3.6 API, IK analyzer).
 *
 * @author tangjing
 * @date 2013-2-4
 */
public class LuceneUtil {

    /**
     * Lucene index directory path.
     *
     * NOTE(review): this constant is not read anywhere in this class; the
     * index location actually used comes from
     * {@code Constants.LUCENE_INDEX_DIR} (see {@link #getIndexFile()}).
     * Confirm which of the two is intended.
     */
    public static final String LUCENE_INDEX_DIR = "c://luceneTest";

    /** Index field name: news id. */
    public static final String FIELDNAME_NEWS_ID = "id";

    /** Index field name: news content. */
    public static final String FIELDNAME_NEWS_CONTENT = "content";

    /** Index field name: news publish time (epoch milliseconds as text). */
    public static final String FIELDNAME_NEWS_DATE = "date";

    /** Index field name: news source site. */
    public static final String FIELDNAME_NEWS_SOURCE = "source";

    /**
     * Adds a single news item to the index.
     *
     * Index errors are printed to stderr and not rethrown.
     *
     * @param news the item to index; {@code null} is ignored
     */
    public static void createIndexByNews(News news) {
        if (news == null) {
            return;
        }
        List<News> single = new ArrayList<News>(1);
        single.add(news);
        createIndexByNewsList(single);
    }

    /**
     * Adds every news item of the list to the index using one writer session.
     *
     * Index errors are printed to stderr and not rethrown. The writer is
     * always closed, even when adding a document fails, so the index write
     * lock is not left behind.
     *
     * @param newsList the items to index; a {@code null} list is ignored and
     *                 {@code null} elements are skipped
     */
    public static void createIndexByNewsList(List<News> newsList) {
        if (newsList == null) {
            return;
        }
        IndexWriter indexWriter = null;
        try {
            indexWriter = getIndexWriter();
            for (News news : newsList) {
                if (news != null) {
                    indexWriter.addDocument(getDocumentByNews(news));
                }
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeWriter(indexWriter);
        }
    }

    /**
     * Searches the content field and returns hits in relevance order.
     *
     * @param keywords query string in Lucene query-parser syntax
     * @param size     maximum number of hits to return
     * @return the matching news items; an empty list when {@code keywords} is
     *         {@code null} or {@code size <= 0}; {@code null} when the index
     *         cannot be read or the query cannot be parsed
     */
    public static List<News> searchNewsIndex(String keywords, int size) {
        return searchNewsIndex(keywords, size, new Sort());
    }

    /**
     * Searches the content field and returns hits ordered by publish time,
     * newest first.
     *
     * @param keywords query string in Lucene query-parser syntax
     * @param size     maximum number of hits to return
     * @return the matching news items; an empty list when {@code keywords} is
     *         {@code null} or {@code size <= 0}; {@code null} when the index
     *         cannot be read or the query cannot be parsed
     */
    public static List<News> searchNewsIndexOrderByDate(String keywords,
            int size) {
        Sort sort = new Sort(new SortField(FIELDNAME_NEWS_DATE,
                SortField.LONG, true));
        return searchNewsIndex(keywords, size, sort);
    }

    /**
     * Runs a keyword search with the given sort order.
     *
     * @param keywords query string in Lucene query-parser syntax
     * @param size     maximum number of hits to return
     * @param sort     sort order; when {@code null}, relevance order is used
     * @return the matching news items; an empty list when {@code keywords} is
     *         {@code null} or {@code size <= 0}; {@code null} when the index
     *         cannot be read or the query cannot be parsed. If converting a
     *         stored document fails part-way, the items converted so far are
     *         returned.
     */
    private static List<News> searchNewsIndex(String keywords, int size,
            Sort sort) {
        // Nothing to search for / nothing requested: avoid the parser NPE and
        // the collector's rejection of a non-positive hit count.
        if (keywords == null || size <= 0) {
            return new ArrayList<News>();
        }
        Sort effectiveSort = (sort != null) ? sort : new Sort();

        List<News> list = null;
        Directory directory = null;
        IndexReader indexReader = null;
        IndexSearcher searcher = null;
        try {
            directory = FSDirectory.open(getIndexFile());
            indexReader = IndexReader.open(directory);
            searcher = new IndexSearcher(indexReader);
            Analyzer analyzer = new IKAnalyzer();
            QueryParser parser = new QueryParser(Version.LUCENE_36,
                    FIELDNAME_NEWS_CONTENT, analyzer);
            // Terms are combined with OR by default; switch to AND with
            // parser.setDefaultOperator(QueryParser.AND_OPERATOR) if needed.
            Query query = parser.parse(keywords);
            TopDocs topDocs = searcher.search(query, size, effectiveSort);
            list = new ArrayList<News>();
            for (ScoreDoc hit : topDocs.scoreDocs) {
                list.add(getNewsByDocument(searcher.doc(hit.doc)));
            }
        } catch (NumberFormatException e) {
            // Stored id/date text could not be parsed back into a number.
            e.printStackTrace();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // Release in reverse order of acquisition; a searcher built on an
            // external reader does not close that reader itself.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            closeDirectory(directory);
        }
        return list;
    }

    /**
     * Converts a news object into a Lucene document.
     *
     * @param news the news item to convert; must not be {@code null}
     * @return a document carrying the id, content, date and source fields
     */
    private static Document getDocumentByNews(News news) {
        Document document = new Document();
        // The id is only stored for retrieval; it is not searchable.
        document.add(new Field(FIELDNAME_NEWS_ID, news.getId() + "",
                Field.Store.YES, Field.Index.NO));
        document.add(new Field(FIELDNAME_NEWS_CONTENT, news.getContent(),
                Field.Store.YES, Field.Index.ANALYZED, TermVector.YES));
        // The date is indexed as a single un-tokenized term holding epoch
        // milliseconds so it can be sorted as a long.
        document.add(new Field(FIELDNAME_NEWS_DATE,
                news.getCreateDate().getTime() + "", Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        // The source site is indexed as-is, without tokenization.
        document.add(new Field(FIELDNAME_NEWS_SOURCE, news.getNetsite(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return document;
    }

    /**
     * Converts an index document back into a news object.
     *
     * @param document a document produced by {@link #getDocumentByNews(News)}
     * @return the reconstructed news item
     * @throws NumberFormatException if the stored id or date is not numeric
     */
    private static News getNewsByDocument(Document document) {
        News news = new News();
        news.setId(Integer.parseInt(document.get(FIELDNAME_NEWS_ID)));
        news.setContent(document.get(FIELDNAME_NEWS_CONTENT));
        news.setNetsite(document.get(FIELDNAME_NEWS_SOURCE));
        long millis = Long.parseLong(document.get(FIELDNAME_NEWS_DATE));
        news.setCreateDate(new Date(millis));
        return news;
    }

    /**
     * Opens an {@link IndexWriter} on the index directory using the IK
     * analyzer.
     *
     * @return an open writer; never {@code null}
     * @throws IOException if the directory cannot be opened or the writer
     *                     cannot be created (including a held write lock or
     *                     a corrupt index)
     */
    private static IndexWriter getIndexWriter() throws IOException {
        Analyzer analyzer = new IKAnalyzer();
        Directory directory = FSDirectory.open(getIndexFile());
        try {
            IndexWriterConfig writerConfig = new IndexWriterConfig(
                    Version.LUCENE_36, analyzer);
            return new IndexWriter(directory, writerConfig);
        } catch (IOException e) {
            // Do not leak the directory handle when the writer cannot open.
            closeDirectory(directory);
            throw e;
        }
    }

    /**
     * Closes a writer and its directory, printing (not rethrowing) failures.
     *
     * @param indexWriter the writer to close; {@code null} is ignored
     */
    private static void closeWriter(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        Directory directory = indexWriter.getDirectory();
        try {
            indexWriter.close();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeDirectory(directory);
        }
    }

    /**
     * Closes a directory, printing (not rethrowing) failures.
     *
     * @param directory the directory to close; {@code null} is ignored
     */
    private static void closeDirectory(Directory directory) {
        if (directory == null) {
            return;
        }
        try {
            directory.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the index directory, creating it (and missing parents) when it
     * does not exist yet.
     *
     * @return the index directory
     * @throws IOException if the path exists but is not a directory, or the
     *                     directory cannot be created
     */
    private static File getIndexFile() throws IOException {
        File indexDir = new File(Constants.LUCENE_INDEX_DIR);
        // The index location must be a directory; creating a regular file
        // here would make FSDirectory.open fail. The second isDirectory()
        // check covers a concurrent creation between the two calls.
        if (!indexDir.isDirectory() && !indexDir.mkdirs()
                && !indexDir.isDirectory()) {
            throw new IOException("Cannot create Lucene index directory: "
                    + indexDir.getAbsolutePath());
        }
        return indexDir;
    }
}
- 初试lucene
- Lucene 初试
- lucene 2.0 初试
- 初试lucene站内搜索
- Lucene 搜索引擎开发初试 (1)
- Lucene学习--1.基本知识与初试
- 初试Lucene.net搜索及高亮分页
- Solr搜索引擎开发初试(2)(接"Lucene搜索引擎开发初试(1)")
- 初试
- 初试
- 初试
- 初试
- Lucene初试——关于大文本建立索引和中文乱码以及QueryParser检索的一些体会
- lucene
- Lucene
- lucene
- lucene
- Lucene
- ZTE将在MWC大会上展示Firefox操作系统手机
- php 登录之后,返回会员上一次访问的路径网址
- iPhone开发【二十四】数据持久化总结之第4篇—sqlite3数据库
- errors running builder 'Android Pre Compiler' on project'项目名称'
- GDI坐标系统(下)
- Lucene 初试
- 《敏捷个人》周刊 第16期
- ALUA --- Asymmetric Logic Unit Access
- [C#.NET] 使用 Google Maps API 查某一個地址的經度和緯度
- Linux-ASoC驱动归纳总结:
- 推荐几款最为流行的Java IDE
- CakePHP 2.x CookBook 中文版 第七章 模型 之 检索数据
- Flex 屏幕截图
- App应用之提交到各大市场渠道