初识Lucene

来源:互联网 发布:第一次吃海底捞 知乎 编辑:程序博客网 时间:2024/05/29 17:30

Lucene是一个软件库,一个开发工具包,而不是一个具有完整特征的搜索应用程序。

它采用的是一种称为反向索引(inverted index,也称倒排索引)的机制。反向索引简单理解就是维护一个词/短语表,对于这个表中的每个词/短语,都有一个相关信息描述了有哪些文档包含了这个词/短语。这样在用户输入查询条件的时候,就能非常快地得到搜索结果。Lucene本身只关注文本的索引和搜索,它使你可以为你的应用程序添加索引和搜索能力。通过学习Lucene,我们就可以为自己的项目增加全文检索的功能。

接下来我们开始Lucene环境开发案例:

1,创建一个java工程导入包(下载地址:http://pan.baidu.com/s/1slROGd3),新建实体类Goods,下面是我的工程目录


2,Goods的代码如下 :

package cn.wxz.entity;

import java.io.Serializable;

/**
 * Serializable bean describing one product: id, name, price and a free-text remark.
 * All properties are nullable wrapper/reference types and start out as {@code null}.
 */
public class Goods implements Serializable {

    private static final long serialVersionUID = 6341267507850856097L;

    /** Product id. */
    private Integer goodsId;
    /** Product name. */
    private String goodsName;
    /** Product price. */
    private Double goodsPrice;
    /** Product remark. */
    private String goodsRemark;

    public Integer getGoodsId() {
        return this.goodsId;
    }

    public void setGoodsId(Integer goodsId) {
        this.goodsId = goodsId;
    }

    public String getGoodsName() {
        return this.goodsName;
    }

    public void setGoodsName(String goodsName) {
        this.goodsName = goodsName;
    }

    public Double getGoodsPrice() {
        return this.goodsPrice;
    }

    public void setGoodsPrice(Double goodsPrice) {
        this.goodsPrice = goodsPrice;
    }

    public String getGoodsRemark() {
        return this.goodsRemark;
    }

    public void setGoodsRemark(String goodsRemark) {
        this.goodsRemark = goodsRemark;
    }

    /**
     * Renders every property in the form
     * {@code Goods [goodsId=..., goodsName=..., goodsPrice=..., goodsRemark=...]};
     * unset properties are printed as {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Goods [");
        text.append("goodsId=").append(goodsId);
        text.append(", goodsName=").append(goodsName);
        text.append(", goodsPrice=").append(goodsPrice);
        text.append(", goodsRemark=").append(goodsRemark);
        text.append("]");
        return text.toString();
    }
}

LuceneDao代码如下:

package cn.wxz.entity;import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.List;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.Field.Index;import org.apache.lucene.document.Field.Store;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriter.MaxFieldLength;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;public class LuceneDao {/*1.构建索引库Directory directory = FSDirectory.open(new File("索引库目录"));2.指定分词器,版本一般指定为最高Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);3.创建文档对象,并添加相关字段值Document doc = new Document();doc.add(new Field("goodsId",goods.getGoodsId().toString(),Store.YES,Index.NOT_ANALYZED));4.创建增删改索引库的操作对象,添加文档并提交IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);indexWriter.addDocument(doc);indexWriter.commit();5.关闭操作对象*/public void saveGoods(Goods goods){IndexWriter indexWriter = null;try {Directory  directory = FSDirectory.open(new File("d:\\wxz\\luceneDir"));Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);Document document = new Document();//Store.YES表示数据在存到文档库,//Index.ANALYZED表示按规则进行分词;Index.NO_ANALYZED表示把整体的值作为关键字;Index.NO表示不作为索引document.add(new Field("goodsId",goods.getGoodsId().toString(),Store.YES,Index.ANALYZED));document.add(new Field("goodsName",goods.getGoodsName(),Store.YES,Index.ANALYZED));document.add(new Field("goodsPrice",goods.getGoodsPrice().toString(),Store.YES,Index.ANALYZED));document.add(new 
Field("goodsRemark",goods.getGoodsRemark(),Store.YES,Index.ANALYZED));//通过操作类进行数据的存放indexWriter  = new IndexWriter(directory,analyzer,MaxFieldLength.LIMITED);indexWriter.addDocument(document);indexWriter.commit();} catch (Exception e) {// TODO Auto-generated catch blocke.printStackTrace();}finally{if(indexWriter!=null){try {indexWriter.close();} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}}/*1.打开索引库directory = FSDirectory.open(new File("索引库目录"));2。创建查询分词器,版本号与写入文档的查询分词器一样Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);3。创建查询解析器,参数为版本号,查询字段名,分词器QueryParser parser = new QueryParser(Version.LUCENE_30, "goodsName", analyzer);4。构建查询信息对象Query query = parser.parse(keyWord);5。构建查询工具searcher = new IndexSearcher(directory);6。通过查询工具执行查询。参数1,查询信息对象;参数2。返回记录数;TopDocs包括总记录数、文档编号等TopDocs topDocx=searcher.search(query, 20);7。根据文档编号遍历真正的文档ScoreDoc sd[] = topDocx.scoreDocs;for(ScoreDoc scoreDoc:sd){。。。Document doc = searcher.doc(scoreDoc.doc);8。转为java对象goods.setGoodsId(Integer.parseInt(doc.get("goodsId")));   lists.add(goods);9.关闭查询操作对象*/public List<Goods> selectGoods(String keyWord){List<Goods> list= new ArrayList<Goods>();IndexSearcher indexSearcher = null;try {Directory directory = FSDirectory.open(new File("d:\\wxz\\luceneDir"));Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);QueryParser parser = new QueryParser(Version.LUCENE_30,"goodsName",analyzer);Query query = parser.parse(keyWord);indexSearcher = new IndexSearcher(directory);TopDocs topDocs = indexSearcher.search(query, 20);System.out.println("总记录数:"+topDocs.totalHits);ScoreDoc[] scoreDocs = topDocs.scoreDocs;for(ScoreDoc s:scoreDocs){System.out.println("文档编号:"+s.doc);//通过文档编号取出文档Document document = indexSearcher.doc(s.doc);//把文档对象的值给bean对象Goods goods = new 
Goods();goods.setGoodsId(Integer.parseInt(document.get("goodsId")));goods.setGoodsName(document.get("goodsName"));goods.setGoodsPrice(Double.parseDouble(document.get("goodsPrice")));goods.setGoodsRemark(document.get("goodsRemark"));list.add(goods);}} catch (Exception e) {// TODO Auto-generated catch blocke.printStackTrace();}finally{if(indexSearcher!=null){try {indexSearcher.close();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}return list;}}

接下来进行测试,测试代码如下:

package cn.wxz.entity;

import static org.junit.Assert.*;

import java.util.List;
import java.util.Locale;

import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Exercises {@link LuceneDao} against its default index directory.
 * Run {@link #testSaveGoods()} before {@link #testSelectGoods()} so that the search
 * has something to find.
 */
public class TestLucene {

    private static LuceneDao luceneDao;

    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        luceneDao = new LuceneDao();
    }

    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        luceneDao = null;
    }

    /** Indexes three sample products, two of which are named "milk". */
    @Test
    public void testSaveGoods() {
        // Each product gets its own id so the hits can be told apart in the output.
        luceneDao.saveGoods(newGoods(1, "milk", 20.4, "the milk is good"));
        luceneDao.saveGoods(newGoods(2, "beef", 50.3, "beef is good"));
        luceneDao.saveGoods(newGoods(3, "milk", 20.4, "the milk is good"));
    }

    /** Searches for "milk", prints every hit and checks that each hit really matches. */
    @Test
    public void testSelectGoods() {
        List<Goods> list = luceneDao.selectGoods("milk");
        assertNotNull(list);
        for (Goods goods : list) {
            System.out.println("商品信息:" + goods);
            // The query runs against goodsName, so every hit's name must contain the keyword.
            assertTrue(goods.getGoodsName().toLowerCase(Locale.ENGLISH).contains("milk"));
        }
    }

    /** Builds a fully populated sample product. */
    private static Goods newGoods(int id, String name, double price, String remark) {
        Goods goods = new Goods();
        goods.setGoodsId(id);
        goods.setGoodsName(name);
        goods.setGoodsPrice(price);
        goods.setGoodsRemark(remark);
        return goods;
    }
}
依次运行testSaveGoods方法和testSelectGoods方法,在控制台可以看到如下信息:




0 0