Lucene分词与查询
来源:互联网 发布:手机拍一寸照的软件 编辑:程序博客网 时间:2024/06/04 20:44
package com.demo.ajax;public class Building{private Integer id;private String name;private String Information;public Integer getId(){return id;}public void setId(Integer id){this.id = id;}public String getName(){return name;}public void setName(String name){this.name = name;}public String getInformation(){return Information;}public void setInformation(String information){Information = information;}}
package com.demo.ajax;

import java.util.ArrayList;
import java.util.List;

/**
 * Supplies the fixed sample data set for the demo.
 */
public class InitTool {

    /**
     * Creates forty sample buildings with ids 60 through 99 (inclusive).
     * Each building is named with its id followed by "号楼" and carries the
     * information text "总统套间".
     *
     * @return a new list of the generated buildings in ascending id order
     */
    public static List<Building> initBuilding() {
        List<Building> buildings = new ArrayList<Building>();
        int number = 60;
        while (number < 100) {
            Building entry = new Building();
            entry.setId(number);
            entry.setName(number + "号楼");
            entry.setInformation("总统套间");
            buildings.add(entry);
            number++;
        }
        return buildings;
    }
}
package com.demo.ajax;

import java.io.File;
import java.net.URLDecoder;
import java.util.Iterator;
import java.util.List;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds the Lucene index for the sample {@link Building} data.
 */
public class AnalyzerTool {

    /**
     * Location of the index on disk.
     * NOTE(review): an earlier draft (commented out in the original) resolved the
     * location from the classpath resource "/date/index/building/" instead.
     */
    private static final String INDEX_PATH = "d:\\data\\index";

    /** Upper bound on how many buildings the first preview pass prints. */
    private static final int PREVIEW_COUNT = 30;

    /**
     * Recreates the index at {@link #INDEX_PATH} from {@link InitTool#initBuilding()}.
     * Any existing index at that location is overwritten. Every building is echoed
     * to standard output before indexing (the first {@value #PREVIEW_COUNT} entries
     * are printed once more in a leading preview pass, as in the original demo).
     *
     * @throws Exception if the index directory cannot be created or Lucene fails
     *                   while writing the index
     */
    public void createIndex() throws Exception {
        // Chinese-language analyzer used for tokenizing the indexed text.
        Analyzer analyzer = new PaodingAnalyzer();

        File indexDir = new File(INDEX_PATH);
        if (!indexDir.exists() && !indexDir.mkdirs() && !indexDir.isDirectory()) {
            throw new java.io.IOException("Unable to create index directory: " + indexDir);
        }
        FSDirectory directory = FSDirectory.getDirectory(INDEX_PATH);

        // true = overwrite any existing index; false would append to it instead.
        IndexWriter writer = new IndexWriter(directory, analyzer, true);
        try {
            List<Building> list = InitTool.initBuilding();

            // Bounded by the list size so a shorter data set cannot overrun.
            int previewLimit = Math.min(PREVIEW_COUNT, list.size());
            for (int i = 0; i < previewLimit; i++) {
                printBuilding(list.get(i));
            }
            for (Building building : list) {
                printBuilding(building);
            }

            for (Building building : list) {
                writer.addDocument(toDocument(building));
            }
            writer.optimize();
        } finally {
            // Always release the writer (and its write lock), even when indexing fails.
            writer.close();
        }
    }

    /** Prints one building as "id--->name--->information" to standard output. */
    private static void printBuilding(Building building) {
        System.out.println(building.getId() + "-------------->" + building.getName()
                + "---------->" + building.getInformation());
    }

    /**
     * Converts a building into a Lucene document: the id is stored untokenized,
     * name and information are stored and tokenized.
     */
    private static Document toDocument(Building building) {
        Document doc = new Document();
        doc.add(new Field("id", String.valueOf(building.getId()),
                Field.Store.YES, Field.Index.UN_TOKENIZED));
        doc.add(new Field("building_name", building.getName(),
                Field.Store.YES, Field.Index.TOKENIZED));
        // NOTE(review): the information text is indexed as-is; the original had a
        // commented-out HTML-to-text conversion step here.
        doc.add(new Field("building_information", building.getInformation(),
                Field.Store.YES, Field.Index.TOKENIZED));
        return doc;
    }

    /**
     * Command-line entry point: rebuilds the index.
     *
     * @param args ignored
     * @throws Exception if index creation fails
     */
    public static void main(String[] args) throws Exception {
        AnalyzerTool analyzerTool = new AnalyzerTool();
        analyzerTool.createIndex();
    }
}
package com.demo.ajax;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;

/**
 * Searches the building index across the name and information fields and
 * returns the hits with the matched terms wrapped in HTML highlight markup.
 */
public class LuceneSearchTool {

    /** Index location used by {@link #search(String)}. */
    private static final String DEFAULT_INDEX_PATH = "d:\\data\\index";

    private static final String FIELD_NAME = "building_name";
    private static final String FIELD_INFORMATION = "building_information";

    /** Fragment size passed to the highlighter's {@link SimpleFragmenter}. */
    private static final int FRAGMENT_SIZE = 120;

    /** Results of the most recent search (empty until a search has run). */
    List<Building> searcheResult = new ArrayList<Building>();

    /**
     * @return the results of the most recent search
     */
    public List<Building> getSearcheResult() {
        return searcheResult;
    }

    /**
     * @param searcheResult the list to expose through {@link #getSearcheResult()}
     */
    public void setSearcheResult(List<Building> searcheResult) {
        this.searcheResult = searcheResult;
    }

    /**
     * Searches the index at the default location.
     *
     * @param keywords the query text
     * @return the matching buildings, highlighted
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public List<Building> search(String keywords) throws Exception {
        return searchIndex(DEFAULT_INDEX_PATH, keywords);
    }

    /**
     * Searches the index at {@code path} for {@code keywords} in the building
     * name and information fields (a match in either field is sufficient).
     * Name and information of each hit are replaced by their highlighted best
     * fragment when the highlighter produces one.
     *
     * <p>Each call starts from an empty result list, so hits from earlier
     * searches on the same instance are not carried over.
     *
     * @param path     file-system location of the index
     * @param keywords the query text
     * @return the matching buildings in hit order; also available afterwards
     *         through {@link #getSearcheResult()}
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or highlighting fails
     */
    public List<Building> searchIndex(String path, String keywords) throws Exception {
        List<Building> results = new ArrayList<Building>();

        FSDirectory directory = FSDirectory.getDirectory(path);
        IndexReader reader = IndexReader.open(directory);
        try {
            // Search through the reader we already hold instead of opening the index twice.
            Searcher searcher = new IndexSearcher(reader);
            try {
                // Fields to query, and how each field's clause combines (SHOULD = OR).
                String[] fields = { FIELD_NAME, FIELD_INFORMATION };
                BooleanClause.Occur[] flags = new BooleanClause.Occur[] {
                        BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
                // Analyzer used to tokenize the query text.
                Analyzer analyzer = new PaodingAnalyzer();
                Query query = MultiFieldQueryParser.parse(keywords, fields, flags, analyzer);
                // NOTE(review): the original carried a commented-out query.rewrite(reader)
                // call, described as letting the scorer recognise the query; it stays disabled.

                // The highlighter depends only on the query, so build it once for all hits.
                SimpleHTMLFormatter formatter =
                        new SimpleHTMLFormatter("<font color=red>", "</font>");
                Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
                Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
                highlighter.setTextFragmenter(fragmenter);

                Hits hits = searcher.search(query);
                for (int i = 0; i < hits.length(); i++) {
                    Document doc = hits.doc(i);
                    Building building = new Building();
                    building.setId(Integer.valueOf(doc.get("id")));
                    building.setName(highlight(highlighter, analyzer,
                            FIELD_NAME, doc.get(FIELD_NAME)));
                    building.setInformation(highlight(highlighter, analyzer,
                            FIELD_INFORMATION, doc.get(FIELD_INFORMATION)));
                    results.add(building);
                }
            } finally {
                searcher.close();
            }
        } finally {
            // A searcher built on an existing reader does not close it; do so here.
            reader.close();
        }

        this.searcheResult = results;
        return results;
    }

    /**
     * Returns the highlighted best fragment of {@code text} for the given field,
     * or {@code text} itself when it is {@code null} or no fragment is produced.
     * Declared with {@code throws Exception} because the checked exceptions of
     * the highlighter differ between Lucene versions.
     */
    private static String highlight(Highlighter highlighter, Analyzer analyzer,
            String fieldName, String text) throws Exception {
        if (text == null) {
            return null;
        }
        TokenStream tokens = analyzer.tokenStream(fieldName, new StringReader(text));
        String fragment = highlighter.getBestFragment(tokens, text);
        return fragment != null ? fragment : text;
    }
}
package com.demo.ajax;

import java.util.List;

/**
 * Manual driver that runs one search against the index and prints the outcome.
 */
public class Junit {

    /**
     * Searches for "号楼", prints the number of hits, then prints one line per
     * hit with its id, name and information.
     *
     * @param args ignored
     * @throws Exception if the search fails
     */
    public static void main(String[] args) throws Exception {
        List<Building> found = new LuceneSearchTool().search("号楼");
        System.out.println(found.size());

        for (int i = 0; i < found.size(); i++) {
            Building hit = found.get(i);
            String line = hit.getId() + "------------->" + hit.getName()
                    + "-------------->" + hit.getInformation();
            System.out.println(line);
        }
    }
}
- Lucene分词与查询
- Lucene实现自定义分词器(同义词查询与高亮)
- Lucene与中文分词
- Lucene分词原理与方式
- Lucene.Net 与 盘古分词
- lucene 与IKAnalyzer实现中文分词查询 (最新jar及实现代码)
- Lucene.Net与盘古分词实现站内搜索
- lucene分词
- lucene分词
- Lucene 分词
- lucene分词器分词
- Lucene 3.6 中文分词、分页查询、高亮显示等
- Lucene开发实例教程:Lucene中文分词、分页查询、高亮显示
- lucene分词器分词demo
- Lucene 分词 统计分词次数
- 一氪钟:浅说 Lucene 倒排索引与分词
- 与Lucene 4.10配合的中文分词比较
- 与Lucene 4.10配合的中文分词比较(转)
- 我思故我在系列——数据结构NO.16题(题目搜集整理者JULY,非常感谢!!)
- vim多行增加缩进
- android编译要求安装jdk1.5的解决办法
- pictureBox中使用滚动条 c# dotnet
- 界面记录
- Lucene分词与查询
- 三种主流VPN技术在档案信息共享网络中的应用研究
- 清华计算机专业考研经验分享
- java中java.lang.NoSuchMethodException: com.opensymphony.xwork2.ActionSupport.ledgerManage()错
- sybase存储过程返回参数
- SurfaceFlinger工作线程
- 动画
- netbeans打包java程序,并包含外部jar包
- java