Lucene 搜索(search)

来源:互联网 发布:青少年正确使用网络 编辑:程序博客网 时间:2024/05/29 05:02

Lucene 提供了很多可用于搜索的 API,下面是几种常用查询方式的简单示例程序:

package com.lxp.index;import java.io.IOException;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.Date;import java.util.HashMap;import java.util.Map;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.NumericField;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.queryParser.QueryParser;import org.apache.lucene.queryParser.QueryParser.Operator;import org.apache.lucene.search.BooleanClause.Occur;import org.apache.lucene.search.BooleanQuery;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.PhraseQuery;import org.apache.lucene.search.PrefixQuery;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TermRangeQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.WildcardQuery;import org.apache.lucene.store.Directory;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.store.RAMDirectory;import org.apache.lucene.util.Version;public class SearcherUtil {private String[] ids = {"1","2","3","4","5","6"};private String[] emails = {"aa@sina.com","bb@123.com","cc@qq.com","dd@uestc.com","ee@qq.com","ff@uestc.com"};private String[] contents = {"welcome to visited the space,I like book","hello boy, I like pingpeng ball","my name is cc I like game","I like football","I like football and I like basketball too","I like movie and swim"};private Date[] dates = null;private int[] attachs = {2,3,1,4,5,5};private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};private Directory directory;private IndexReader reader;private Map<String,Float> 
scores = new HashMap<String,Float>();public SearcherUtil() {directory = new RAMDirectory();setDates();index();}public void index() {IndexWriter writer = null;try {writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));writer.deleteAll();Document document = null;for(int i=0;i<ids.length;i++) {document = new Document();document.add(new Field("id", ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));document.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));document.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));document.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));//存储数字document.add(new NumericField("attachs",Field.Store.YES,true).setIntValue(attachs[i]));//存储日期document.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));String et = emails[i].substring(emails[i].lastIndexOf("@")+1);if(scores.containsKey(et)) {document.setBoost(scores.get(et));} else {document.setBoost(0.5f);}writer.addDocument(document);}} catch (CorruptIndexException e) {e.printStackTrace();} catch (LockObtainFailedException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {try {if(writer!=null)   writer.close();} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}private void setDates() {SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");try {dates = new Date[ids.length];dates[0] = sdf.parse("2010-02-18");dates[1] = sdf.parse("2012-03-24");dates[2] = sdf.parse("2011-02-18");dates[3] = sdf.parse("2012-02-18");dates[4] = sdf.parse("2014-05-18");dates[5] = sdf.parse("2013-06-30");} catch (ParseException e) {// TODO Auto-generated catch blocke.printStackTrace();}}public IndexSearcher getSearcher() {try {if(reader==null) reader = 
IndexReader.open(directory);else {IndexReader tr = IndexReader.openIfChanged(reader);if(tr!=null) {reader.close();reader = tr;}}return new IndexSearcher(reader);} catch (IOException e) {e.printStackTrace();}return null;}//精确查询public void searchByTerm(String field,String name,int num) {try {IndexSearcher searcher = getSearcher();TermQuery query = new TermQuery(new Term(field,name));TopDocs tds = searcher.search(query, num);;System.out.println("一共查询到:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attachs")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//查找以start开始,以end结束public void searchByTermRange(String field,String start,String end,int num) {try {IndexSearcher searcher = getSearcher();TermRangeQuery query = new TermRangeQuery(field, start, end, true, true);TopDocs tds = searcher.search(query, num);;System.out.println("一共查询到:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attachs")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//前缀查找public void searchByPrefix(String field,String value,int num) {try {IndexSearcher searcher = getSearcher();PrefixQuery query = new PrefixQuery(new Term(field,value));TopDocs tds = searcher.search(query, num);;System.out.println("一共查询到:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attachs")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) 
{e.printStackTrace();}}//通配符搜索(value="*uestc"或”uestc*"等进行测试,?表示匹配一个字符,*表示任意多个public void searchByWildcard(String field,String value,int num) {try {IndexSearcher searcher = getSearcher();WildcardQuery query = new WildcardQuery(new Term(field,value));TopDocs tds = searcher.search(query, num);;System.out.println("一共查询到:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attachs")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//BooleanQuery可以建立联合查询,名字中有zhangsan,并且content中有like,public void searchByBoolean(int num) {try {IndexSearcher searcher = getSearcher();BooleanQuery query = new BooleanQuery();query.add(new TermQuery(new Term("name","zhangsan")),Occur.MUST);query.add(new TermQuery(new Term("content","like")),Occur.MUST);TopDocs tds = searcher.search(query, num);;System.out.println("一共查询到:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attachs")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//短语查询,类似于like。。。。China,只知道两个单词,就可以public void searchByParse(int num) {try {IndexSearcher searcher = getSearcher();PhraseQuery query = new PhraseQuery();query.setSlop(1);query.add(new Term("content","I"));query.add(new Term("content","football"));TopDocs tds = searcher.search(query, num);;System.out.println("一共查询到:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attachs")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch 
(IOException e) {e.printStackTrace();}}//模糊匹配:FuzzyQuery// querypublic void searchByQueryParser(int num) {try {IndexSearcher searcher = getSearcher();//1、创建QueryParser,默认搜索域为contentQueryParser parser = new QueryParser(Version.LUCENE_35,"content",new StandardAnalyzer(Version.LUCENE_35));//搜索content中包含有like的Query query = parser.parse("like football");//有like 或footble的//改变空格的默认操作符,以下可以改成AND//parser.setDefaultOperator(Operator.AND);//Query query = parser.parse("like AND football");//有like和footble的//Query query = parser.parse("name:mike");改变搜索域为name的mike//  同样可以使用*和?作为通配符的匹配//Query query = parser.parse("name:mike*");//name中以mike开头的//开启第一个字符的通配符匹配,默认关闭因为效率不高//parser.setAllowLeadingWildcard(true);//Query query = parser.parse("name:*mike");//name中以mike结尾的//内容中没有mike,但是content中有football的+和-要放置到前面//Query query = parser.parse("- name:mike + football");//name中以mike结尾的//匹配ID为1-4的注意TO必须大写{}不包含,[]包含边界//Query query = parser.parse("id:[1 TO 4]");//TopDocs tds = searcher.search(query, num);;//完全匹配I like footballQuery query = parser.parse("\" I like football\"");System.out.println("一共查询到:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attachs")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} catch (org.apache.lucene.queryParser.ParseException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}


0 0
原创粉丝点击