Lucene——八种搜索

来源:互联网 发布:人工智能能代替老师吗 编辑:程序博客网 时间:2024/05/22 00:34

本文演示的八种搜索:词语查找、范围查找、数字范围查找、前缀查找、通配符查找、Boolean 查询、短语查询、模糊查询。

先看测试类,各种搜索的用法一目了然:

package org.itat.test;import org.itat.index.SearcherUtil;import org.junit.Before;import org.junit.Test;public class TestSearcher {private SearcherUtil su;@Beforepublic void init(){su=new SearcherUtil();}@Testpublic void searchByTerm(){su.searchByTerm("content","i",10);//I like soccer 词语查找//6条结果查su.searchByTerm("content","like",10);////su.searchByTerm("name","zhangsan",10);//su.searchByTerm("name","jack",10);//su.searchByTerm("name","j",10);//0}@Testpublic void searchByTermRange(){//su.searchByTermRange("id","1","6",10);su.searchByTermRange("name","a","z",10);//???????????????????????}@Testpublic void searchByNumRange() {su.searchByNumricRange("attach",2,10, 5);}@Testpublic void searchByPrefix() {su.searchByPrefix("name", "j", 10);}@Testpublic void searchByWildcard() {//匹配@itat.org结尾的所有字符su.searchByWildcard("email", "*qq.com", 10);//???????????????????????su.searchByWildcard("email", "*@qq.com", 10);//???????????????????????//匹配j开头的有三个字符的namesu.searchByWildcard("name", "j*", 10);//su.searchByWildcard("name", "j???", 10);}@Testpublic void searchByBoolean() {su.searchByBoolean(10);}@Testpublic void searchByPhrase() {su.searchByPhrase(10);}@Testpublic void searchByFuzzy() {su.searchByFuzzy(10);}}

下面是主类

package org.itat.index;import java.io.IOException;import java.text.SimpleDateFormat;import java.util.Date;import java.util.HashMap;import java.util.Map;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.NumericField;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.index.Term;import org.apache.lucene.search.BooleanClause.Occur;import org.apache.lucene.search.BooleanQuery;import org.apache.lucene.search.FuzzyQuery;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.NumericRangeQuery;import org.apache.lucene.search.PhraseQuery;import org.apache.lucene.search.PrefixQuery;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TermRangeQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.WildcardQuery;import org.apache.lucene.store.Directory;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.store.RAMDirectory;import org.apache.lucene.util.Version;public class SearcherUtil {private Directory directory=null;private IndexReader reader=null;private String[] ids = { "1", "2", "3", "4", "5", "6" };private String[] emails = { "dd@dd.org", "aa@qq.com","bb@qq.com", "cc@ytu.edu", "ee@ee.org", "ff@ff.org" };private String[] contents = { "I like soccer", "I like football","I like soccer and I like game", " like book", "I like soccer","I like soccer" };private int[] attachs = { 1, 6, 8, 9, 5, 4, 5 };private String[] names = { "zhangsan", "lisi", "jone", "haoliu", "jim","jack" };private Map<String, Float> scores = new HashMap<String, Float>();private Date[] dates = null;public SearcherUtil() {directory = new 
RAMDirectory();setDate();index();}public void setDate() {SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-DD"); // 时间转换,这个不会有点丢人try {dates = new Date[ids.length];dates[0] = sdf.parse("2010-08-17");dates[1] = sdf.parse("2011-02-17");dates[2] = sdf.parse("2012-03-17");dates[3] = sdf.parse("2011-04-17");dates[4] = sdf.parse("2012-05-17");dates[5] = sdf.parse("2011-07-17");} catch (Exception e) {e.printStackTrace();// TODO: handle exception}}public void index() {IndexWriter writer = null;Document doc = null;try {writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));for (int i = 0; i <ids.length; i++) {doc = new Document();doc.add(new Field("id", ids[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));doc.add(new Field("email", emails[i], Field.Store.YES,Field.Index.ANALYZED));doc.add(new Field("content", contents[i], Field.Store.NO,Field.Index.ANALYZED));doc.add(new Field("name", names[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));// 存储数字doc.add(new NumericField("attach", Field.Store.YES, true).setIntValue(attachs[i]));// 存储日期doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);if (scores.containsKey(et)) {doc.setBoost(scores.get(et));} else {doc.setBoost(5f);}writer.addDocument(doc);}} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (LockObtainFailedException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();} finally {if (writer != null) {try {writer.close();} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}}public IndexSearcher getSearcher() {try {if (reader == null) {reader = IndexReader.open(directory);} else {IndexReader tr = 
IndexReader.openIfChanged(reader);if (tr != null) {reader.close();reader = tr;}}return new IndexSearcher(reader);} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}System.out.println("到这里了");return null;}//词语查找public void searchByTerm(String field,String name,int num){try {IndexSearcher indexSearcher = getSearcher();Query query = new TermQuery(new Term(field, name));TopDocs tds = indexSearcher.search(query, num);System.out.println("一共查询了:" + tds.totalHits);for (ScoreDoc sd : tds.scoreDocs) {Document doc = indexSearcher.doc(sd.doc);System.out.println(doc.get("id") + "---->" + doc.get("name") + "["+ doc.get("email") + "]-->" + doc.get("id") + ","+ doc.get("attach") + "," + doc.get("date"));}indexSearcher.close();} catch (CorruptIndexException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}}//范围查找public void searchByTermRange(String field,String start,String end,int num) {try {IndexSearcher searcher = getSearcher();Query query = new TermRangeQuery(field,start,end,true, true);TopDocs tds = searcher.search(query, num);System.out.println("一共查询了:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println(doc.get("id")+"---->"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attach")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//数字范围查找public void searchByNumricRange(String field,int start,int end,int num) {try {IndexSearcher searcher = getSearcher();Query query = NumericRangeQuery.newIntRange(field,start, end,true,true);TopDocs tds = searcher.search(query, num);System.out.println("一共查询了:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = 
searcher.doc(sd.doc);System.out.println(doc.get("id")+"---->"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attach")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//前缀查找public void searchByPrefix(String field,String value,int num) {try {IndexSearcher searcher = getSearcher();Query query = new PrefixQuery(new Term(field,value));TopDocs tds = searcher.search(query, num);System.out.println("一共查询了:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println(doc.get("id")+"---->"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attach")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//通配符查找//在传入的value中可以使用通配符:?和*,?表示匹配一个字符,*表示匹配任意多个字符public void searchByWildcard(String field,String value,int num) {try {IndexSearcher searcher = getSearcher();Query query = new WildcardQuery(new Term(field,value));TopDocs tds = searcher.search(query, num);System.out.println("一共查询了:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println(doc.get("id")+"---->"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attach")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//Boolean查询public void searchByBoolean(int num) {try {IndexSearcher searcher = getSearcher();BooleanQuery query = new BooleanQuery();/* * BooleanQuery可以连接多个子查询 * Occur.MUST表示必须出现 * Occur.MUST_NOT表示不能出现 * Occur.SHOULD表示可以出现,最好出现 */query.add(new TermQuery(new Term("name","zhangsan")), Occur.MUST_NOT);query.add(new TermQuery(new Term("content","game")),Occur.SHOULD);TopDocs tds = searcher.search(query, num);System.out.println("一共查询了:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = 
searcher.doc(sd.doc);System.out.println(doc.get("id")+"---->"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attach")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//短语查询//I like football//i和football中间是一个空public void searchByPhrase(int num) {try {IndexSearcher searcher = getSearcher();PhraseQuery query = new PhraseQuery();query.setSlop(5);//第一个Termquery.add(new Term("content","i"));//产生距离之后的第二个Termquery.add(new Term("content","game"));TopDocs tds = searcher.search(query, num);System.out.println("一共查询了:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println(doc.get("id")+"---->"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attach")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}//模糊查询public void searchByFuzzy(int num) {try {IndexSearcher searcher = getSearcher();FuzzyQuery query = new FuzzyQuery(new Term("name","mack"));//1//FuzzyQuery query = new FuzzyQuery(new Term("name","macc"));//0TopDocs tds = searcher.search(query, num);System.out.println("一共查询了:"+tds.totalHits);for(ScoreDoc sd:tds.scoreDocs) {Document doc = searcher.doc(sd.doc);System.out.println(doc.get("id")+"---->"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+doc.get("attach")+","+doc.get("date"));}searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}

原创粉丝点击