lucene模糊查询 精确查询 包含关键字 不包含关键字 包含全部关键字 范围查询 模糊查询 通配符查询 lucene各种查询实现示例
来源:互联网 发布:淘宝卖家投诉买家后果 编辑:程序博客网 时间:2024/04/30 11:43
/**
 * Advanced (multi-condition) Lucene search service.
 *
 * <p>Builds a nested {@code BooleanQuery} from grouped query conditions, runs it against the
 * index of the requested metadata database and enriches every hit with ranking statistics
 * read from the hit's backing table.
 *
 * <p>NOTE(review): the DAO members used below ({@code mssMetadbInfoDAO},
 * {@code mssMetadbStructDAO}, {@code mssStoreServerDAO}) are not declared in this listing;
 * presumably they are declared/injected elsewhere - confirm.
 *
 * @Author
 * @Date 2012-03-08
 */
public class AdvanceSearchService {

    /**
     * Runs the advanced query described by {@code qcq} and returns one page of hits.
     *
     * <p>{@code qcq.getStart()} is treated as 1-based and converted to a 0-based offset
     * (values below 1 map to offset 0). Hits are collected from that offset up to, but not
     * including, offset {@code qcq.getEnd()}; at least one hit is returned when the start
     * offset exists, even if {@code end <= start}.
     *
     * <p>Failures while searching are printed and swallowed; whatever was collected so far
     * is returned.
     *
     * @param qcq top-level query description (paging window, metadata database id, conditions)
     * @return the collected records, never {@code null}
     */
    public List<IndexRecord> search(QueryConditionQuestion qcq) {
        int start = qcq.getStart();
        int end = qcq.getEnd();
        List<IndexRecord> recordsList = new ArrayList<IndexRecord>();
        MssMetadbInfo mssMetadbInfo = mssMetadbInfoDAO.getMssMetadbInfoByid(qcq.getMetadbId());
        List<MssMetadbStruct> listStruct = mssMetadbStructDAO.getMssMetadbStruct(qcq.getMetadbId());
        String indexPath = mssMetadbInfo.getIndexpath();
        Map<Integer, QueryConditionVO[]> vos = qcq.getQueryConditions();
        Similarity similarity = new IKSimilarity();

        // Callers pass a 1-based start, so the matching array offset is one lower.
        start--;
        if (start < 0) {
            start = 0;
        }

        IndexReader reader = null;
        IndexSearcher isearcher = null;
        Directory directory = null;
        try {
            directory = NIOFSDirectory.open(new File(indexPath));
            // Keep our own handle on the reader: a searcher built from an existing reader
            // does not close that reader when the searcher itself is closed.
            reader = IndexReader.open(directory);
            isearcher = new IndexSearcher(reader);
            isearcher.setSimilarity(similarity);

            // Only the hits the paging loop can actually reach are requested (instead of
            // maxDoc() of them); totalHits is unaffected by this limit.
            long reachable = Math.max((long) end, (long) start + 1);
            int limit = (int) Math.max(1L, Math.min(reachable, (long) isearcher.maxDoc()));
            TopDocs topDocs = isearcher.search(getAdvanceQuery(vos), limit);

            if (topDocs.totalHits > 0) {
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                for (int j = start; j < scoreDocs.length; j++) {
                    // One IndexRecord per hit.
                    Document targetDoc = isearcher.doc(scoreDocs[j].doc);
                    recordsList.add(buildRecord(topDocs, scoreDocs[j], targetDoc, listStruct));
                    if (j + 1 >= end) {
                        break;
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("*******SearchService search方法查询索引报错 ********");
        } finally {
            // Close every resource independently so one failure cannot leak the others.
            try {
                if (isearcher != null) {
                    isearcher.close();
                }
            } catch (Exception e) {
                System.out.println("*******SearchService isearcher,directory关闭报错 ********");
            }
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (Exception e) {
                System.out.println("*******SearchService isearcher,directory关闭报错 ********");
            }
            try {
                if (directory != null) {
                    directory.close();
                }
            } catch (Exception e) {
                System.out.println("*******SearchService isearcher,directory关闭报错 ********");
            }
        }
        return recordsList;
    }

    /**
     * Builds the record for a single hit: score, fixed index fields, the fields configured
     * in {@code listStruct}, and the ranking statistics from the backing table.
     */
    private IndexRecord buildRecord(TopDocs topDocs, ScoreDoc hit, Document targetDoc,
            List<MssMetadbStruct> listStruct) {
        IndexRecord indexRecord = new IndexRecord();
        indexRecord.setTotalHits(topDocs.totalHits);
        indexRecord.setScore((hit.score) * 100); // score

        List<IndexField> listIndexField = new ArrayList<IndexField>();
        // Built-in fields that every indexed document carries.
        listIndexField.add(storedField(targetDoc, Consts.INDEXFIELD_ID));
        listIndexField.add(storedField(targetDoc, Consts.INDEXFIELD_ORIGID));
        listIndexField.add(storedField(targetDoc, Consts.INDEXFIELD_INTIME));
        listIndexField.add(storedField(targetDoc, Consts.INDEXFIELD_TABLE_ID));
        listIndexField.add(storedField(targetDoc, Consts.INDEXFIELD_METADBID));
        listIndexField.add(storedField(targetDoc, Consts.INDEXFIELD_STORE_SERVERID));
        listIndexField.add(storedField(targetDoc, Consts.INDEXFIELD_TBLNAME));
        // The Lucene document number is not a stored field; it comes from the hit itself.
        listIndexField.add(newField(Consts.INDEXFIELD_DOCNUM, String.valueOf(hit.doc)));
        // Remaining fields are driven by the metadata database structure.
        for (MssMetadbStruct mms : listStruct) {
            listIndexField.add(storedField(targetDoc, mms.getStcode()));
        }
        indexRecord.setDocfields(listIndexField.toArray(new IndexField[listIndexField.size()]));

        // Record id.
        indexRecord.setId(targetDoc.get(Consts.INDEXFIELD_ID));

        loadRankStatistics(indexRecord, targetDoc);
        return indexRecord;
    }

    /** Creates an {@code IndexField} holding the document's value for {@code name}. */
    private IndexField storedField(Document targetDoc, String name) {
        return newField(name, targetDoc.get(name));
    }

    /** Creates an {@code IndexField} with the given name and text. */
    private IndexField newField(String name, String text) {
        IndexField indexField = new IndexField();
        indexField.setName(name);
        indexField.setText(text);
        return indexField;
    }

    /**
     * Reads rating count, rating sum, recommendation count and custom rank order for the hit
     * from its backing table. Database errors are printed and otherwise ignored, leaving the
     * record without these statistics.
     */
    private void loadRankStatistics(IndexRecord indexRecord, Document targetDoc) {
        MssStoreServer mss = mssStoreServerDAO.getMssStoreServer(targetDoc.get(Consts.INDEXFIELD_STORE_SERVERID));
        Connection conn = null;
        ResultSet resultSet = null;
        Statement statement = null;
        try {
            conn = ConnectionUtil.getConnection(mss);
            // NOTE(review): table name and id are concatenated from index content. A table
            // name cannot be bound as a parameter, but the id could be - worth switching to
            // a parameterized statement if index content is not fully trusted.
            String sql = "select * from " + targetDoc.get(Consts.INDEXFIELD_TBLNAME) + " where id = '" + targetDoc.get(Consts.INDEXFIELD_ID) + "'";
            statement = conn.createStatement();
            resultSet = statement.executeQuery(sql);
            while (resultSet.next()) {
                indexRecord.setRankcount(resultSet.getLong("rankcount")); // number of ratings
                indexRecord.setRanksum(resultSet.getLong("ranksum")); // sum of rating scores
                indexRecord.setUpcount(resultSet.getLong("upcount")); // number of recommendations
                indexRecord.setCustidx(resultSet.getLong("custidx")); // explicitly assigned rank order
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            ConnectionUtil.closeResultSet(resultSet);
            ConnectionUtil.closeStatement(statement);
            ConnectionUtil.closeConnection(conn);
        }
    }

    /**
     * Builds the combined query for all condition groups.
     *
     * <p>A group with several members becomes a nested boolean query whose clauses are joined
     * by each member's operator; the nested query is attached to the result using the group
     * operator of the group's last member. For a single-member group the member's query is
     * attached directly using its group operator; the member's own operator (including the
     * "not" that {@link #getTermQuery} sets for contains-none) is not consulted in that case.
     * Operators other than and/or/not (case-insensitive) cause the clause to be left out.
     *
     * <p>Exceptions raised while building are printed and the query assembled so far is
     * returned.
     *
     * @param vos condition groups; may be {@code null}
     * @return the combined query; an empty boolean query when {@code vos} is {@code null}
     */
    public Query getAdvanceQuery(Map<Integer, QueryConditionVO[]> vos) {
        BooleanQuery bQueryAll = new BooleanQuery();
        if (vos != null) {
            try {
                // Iterate the values so the map keys need not be exactly 0..size-1.
                for (QueryConditionVO[] queryConditions : vos.values()) {
                    if (queryConditions == null || queryConditions.length == 0) {
                        // Nothing to add; also avoids re-using state from a previous group.
                        continue;
                    }
                    if (queryConditions.length == 1) {
                        QueryConditionVO vo = queryConditions[0];
                        Query query = getTermQuery(vo);
                        addClause(bQueryAll, query, vo.getGroupOperator());
                        continue;
                    }
                    // Combine the members of this group.
                    BooleanQuery bQueryEvery = new BooleanQuery();
                    for (QueryConditionVO vo : queryConditions) {
                        // Build the query first: getTermQuery may change vo's operator.
                        Query query = getTermQuery(vo);
                        addClause(bQueryEvery, query, vo.getOperator());
                    }
                    // Attach the group to the outermost query.
                    QueryConditionVO last = queryConditions[queryConditions.length - 1];
                    addClause(bQueryAll, bQueryEvery, last.getGroupOperator());
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        System.out.println("bqueryall=" + bQueryAll);
        return bQueryAll;
    }

    /** Adds {@code query} to {@code target} when {@code operator} is one of and/or/not. */
    private void addClause(BooleanQuery target, Query query, String operator) {
        Occur occur = toOccur(operator);
        if (occur != null) {
            target.add(query, occur);
        }
    }

    /** Maps and/not/or (case-insensitive) to the boolean clause kind; anything else to null. */
    private Occur toOccur(String operator) {
        if ("AND".equalsIgnoreCase(operator)) {
            return Occur.MUST;
        }
        if ("NOT".equalsIgnoreCase(operator)) {
            return Occur.MUST_NOT;
        }
        if ("OR".equalsIgnoreCase(operator)) {
            return Occur.SHOULD;
        }
        return null;
    }

    /**
     * Builds the Lucene query for a single condition.
     *
     * <p>Side effect: for the contains-none query type the condition's operator is set to
     * {@code "not"}.
     *
     * @param vo the condition (query type, fields, keywords, slop, weight)
     * @return the query, or {@code null} when the query type is not recognised, the keywords
     *         cannot be parsed, or another error occurs (errors are printed)
     */
    public Query getTermQuery(QueryConditionVO vo) {
        Query query = null;
        try {
            String queryType = vo.getQueryType();
            if (queryType.equalsIgnoreCase(Consts.QUERYTYPE_CONTAINS)
                    || queryType.equalsIgnoreCase(Consts.QUERYTYPE_CONTAINSNONE)
                    || queryType.equalsIgnoreCase(Consts.QUERYTYPE_CONTAINSALL)) {
                // Keyword queries: contains / contains-none / contains-all.
                QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_33, vo.getStypeIds(), new IKAnalyzer());
                if (queryType.equalsIgnoreCase(Consts.QUERYTYPE_CONTAINSNONE)) {
                    // Excluding keyword: switch the in-group relation to "not".
                    vo.setOperator("not");
                } else if (queryType.equalsIgnoreCase(Consts.QUERYTYPE_CONTAINSALL)) {
                    // Terms produced by the analyzer must all match.
                    queryParser.setDefaultOperator(Operator.AND);
                }
                queryParser.setPhraseSlop(vo.getPhraseSlop());
                try {
                    query = queryParser.parse(vo.getKeyWords()[0]);
                } catch (ParseException e) {
                    e.printStackTrace();
                }
            } else if (queryType.equalsIgnoreCase(Consts.QUERYTYPE_RANGE)) {
                // Range query, both bounds inclusive.
                query = new TermRangeQuery(vo.getStypeIds()[0], vo.getKeyWords()[0], vo.getKeyWords()[1], true, true);
            } else if (queryType.equalsIgnoreCase(Consts.QUERYTYPE_FUZZY)) {
                // Fuzzy query.
                query = new FuzzyQuery(new Term(vo.getStypeIds()[0], vo.getKeyWords()[0]));
            } else if (queryType.equalsIgnoreCase(Consts.QUERYTYPE_WILDCARD)) {
                // Wildcard query.
                query = new WildcardQuery(new Term(vo.getStypeIds()[0], vo.getKeyWords()[0]));
            }
            if (query != null) {
                // The boost multiplies the clause's score.
                query.setBoost(vo.getRankWeight());
            } else {
                System.out.println("*******AdvanceSearchService getTermQuery: no query built for queryType=" + queryType + " ********");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return query;
    }
}
import java.util.Map;
/**
 * Top-level condition object of an advanced query: paging window, sort flag, target id and
 * the grouped query conditions.
 */
public class QueryConditionQuestion {

    // Sample payload keys: "start":"0","isDynsort":"0","metadbid":"123","end":"40"

    /** Paging: first record to return. */
    private int start;

    /**
     * Whether to sort.
     * NOTE(review): spelled "isDynsory" here while the sample payload uses "isDynsort" -
     * verify how the JSON key is mapped onto this property.
     */
    private int isDynsory;

    /** Server id, per the original comment. */
    private String metadbId;

    /** Paging: last record to return. */
    private int end;

    /** Grouped query condition objects. */
    private Map<Integer, QueryConditionVO[]> queryConditions;

    /** Creates an instance with all numeric properties at 0 and all references unset. */
    public QueryConditionQuestion() {
        super();
    }

    /** @return the paging start */
    public int getStart() {
        return this.start;
    }

    /** @return the sort flag */
    public int getIsDynsory() {
        return this.isDynsory;
    }

    /** @return the id stored in {@code metadbId} */
    public String getMetadbId() {
        return this.metadbId;
    }

    /** @return the paging end */
    public int getEnd() {
        return this.end;
    }

    /** @return the grouped query conditions */
    public Map<Integer, QueryConditionVO[]> getQueryConditions() {
        return this.queryConditions;
    }

    /** @param start the paging start */
    public void setStart(int start) {
        this.start = start;
    }

    /** @param isDynsory the sort flag */
    public void setIsDynsory(int isDynsory) {
        this.isDynsory = isDynsory;
    }

    /** @param metadbId the id to store */
    public void setMetadbId(String metadbId) {
        this.metadbId = metadbId;
    }

    /** @param end the paging end */
    public void setEnd(int end) {
        this.end = end;
    }

    /** @param queryConditions the grouped query conditions */
    public void setQueryConditions(Map<Integer, QueryConditionVO[]> queryConditions) {
        this.queryConditions = queryConditions;
    }
}
---------------------------
/**
 * Lowest-level condition object of an advanced query: one query of a given type over some
 * index fields, plus the operators that relate it to its group and to other groups.
 *
 * <p>Sample payload from the original comment (reproduced as written there):
 * {"phraseSlop":"5","keywords":[{"k1":"中国"},{"k1":"人民"},{"k3":"解放军"}],
 * "rankWeight":"50","groupid":"1","orerator":"and","groupOperator":"and",
 * "querytype":"contains","stypeid":"title"}
 */
public class QueryConditionVO {

    /** Allowed distance between two terms. */
    private int phraseSlop;

    /** Query keywords. */
    private String[] keyWords;

    /** Weight. */
    private int rankWeight = 1;

    /** Group id. */
    private int groupId;

    /** Relation inside a group: and / or / not; defaults to "OR". */
    private String operator = "OR";

    /** Relation between groups: and / or / not; defaults to "OR". */
    private String groupOperator = "OR";

    /** Query type: range, contains, containsnone, fuzzy, wildcard, and so on. */
    private String queryType;

    /** Fields to query. */
    private String[] stypeIds;

    /** @return the phrase slop */
    public int getPhraseSlop() {
        return this.phraseSlop;
    }

    /** @return the keyword array as stored (no copy is made) */
    public String[] getKeyWords() {
        return this.keyWords;
    }

    /** @return the weight */
    public int getRankWeight() {
        return this.rankWeight;
    }

    /** @return the group id */
    public int getGroupId() {
        return this.groupId;
    }

    /** @return the in-group operator */
    public String getOperator() {
        return this.operator;
    }

    /** @return the between-groups operator */
    public String getGroupOperator() {
        return this.groupOperator;
    }

    /** @return the query type */
    public String getQueryType() {
        return this.queryType;
    }

    /** @return the field array as stored (no copy is made) */
    public String[] getStypeIds() {
        return this.stypeIds;
    }

    /** @param phraseSlop the phrase slop */
    public void setPhraseSlop(int phraseSlop) {
        this.phraseSlop = phraseSlop;
    }

    /** @param keyWords the keyword array; stored as given */
    public void setKeyWords(String[] keyWords) {
        this.keyWords = keyWords;
    }

    /** @param rankWeight the weight */
    public void setRankWeight(int rankWeight) {
        this.rankWeight = rankWeight;
    }

    /** @param groupId the group id */
    public void setGroupId(int groupId) {
        this.groupId = groupId;
    }

    /** @param operator the in-group operator */
    public void setOperator(String operator) {
        this.operator = operator;
    }

    /** @param groupOperator the between-groups operator */
    public void setGroupOperator(String groupOperator) {
        this.groupOperator = groupOperator;
    }

    /** @param queryType the query type */
    public void setQueryType(String queryType) {
        this.queryType = queryType;
    }

    /** @param stypeIds the field array; stored as given */
    public void setStypeIds(String[] stypeIds) {
        this.stypeIds = stypeIds;
    }
}
-------------------------------------------------------------------------------
调用格式规范
一、其他系统调用A系统高级组合查询接口需要传递如下xml字符串:
<?xml version="1.0" encoding="GBK"?>
<search ver="1" type="query">
<query question="advanceSearch">
<param name="jsonstr"
value="字符串格式的查询条件">
</param>
</query>
</search>
二、json字符串格式的查询条件格式:
{"metadbId":"123","start":"0","isDynsort":"0","end":"40",
"queryConditions":[{"queryGroups":[{"groupId":"1","queryType":"range","phraseSlop":"5",
"rankWeight":"50","stypeIds":["pubtime"],
"keyWords":["2010","2011"],"groupOperator":"and",
"operator":"or"}, {"groupId":"1","queryType":"range","phraseSlop":"5",
"stypeIds":["pubtime"],"keyWords":["2006","2008"],
"groupOperator":"and","operator":"or"}]},
{"queryGroups":[{"groupId":"2","queryType":"contains","phraseSlop":"5",
"rankWeight":"50","stypeIds":["title","lib"],
"keyWords":["人生 you"],"groupOperator":"and",
"operator":"and"}]}]}
三、json字符串的查询条件格式对应表:
key
值
含义
是否必须
metadbId
元数据库id
元数据库id
是
Start
检索开始条数
检索开始条数
是
End
检索结束条数
检索结束条数
是
isDynsort
0
不排序
否
1
排序
groupId
Int类型,
内部各组序号,小组内部该值应该相同
否
queryType
range
范围查询,包含边界
是
contains
包含关键字
containsall
包含全部关键字
containsnone
不包含关键字
fuzzy
模糊查询如:输入test则test和 text都满足条件
wildcard
通配符查询,?代表一个字符,*代表0或者多个字符。如输入 te?t或者te*t 则test和 text 都满足条件
phraseSlop
Int类型数值
关键词之间最大间隔,主要用于contains包含关键词查询,默认为0
否
rankWeight
Int类型数值
设置该查询条件的权重,数字越大则权重越大,默认为1
否
stypeIds:
需要检索的索引字段
设置该查询条件需要检索的索引字段,可以设置多个,多个字段之间以逗号分割。如:stypeIds:["title","lib"]或者stypeIds:["title"]
至少设置一个
是
keyWords
需要检索的关键字
可以设置1个或者2个,两个字段之间以逗号分割。
如:
keyWords:["2006","2008"]
或
keyWords:["人生 you"]
或
keyWords:["中华人民共和国人民解放军"]
(注:2个关键字只用于range(范围查询),其它查询都只能设置一个关键词,若有多个关键词则可以用空格分开,如:keyWords:["人生 you"])
是
groupOperator
and
并且关系
最外层组合查询之间的关系(注:每小组
内部的groupOperator的值必须相同,程序取该小组内部最后一条记录的groupOperator的值)
是
or
或者关系
not
不包括,不满足此条件
operator
and
并且关系
各小组内部各成员之间关系,默认为or
否
or
或者关系
not
不包括,不满足此条件
Json字符串示例:查询出【发布时间(pubtime)为2010年至2011年或者2006年至2008年】并且【标题(title)或者图书馆(lib)中含有人生和you关键字】的数据
{"metadbId":"123","start":"0","isDynsort":"0","end":"40",
"queryConditions":[{"queryGroups":[{"groupId":"1","queryType":"range","phraseSlop":"5",
"rankWeight":"50","stypeIds":["pubtime"],
"keyWords":["2010","2011"],"groupOperator":"and",
"operator":"or"}, {"groupId":"1","queryType":"range","phraseSlop":"5",
"stypeIds":["pubtime"],"keyWords":["2006","2008"],
"groupOperator":"and","operator":"or"}]},
{"queryGroups":[{"groupId":"2","queryType":"contains","phraseSlop":"5",
"rankWeight":"50","stypeIds":["title","lib"],
"keyWords":["人生 you"],"groupOperator":"and",
"operator":"and"}]}]}
A系统向B系统发送高级组合查询xml示例:
<?xml version="1.0" encoding="GBK"?>
<search ver="1" type="query">
<query question="advanceSearch">
<param name="jsonstr"
value=” {end:40,start:0,isDynsort:0,metadbId:402881be3387158c01338716928d0000, queryConditions:[{queryGroups:[{“groupId”:0,”groupOperator”:”and”,
”keyWords”:[“人生”], ”operator”:”OR”,
”phraseSlop”:0,”queryType”:”contains”,
”rankWeight”:1,”stypeIds”:[“title”]}]},
{queryGroups:[{“groupId”:0,”groupOperator”:”and”,
”keyWords”:[“人生艺术”],
”operator”:”OR”,”phraseSlop”:0,
”queryType”:”containsall”,”rankWeight”:1,
”stypeIds”:[“title”]}]},
{queryGroups:[{“groupId”:0,”groupOperator”:”and”,
”keyWords”:[“2006”,”2011”],
”operator”:”OR”,”phraseSlop”:0,
”queryType”:”range”,”rankWeight”:1,
”stypeIds”:[“pubtime”]}]}, {queryGroups:[{“groupId”:0,”groupOperator”:”and”,
”keyWords”:[“test”],”operator”:”OR”,
”phraseSlop”:0,”queryType”:”fuzzy”,
”rankWeight”:1,”stypeIds”:[“title”]}]},
{queryGroups:[{“groupId”:0,”groupOperator”:”and”,
”keyWords”:[“t*”],”operator”:”OR”,
”phraseSlop”:0,”queryType”:”wildcard”,
”rankWeight”:1,”stypeIds”:[“title”]}]}]}”>
</param>
</query>
</search>
- lucene模糊查询 精确查询 包含关键字 不包含关键字 包含全部关键字 范围查询 模糊查询 通配符查询 lucene各种查询实现示例
- 关键字查询,模糊查询,查询全部信息
- Linq的模糊查询(包含精确模糊查询)
- 模糊查询的关键字
- lucene多关键字查询
- lucene 查询时输入的关键字中包含的词必须出现才被查询出来实现
- lucene-FuzzyQuery模糊查询
- sql模糊查询查询多个关键字
- SQL多关键字模糊查询
- Lucene之模糊、精确、匹配、范围、多条件查询-yellowcong
- oracle 查询包含关键字的方法
- oracle包含最多关键字的查询语句
- 查询存储过程中包含的关键字
- java mongodb 包含与不包含 的 模糊查询
- 模糊查询&精确查询
- ajax实现模糊查询(关键字描红)
- MySQL单表多关键字模糊查询的实现
- Android开发--模拟通讯录包含模糊查询
- 在 IE 中调试 JavaScript
- 网页模拟点击(非鼠标,消息模拟)
- FieldCache在lucene中使用的代码解析,使用场景个人分析
- anjuta C/C++ IDE Ubuntu Installation Method
- 请教下问题
- lucene模糊查询 精确查询 包含关键字 不包含关键字 包含全部关键字 范围查询 模糊查询 通配符查询 lucene各种查询实现示例
- 让Linux下的SQL*Plus具有历史回调功能
- 最常被程序员们谎称读过的计算机书籍
- 8.3 分数类的运算符重载
- 最优二叉搜索树
- 如何判断浏览器是否禁用Cookie
- 2012C++程序设计实验报告【8.3】
- sql 主键
- 如何实现用返回值重载