lucene HitCollector 的作用
来源:互联网 发布:网络编程基础知识入门 编辑:程序博客网 时间:2024/04/29 16:52
导读:
HitCollector 的作用很简单:通过 collect() 方法控制检索返回的结果。下面是 Lucene 自带的一个例子——使用一个优先队列,返回指定数目的 Top N 文档。
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.PriorityQueue;
/** A {@link HitCollector} implementation that collects the top-
scoring
* documents, returning them as a {@link TopDocs}. This is used by
{@link
* IndexSearcher} to implement {@link TopDocs}-based search.
*
*
This may be extended, overriding the collect method to, e.g.,
* conditionally invoke super()in order to filter which
* documents are collected.
**/
public class TopDocCollector extends HitCollector {
private int numHits;
private float minScore = 0.0f;
int totalHits;
PriorityQueue hq;
/** Construct to collect a given number of hits.
* @param numHits the maximum number of hits to collect
*/
public TopDocCollector(int numHits) {
this(numHits, new HitQueue(numHits));
}
TopDocCollector(int numHits, PriorityQueue hq) {
this.numHits = numHits;
this.hq = hq;
}
// javadoc inherited
public void collect(int doc, float score) {
if (score > 0.0f) {
totalHits++;
if (hq.size()= minScore) {
hq.insert(new ScoreDoc(doc, score));
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
}
}
}
/** The total number of documents that matched this query. */
public int getTotalHits() {return totalHits; }
/** The top-scoring hits. */
public TopDocs topDocs() {
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc)hq.pop();
float maxScore = (totalHits==0)
Float.NEGATIVE_INFINITY
: scoreDocs[0].score;
return new TopDocs(totalHits, scoreDocs, maxScore);
}
}
本文转自
http://blog.lough.com.cn/post/234/
HitCollector 的作用很简单:通过 collect() 方法控制检索返回的结果。下面是 Lucene 自带的一个例子——使用一个优先队列,返回指定数目的 Top N 文档。
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.PriorityQueue;
/** A {@link HitCollector} implementation that collects the top-
scoring
* documents, returning them as a {@link TopDocs}. This is used by
{@link
* IndexSearcher} to implement {@link TopDocs}-based search.
*
*
This may be extended, overriding the collect method to, e.g.,
* conditionally invoke super()in order to filter which
* documents are collected.
**/
public class TopDocCollector extends HitCollector {
private int numHits;
private float minScore = 0.0f;
int totalHits;
PriorityQueue hq;
/** Construct to collect a given number of hits.
* @param numHits the maximum number of hits to collect
*/
public TopDocCollector(int numHits) {
this(numHits, new HitQueue(numHits));
}
TopDocCollector(int numHits, PriorityQueue hq) {
this.numHits = numHits;
this.hq = hq;
}
// javadoc inherited
public void collect(int doc, float score) {
if (score > 0.0f) {
totalHits++;
if (hq.size()
hq.insert(new ScoreDoc(doc, score));
minScore = ((ScoreDoc)hq.top()).score; // maintain minScore
}
}
}
/** The total number of documents that matched this query. */
public int getTotalHits() {return totalHits; }
/** The top-scoring hits. */
public TopDocs topDocs() {
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
scoreDocs[i] = (ScoreDoc)hq.pop();
float maxScore = (totalHits==0)
Float.NEGATIVE_INFINITY
: scoreDocs[0].score;
return new TopDocs(totalHits, scoreDocs, maxScore);
}
}
本文转自
http://blog.lough.com.cn/post/234/
- lucene HitCollector 的作用
- lucene-编写HitCollector直接访问搜索结果
- Lucene的作用以及应用
- 自定义排序,Filter和HitCollector
- [lucene]关于对xml的lucene索引
- Lucene学习笔记:Lucene的总体结构
- 【Lucene实战】Lucene的分析过程
- solr/lucene和nutch/lucene的区别
- lucene(一) lucene一些概念的理解
- 【Lucene】Lucene和Oracle数据库的对比
- .NET 的 Lucene
- lucene的首次应用
- Lucene的工作原理
- lucene的demo
- Lucene 的学习 【摘】
- Lucene的平行索引
- Lucene的平行索引
- Lucene 的学习
- 权限系统
- 项目管理常用词语
- CFile写文本的换行问题
- Building a GNU/Linux ARM Toolchain (from scratch)
- 完美人生必知五句话
- lucene HitCollector 的作用
- lucene PriorityQueue 优先队列
- 小语种介绍:LISP/Scheme
- Java为什么支持反射机制?
- 在ubuntu 下使用飞信
- 冒泡排序法
- 后缀数组
- SVN服务器配置之 在Solaris9上安装SVN
- Tomcat下配置与使用CAS实现单点登录