The class used for range-based filtering is Lucene's RangeFilter
/*
 * The class used for range-based filtering is Lucene's RangeFilter.
 * It restricts search results to documents whose field values fall within the lower and upper bounds you specify.
 */
package filter;
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.search.TermQuery;
public class RangeFilterTest {
private static final int SECURITY_ADVANCED = 0;
private static final int SECURITY_MIDDLE = 1;
private static final int SECURITY_NORMAL = 2;
public RangeFilterTest(String INDEX_STORE_PATH) {
try{
IndexWriter writer = new IndexWriter(INDEX_STORE_PATH, new StandardAnalyzer(), true);
writer.setUseCompoundFile(false);
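//Index seven book documents; each one carries four fields:
//bookNumber, bookname, publishdate and securitylevel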
Document doc1 = new Document();
Field f1 = new Field("bookNumber", "0000003", Field.Store.YES, Field.Index.UN_TOKENIZED);
Field f2 = new Field("bookname", "论宇称非对称模型", Field.Store.YES, Field.Index.TOKENIZED);
Field f3 = new Field("publishdate", "1999-01-01", Field.Store.YES, Field.Index.UN_TOKENIZED);
Field f4 = new Field("securitylevel", SECURITY_ADVANCED + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc1.add(f1);
doc1.add(f2);
doc1.add(f3);
doc1.add(f4);
Document doc2 = new Document();
f1 = new Field("bookNumber", "0000005", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "钢铁战士", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1995-07-15", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_MIDDLE + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc2.add(f1);
doc2.add(f2);
doc2.add(f3);
doc2.add(f4);
Document doc3 = new Document();
f1 = new Field("bookNumber", "0000001", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "相对论", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1963-02-14", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_ADVANCED + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc3.add(f1);
doc3.add(f2);
doc3.add(f3);
doc3.add(f4);
Document doc4 = new Document();
f1 = new Field("bookNumber", "0000006", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "黑猫警长", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1988-05-01", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_ADVANCED + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc4.add(f1);
doc4.add(f2);
doc4.add(f3);
doc4.add(f4);
Document doc5 = new Document();
f1 = new Field("bookNumber", "0000004", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "原子弹的爆破过程", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1959-10-21", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_ADVANCED + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc5.add(f1);
doc5.add(f2);
doc5.add(f3);
doc5.add(f4);
Document doc6 = new Document();
f1 = new Field("bookNumber", "0000007", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "钢铁是怎样炼成的", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1970-01-11", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_MIDDLE + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc6.add(f1);
doc6.add(f2);
doc6.add(f3);
doc6.add(f4);
Document doc7 = new Document();
f1 = new Field("bookNumber", "0000002", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "白毛女", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1977-09-07", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_NORMAL + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc7.add(f1);
doc7.add(f2);
doc7.add(f3);
doc7.add(f4);
writer.addDocument(doc1);
writer.addDocument(doc2);
writer.addDocument(doc3);
writer.addDocument(doc4);
writer.addDocument(doc5);
writer.addDocument(doc6);
writer.addDocument(doc7);
writer.close();
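//Closing the writer flushes the new segments to disk before the searcher opens the index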
//Create three Term objects, one for each security level
Term normal = new Term("securitylevel", SECURITY_NORMAL + "");
Term middle = new Term("securitylevel", SECURITY_MIDDLE + "");
Term advanced = new Term("securitylevel", SECURITY_ADVANCED + "");
//Build a BooleanQuery and add the three terms as OR (SHOULD) clauses
BooleanQuery bq = new BooleanQuery();
bq.add(new TermQuery(normal), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(middle), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(advanced), BooleanClause.Occur.SHOULD);
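//With only SHOULD clauses, a document matches as soon as at least one of the three terms matches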
IndexSearcher searcher = new IndexSearcher(INDEX_STORE_PATH);
//Instantiate a RangeFilter that keeps only documents published between 1970-01-01 and 1990-01-01 (both bounds inclusive)
RangeFilter filter = new RangeFilter("publishdate", "1970-01-01", "1990-01-01", true, true);
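//Note: RangeFilter compares the raw term text lexicographically; that matches chronological
//order here only because the dates are stored as zero-padded yyyy-MM-dd strings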
//Run the filtered search
Hits hits = searcher.search(bq, filter);
for(int i = 0; i < hits.length(); i++){
Document doc = hits.doc(i);
System.out.print("书号:");
System.out.println(doc.get("bookNumber"));
System.out.print("书名:");
System.out.println(doc.get("bookname"));
System.out.print("发布日期:");
System.out.println(doc.get("publishdate"));
System.out.print("安全级别:");
System.out.print(doc.get("securitylevel"));
int level = Integer.parseInt(doc.get("securitylevel"));
switch(level){
case SECURITY_ADVANCED:
System.out.println("高级");
break;
case SECURITY_MIDDLE:
System.out.println("中级");
break;
case SECURITY_NORMAL:
System.out.println("一般");
break;
}
System.out.println("========================");
}
//Release the searcher once all results have been read
searcher.close();
}catch(IOException e){
e.printStackTrace();
}
}
//Security-level filter: removes documents carrying the highest security level from the results
public class AdvancedSecurityFilter extends Filter{
//Constant for the highest (advanced) security level
public static final int SECURITY_ADVANCED = 0;
@Override
public BitSet bits(IndexReader reader) throws IOException {
//First allocate a BitSet with one bit per document in the index
final BitSet bits = new BitSet(reader.maxDoc());
//Turn every bit on so that, by default, all documents are allowed through the filter
//(the upper bound of BitSet.set(from, to) is exclusive, so maxDoc() covers the whole index)
bits.set(0, reader.maxDoc());
//Build a Term representing the highest security level
Term term = new Term("securitylevel", SECURITY_ADVANCED + "");
//Fetch the postings of documents that carry this security level
TermDocs termDocs = reader.termDocs(term);
//Walk the postings and clear the bit of every advanced-level document
while(termDocs.next()){
bits.set(termDocs.doc(), false);
}
termDocs.close();
return bits;
}
}
public static void main(String[] args) {
RangeFilterTest rf = new RangeFilterTest("E:\\Lucene项目\\索引文件");
}
}
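Note that main() only exercises the RangeFilter; the AdvancedSecurityFilter defined above never runs. Below is a minimal usage sketch, assuming the filter is moved into its own top-level class (or made a static nested class) so it can be created without an enclosing RangeFilterTest instance, and that the index built by the constructor already exists on disk:

IndexSearcher searcher = new IndexSearcher("E:\\Lucene项目\\索引文件");
//Same OR query over the three security levels as in the constructor
BooleanQuery bq = new BooleanQuery();
bq.add(new TermQuery(new Term("securitylevel", "0")), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("securitylevel", "1")), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("securitylevel", "2")), BooleanClause.Occur.SHOULD);
//Apply the security filter: every document whose securitylevel is 0 is dropped from the results
Hits hits = searcher.search(bq, new AdvancedSecurityFilter());
for(int i = 0; i < hits.length(); i++){
System.out.println(hits.doc(i).get("bookname") + " / level " + hits.doc(i).get("securitylevel"));
}
searcher.close();

With the sample data indexed above, this should print only the two middle-level books and the one normal-level book; the four advanced-level documents are filtered out.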