按照范围过滤所使用的类是Lucene的RangeFilter

来源:互联网 发布:js before 编辑:程序博客网 时间:2024/06/02 21:18
/*
 * 按照范围过滤所使用的类是Lucene的RangeFilter
 * 可以按照上下文所规定的范围进行对文本的过滤
 * */
package filter;


import java.io.IOException;
import java.util.BitSet;


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RangeFilter;
import org.apache.lucene.search.TermQuery;


public class RangeFilterTest {
private final int SECURITY_ADVANCED = 0;
private final int SECURITY_MIDDLE = 1;
private final int SECURITY_NORMAL = 2;

public RangeFilterTest(String INDEX_STORE_PATH) {
// TODO Auto-generated constructor stub
try{
IndexWriter writer = new IndexWriter(INDEX_STORE_PATH, new StandardAnalyzer(), true);
writer.setUseCompoundFile(false);

Document doc1 = new Document();
Field f1 = new Field("bookNumber", "0000003", Field.Store.YES, Field.Index.UN_TOKENIZED);
Field f2 = new Field("bookname", "论宇称非对称模型", Field.Store.YES, Field.Index.TOKENIZED);
Field f3 = new Field("publishdate", "1999-01-01", Field.Store.YES, Field.Index.UN_TOKENIZED);
Field f4 = new Field("securitylevel", SECURITY_ADVANCED + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc1.add(f1);
doc1.add(f2);
doc1.add(f3);
doc1.add(f4);

Document doc2 = new Document();
f1 = new Field("bookNumber", "0000005", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "钢铁战士", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1995-07-15", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_MIDDLE + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc2.add(f1);
doc2.add(f2);
doc2.add(f3);
doc2.add(f4);

Document doc3 = new Document();
f1 = new Field("bookNumber", "0000001", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "相对论", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1963-02-14", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_ADVANCED + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc3.add(f1);
doc3.add(f2);
doc3.add(f3);
doc3.add(f4);


Document doc4 = new Document();
f1 = new Field("bookNumber", "0000006", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "黑猫警长", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1988-05-01", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_ADVANCED + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc4.add(f1);
doc4.add(f2);
doc4.add(f3);
doc4.add(f4);

Document doc5 = new Document();
f1 = new Field("bookNumber", "0000004", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "原子弹的爆破过程", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1959-10-21", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_ADVANCED + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc5.add(f1);
doc5.add(f2);
doc5.add(f3);
doc5.add(f4);

Document doc6 = new Document();
f1 = new Field("bookNumber", "0000007", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "钢铁是怎样炼成的", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1970-01-11", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_MIDDLE + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc6.add(f1);
doc6.add(f2);
doc6.add(f3);
doc6.add(f4);


Document doc7 = new Document();
f1 = new Field("bookNumber", "0000002", Field.Store.YES, Field.Index.UN_TOKENIZED);
f2 = new Field("bookname", "白毛女", Field.Store.YES, Field.Index.TOKENIZED);
f3 = new Field("publishdate", "1977-09-07", Field.Store.YES, Field.Index.UN_TOKENIZED);
f4 = new Field("securitylevel", SECURITY_NORMAL + "", Field.Store.YES, Field.Index.UN_TOKENIZED);
doc7.add(f1);
doc7.add(f2);
doc7.add(f3);
doc7.add(f4);
writer.addDocument(doc1);
writer.addDocument(doc2);
writer.addDocument(doc3);
writer.addDocument(doc4);
writer.addDocument(doc5);
writer.addDocument(doc6);
writer.addDocument(doc7);
writer.close();

//创建3个Term对象
Term normal = new Term("securitylevel", SECURITY_NORMAL + "");
Term middle = new Term("securitylevel", SECURITY_MIDDLE + "");
Term advanced = new Term("securitylevel", SECURITY_ADVANCED + "");

//构建一个BooleanQuery,将前面的Term对象作为“或”关系传入
BooleanQuery bq = new BooleanQuery();
bq.add(new TermQuery(normal), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(middle), BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(advanced), BooleanClause.Occur.SHOULD);

IndexSearcher searcher = new IndexSearcher(INDEX_STORE_PATH);

//实例化一个RangeFilter,将出版日期在1970年前和1990年后的文档过滤掉
RangeFilter filter = new RangeFilter("publishdate", "1970-01-01", "1990-01-01", true, true);

//检索
Hits hits = searcher.search(bq, filter); 


for(int i = 0; i < hits.length(); i++){
Document doc = hits.doc(i);
System.out.print("书号:");
System.out.println(doc.get("bookNumber"));
System.out.print("书名:");
System.out.println(doc.get("bookname"));
System.out.print("发布日期:");
System.out.println(doc.get("publishdate"));
System.out.print("安全级别:");
System.out.print(doc.get("securitylevel"));
int level = Integer.parseInt(doc.get("securitylevel"));
switch(level){
case SECURITY_ADVANCED:
System.out.println("高级");
break;
case SECURITY_MIDDLE:
System.out.println("中级");
break;

case SECURITY_NORMAL:
System.out.println("一般");
break;
}
System.out.println("========================");
}
}catch(IOException e){
e.printStackTrace();
}
}



//级别过滤器
public class AdvancedSecurityFilter extends Filter{
//安全级别常量
public static final int SECURITY_ADVANCED = 0;

@Override
public BitSet bits(IndexReader reader) throws IOException {
// TODO Auto-generated method stub

//首先初始化一个BigSet对象
final BitSet bits = new BitSet(reader.maxDoc());

//先将整个集合置为true
//表示当前集合内的所有文档都可以被检索到
bits.set(0, bits.size() - 1);

//构造一个Term对象,代表最高安全级别
Term term = new Term("securitylevel", SECURITY_ADVANCED + "");

//从索引中取出具有最高安全级别的文档
TermDocs termDocs = reader.termDocs(term);

//方法一
//遍历每个文档
while(termDocs.next()){
bits.set(termDocs.doc(), false);
}
return bits;
}
}


public static void main(String[] args) {
// TODO Auto-generated method stub
RangeFilterTest rf = new RangeFilterTest("E:\\Lucene项目\\索引文件");
}


}