08-自定义filter

来源:互联网 发布:手机视频加密软件 编辑:程序博客网 时间:2024/04/29 03:31

CustomFilter.java


package org.itat.lucene.util;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

/**
 * Demonstrates searching with a custom {@link MyIDFilter}.
 */
public class CustomFilter {

    /**
     * Searches the "content" field for the term "java", restricted by a
     * {@link MyIDFilter}, and prints up to 100 hits to standard output.
     *
     * <p>Any exception is printed to standard error; nothing is rethrown.
     */
    public void searchByCustomFilter() {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FileIndexUtil.getDirectory());
            searcher = new IndexSearcher(reader);
            Query q = new TermQuery(new Term("content", "java"));
            TopDocs tds = searcher.search(q, new MyIDFilter(), 100);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document d = searcher.doc(sd.doc);
                System.out.println(sd.doc + ":(" + sd.score + ")["
                        + d.get("filename") + "[" + d.get("path") + "]-->"
                        + d.get("size") + "----------->" + d.get("id"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release resources on both the success and the failure path.
            try {
                if (searcher != null) {
                    searcher.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
            // A searcher built from an existing reader does not close that
            // reader, so it has to be closed explicitly here.
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

FileIndexUtil.java

package org.itat.lucene.util;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds and exposes the file-system index stored under {@code d:/lucene/files}.
 */
public class FileIndexUtil {

    private static Directory directory = null;

    static {
        try {
            directory = FSDirectory.open(new File("d:/lucene/files"));
        } catch (Exception e) {
            // Left as best-effort: on failure `directory` stays null.
            e.printStackTrace();
        }
    }

    /**
     * @return the shared index directory, or {@code null} if it could not be opened
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Indexes every regular file found directly under {@code d:/lucene/example}.
     *
     * <p>Each document gets a sequential "id", the analyzed file "content",
     * stored "filename" / "path" / "date" / "size" fields and a random
     * "score" in [0, 600). The "score" field is indexed but not stored, so
     * {@code Document.get("score")} returns {@code null} at search time.
     *
     * @param hasNew when {@code true}, all existing documents are deleted first
     * @author 半仙儿
     */
    public static void index(boolean hasNew) {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(
                    Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            if (hasNew) {
                writer.deleteAll();
            }
            File file = new File("d:/lucene/example");
            // listFiles() returns null when the path is missing or is not a directory.
            File[] files = file.listFiles();
            if (files == null) {
                System.err.println("Cannot list source directory: " + file.getAbsolutePath());
                return;
            }
            // Random generator for the per-document "score" field.
            Random ran = new Random();
            int index = 0;
            for (File f : files) {
                // FileReader cannot open a directory; skipping avoids aborting the whole run.
                if (!f.isFile()) {
                    continue;
                }
                int score = ran.nextInt(600);
                Document doc = new Document();
                doc.add(new Field("id", String.valueOf(index++), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("content", new FileReader(f)));
                doc.add(new Field("filename", f.getName(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                doc.add(new Field("path", f.getAbsolutePath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                doc.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(f.lastModified()));
                doc.add(new NumericField("size", Field.Store.YES, true)
                        .setIntValue((int) f.length()));
                doc.add(new NumericField("score", Field.Store.NO, true)
                        .setIntValue(score));
                writer.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
MyIDFilter.java

package org.itat.lucene.util;

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.OpenBitSet;
// NOTE(review): unused in this class; kept only to leave the file's import list untouched.
import org.omg.CORBA.FREE_MEM;

/**
 * Custom filter that hides documents whose "id" field matches one of a fixed
 * list of ids. The original author's use case is promotional product search
 * on a web site.
 *
 * @author 半仙儿
 * @version V1.0
 */
public class MyIDFilter extends Filter {

    /** Values of the "id" field to exclude from search results. */
    private String[] delIds = { "100", "3", "4", "5", "6", "22", "33" };

    /**
     * Builds a bit set with every document enabled except those whose "id"
     * term is listed in {@link #delIds}.
     *
     * @param reader the index reader the filter is evaluated against
     * @return a bit set in which a set bit means "document passes the filter"
     * @throws IOException if the postings cannot be read
     */
    @Override
    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
        int maxDoc = reader.maxDoc();
        OpenBitSet obs = new OpenBitSet(maxDoc);
        // The end index of set(start, end) is exclusive, so it must be maxDoc
        // (not maxDoc - 1) or the last document is always filtered out.
        obs.set(0, maxDoc);
        for (String delId : delIds) {
            TermDocs tds = reader.termDocs(new Term("id", delId));
            try {
                // Clear every document carrying this id, not just the first posting.
                while (tds.next()) {
                    obs.clear(tds.doc());
                }
            } finally {
                tds.close();
            }
        }
        return obs;
    }
}

SearchTest.java

package org.itat.lucene.util;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;

/**
 * Search helpers that run a query against the index from {@link FileIndexUtil}
 * and print up to 50 hits to standard output.
 */
public class SearchTest {

    /** Shared reader, reused across searches and reopened when the index changes. */
    private static IndexReader reader = null;

    static {
        try {
            reader = IndexReader.open(FileIndexUtil.getDirectory());
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the shared reader, swapping in a fresh reader
     * first if the index has changed since the current one was opened.
     *
     * @return a new searcher, or {@code null} if the reader could not be opened
     */
    public IndexSearcher getSeacher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(FileIndexUtil.getDirectory());
            } else {
                IndexReader tr = IndexReader.openIfChanged(reader);
                if (tr != null) {
                    reader.close();
                    reader = tr;
                }
            }
            return new IndexSearcher(reader);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Parses {@code queryStr} against the "content" field and prints the hits,
     * optionally restricted by {@code filter}.
     *
     * @param queryStr query-parser syntax string
     * @param filter   filter to apply, or {@code null} for an unfiltered search
     */
    public void searcherByFilter(String queryStr, Filter filter) {
        IndexSearcher searcher = null;
        try {
            searcher = getSeacher();
            QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                    new StandardAnalyzer(Version.LUCENE_35));
            Query query = parser.parse(queryStr);
            TopDocs tds;
            if (filter != null) {
                tds = searcher.search(query, filter, 50);
            } else {
                tds = searcher.search(query, 50);
            }
            printHits(searcher, tds, false);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeSearcher(searcher);
        }
    }

    /**
     * Runs an already-built query and prints the hits.
     *
     * @param queryStr the query to execute (despite the name, a {@link Query} object)
     */
    public void searcherByQuery(Query queryStr) {
        IndexSearcher searcher = null;
        try {
            searcher = getSeacher();
            TopDocs tds = searcher.search(queryStr, 50);
            printHits(searcher, tds, false);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeSearcher(searcher);
        }
    }

    /**
     * Parses {@code queryStr} against the "content" field and prints the hits,
     * optionally ordered by {@code sort}. The output also includes the
     * document's "score" field value.
     *
     * @param queryStr query-parser syntax string
     * @param sort     sort order, or {@code null} for the default ordering
     */
    public void searcherBySort(String queryStr, Sort sort) {
        IndexSearcher searcher = null;
        try {
            searcher = getSeacher();
            QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                    new StandardAnalyzer(Version.LUCENE_35));
            Query query = parser.parse(queryStr);
            TopDocs tds;
            if (sort != null) {
                tds = searcher.search(query, 50, sort);
            } else {
                tds = searcher.search(query, 50);
            }
            printHits(searcher, tds, true);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeSearcher(searcher);
        }
    }

    /** Prints one line per hit; {@code withScoreField} adds the document's "score" field. */
    private void printHits(IndexSearcher searcher, TopDocs tds, boolean withScoreField)
            throws IOException {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sd : tds.scoreDocs) {
            Document d = searcher.doc(sd.doc);
            String separator = withScoreField ? "]-" + d.get("score") + "->" : "]-->";
            System.out.println(sd.doc + ":(" + sd.score + ")["
                    + d.get("filename") + "[" + d.get("path") + separator
                    + d.get("size") + "----"
                    + sdf.format(new Date(Long.parseLong(d.get("date"))))
                    + "]");
        }
    }

    /** Closes the searcher if it was created; the shared reader stays open for reuse. */
    private void closeSearcher(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

TestCustomFilter.java

package org.itat.lucene.test;

import org.itat.lucene.util.CustomFilter;
import org.junit.Test;

/**
 * JUnit driver for {@link CustomFilter}.
 */
public class TestCustomFilter {

    /**
     * Runs the custom-filter search; hits whose id is listed in
     * {@code MyIDFilter} are expected to be absent from the printed output.
     *
     * @author 半仙儿
     */
    @Test
    public void test01() {
        CustomFilter cf = new CustomFilter();
        cf.searchByCustomFilter();
    }
}

TestSearch.java

package org.itat.lucene.test;import org.apache.lucene.index.Term;import org.apache.lucene.search.Filter;import org.apache.lucene.search.NumericRangeFilter;import org.apache.lucene.search.Query;import org.apache.lucene.search.QueryWrapperFilter;import org.apache.lucene.search.Sort;import org.apache.lucene.search.SortField;import org.apache.lucene.search.TermRangeFilter;import org.apache.lucene.search.WildcardQuery;import org.itat.lucene.util.FileIndexUtil;import org.itat.lucene.util.SearchTest;import org.junit.Before;import org.junit.Test;public class TestSearch {private SearchTest st;@Beforepublic void init() {st = new SearchTest();}@Testpublic void index() {FileIndexUtil.index(true);}@Testpublic void test01() {// 不进行排序st.searcherBySort("java", Sort.INDEXORDER);// 以Doc的Id进行排序// st.searcherBySort("java", Sort.INDEXORDER);// 通过评分进行排序--设置了排序,就不能看到评分了。// st.searcherBySort("java", Sort.RELEVANCE);// 根据文件的大小进行排序// st.searcherBySort("java", new Sort(new SortField("size",// SortField.INT)));// 通过日期进行排序// st.searcherBySort("java", new Sort(new SortField("date",// SortField.LONG)));// 通过文件名进行排序// st.searcherBySort("java", new Sort(// new SortField("filename", SortField.STRING)));// 使用降序进行排序(通过设置SortField的最后的一个参数设置降序排序)// st.searcherBySort("java", new Sort(new SortField("filename",// SortField.STRING, true)));// 根据文件的大小和评分进行排序st.searcherBySort("java", new Sort(new SortField("size", SortField.INT), SortField.FIELD_SCORE));}@Testpublic void test02() {Filter tr = new TermRangeFilter("filename", "java.hhh", "java.ttt",true, true);tr = NumericRangeFilter.newIntRange("size", 500, 4900, true, true);//通过query进行过滤tr = new QueryWrapperFilter(new WildcardQuery(new Term("filename","*.ff")));st.searcherByFilter("java", tr);}@Testpublic void test03() {Query query = new WildcardQuery(new Term("filename", "c*"));st.searcherByQuery(query);}}
Test.java


package org.itat.lucene.test;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small regular-expression matching demo.
 */
public class Test {

    /**
     * Date-shaped pattern: four digits, dash, two digits, dash, two digits.
     * Equivalent long form: {@code \d\d\d\d-\d\d-\d\d}.
     */
    private static final Pattern DATE_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");

    /**
     * Prints whether the string "2011-12-23" fully matches the date pattern.
     *
     * @param args unused
     * @author 半仙儿
     */
    public static void main(String[] args) {
        Matcher matcher = DATE_PATTERN.matcher("2011-12-23");
        System.out.println(matcher.matches());
    }
}

过滤掉 id 为 100 的文档之后的运行结果:


没有过滤Id为100的结果:(将Id=100的换成1或别的)




正则表达式 \d{4}-\d{2}-\d{2} 匹配的是形如 2011-12-23 的日期字符串。

如果输入字符串符合该格式,则 matches() 返回 true;不符合则返回 false。


0 0
原创粉丝点击