Lucene3.5自定义评分以及根据域进行自定义评分设定

来源:互联网 发布:mac视频播放器加速 编辑:程序博客网 时间:2024/06/05 00:53
一、首先来综述一下Lucene自定义评分的步骤:
1、创建一个评分域
FieldScoreQuery fd = new FieldScoreQuery("score", Type.INT);
2、根据评分域和原有的query创建自定义的query对象
MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd);
@SuppressWarnings("serial")private class MyCustomScoreQuery extends CustomScoreQuery {public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {super(subQuery, valSrcQuery);}@Overrideprotected CustomScoreProvider getCustomScoreProvider(IndexReader reader)throws IOException {//默认情况实现的评分是通过原有的评分*传入进来的评分域所获取的评分来确定最终评分的//为了根据不同的需求进行评分,需要自己进行评分的设定/** * 自定评分的步骤 * 创建一个类继承于CustomScoreProvider * 覆盖customScore方法 *///return super.getCustomScoreProvider(reader);return new MyCustomScoreProvider(reader);}}
3、创建一个类继承于CustomScoreProvider,覆盖customScore方法
private class MyCustomScoreProvider extends CustomScoreProvider {public MyCustomScoreProvider(IndexReader reader) {super(reader);}        /**         * subQueryScore表示默认文档的打分         * valSrcScore表示评分域的打分         */@Overridepublic float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException {//return super.customScore(doc, subQueryScore, valSrcScore);return subQueryScore/valSrcScore;}}

二、根据域进行自定义评分设定
1、根据文件后缀名进行自定义评分
private class FilenameScoreQuery extends CustomScoreQuery {public FilenameScoreQuery(Query subQuery) {super(subQuery);}@Overrideprotected CustomScoreProvider getCustomScoreProvider(IndexReader reader)throws IOException {//return super.getCustomScoreProvider(reader);return new FilenameScoreProvider(reader);}}private class FilenameScoreProvider extends CustomScoreProvider {String [] filenames = null;public FilenameScoreProvider(IndexReader reader) {super(reader);try {filenames = FieldCache.DEFAULT.getStrings(reader, "filename");} catch (IOException e) {e.printStackTrace();}}@Overridepublic float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException {//如何根据doc获取相应的field的值/** * 在reader没有关闭之前,所有的数据会存储在一个缓存域中,可以通过缓存获取很多有用的信息 * filenames = FieldCache.DEFAULT.getStrings(reader, "filename");可以获取所有的filename域的信息 */String filename = filenames[doc];if(filename.endsWith(".txt")||filename.endsWith(".ini")) {return subQueryScore*1.5f;}//return super.customScore(doc, subQueryScore, valSrcScore);return subQueryScore/1.5f;}}
2、根据日期进行自定义评分
private class DateScoreProvider extends CustomScoreProvider {long[] dates = null;public DateScoreProvider(IndexReader reader) {super(reader);try {dates = FieldCache.DEFAULT.getLongs(reader, "date");} catch (IOException e) {e.printStackTrace();}}@Overridepublic float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException {long date = dates[doc];long today = new Date().getTime();long year = 1000*60*60*365;if(today - date <= year) {//为其加分}return super.customScore(doc, subQueryScore, valSrcScore);}}

Lucene实现自定义评分的关键思想:
向 IndexSearcher.search 传入一个 CustomScoreQuery 的子类,该子类要覆盖 getCustomScoreProvider 方法,并返回一个 CustomScoreProvider 对象;再用内部类(例如匿名内部类)的方式编写这个 CustomScoreProvider,覆盖其 customScore 方法。这个方法有3个参数:第一个参数代表文档id,第二个参数代表原有查询的评分,第三个参数代表我们设置的评分域的评分。这样我们就可以定义自己的一套评分算法,为搜索结果制定评分了。

完整代码如下:
1、工具类:
package com.dhb.util;import java.io.File;import java.io.FileReader;import java.io.IOException;import java.util.Random;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field;import org.apache.lucene.document.NumericField;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.store.LockObtainFailedException;import org.apache.lucene.util.Version;public class FileIndexUtils {private static Directory directory = null;static {try {directory = FSDirectory.open(new File("D:/luceneData/files/"));} catch (IOException e) {e.printStackTrace();}}public static Directory getDirectory() {return directory;}public static void index(boolean hasNew) {IndexWriter writer = null;try {IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));writer = new IndexWriter(directory, iwc);//是否新建索引if(hasNew) {writer.deleteAll();}Document doc = null;File f = new File("D:/luceneData/example");Random rand = new Random();int index = 0;for (File file : f.listFiles()) {int score = rand.nextInt(600);  //测试自定义评分用的doc = new Document();//测试自定义Filter用的doc.add(new Field("id", String.valueOf(index++), Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));doc.add(new Field("content", new FileReader(file)));doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));doc.add(new Field("path",file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));doc.add(new NumericField("size", Field.Store.YES, true).setIntValue((int) (file.length())));doc.add(new NumericField("score", Field.Store.YES, true).setIntValue(score));writer.addDocument(doc);}} catch 
(CorruptIndexException e) {e.printStackTrace();} catch (LockObtainFailedException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {if(writer!=null)try {writer.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}}}
注意:运行下面的示例之前,需要先自己调用 FileIndexUtils.index(true) 生成索引;本文的代码里没有调用它,因为这一步放在了另一个地方,这里就没有贴出来。
2、自定义类
package com.dhb.util;import java.io.IOException;import java.text.SimpleDateFormat;import java.util.Date;import org.apache.lucene.document.Document;import org.apache.lucene.index.CorruptIndexException;import org.apache.lucene.index.IndexReader;import org.apache.lucene.index.Term;import org.apache.lucene.search.FieldCache;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TermQuery;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.function.CustomScoreProvider;import org.apache.lucene.search.function.CustomScoreQuery;import org.apache.lucene.search.function.FieldScoreQuery;import org.apache.lucene.search.function.FieldScoreQuery.Type;import org.apache.lucene.search.function.ValueSourceQuery;public class MyScoreQuery {public void searchByScoreQuery() {try {IndexSearcher searcher = new IndexSearcher(IndexReader.open(FileIndexUtils.getDirectory()));    Query q = new TermQuery(new Term("content", "java"));    //1、创建一个评分域    FieldScoreQuery fd = new FieldScoreQuery("score", Type.INT);    //2、根据评分域和原有的query创建自定义的query对象    MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd);        TopDocs tds = null;    tds = searcher.search(query, 100);        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");for (ScoreDoc sd : tds.scoreDocs) {Document d = searcher.doc(sd.doc);System.out.println(sd.doc + ":(" + sd.score + ")["+ d.get("filename") + "【" + d.get("path") + "】---"+ d.get("size") + "----" + sdf.format(Long.valueOf(d.get("date")))+"自定义评分:"+d.get("score"));}        searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}public void searchByFileScoreQuery() {try {IndexSearcher searcher = new IndexSearcher(IndexReader.open(FileIndexUtils.getDirectory()));    Query q = new TermQuery(new Term("content", "java"));    //1、创建一个评分域    //FieldScoreQuery fd = new 
FieldScoreQuery("score", Type.INT);    FilenameScoreQuery query = new FilenameScoreQuery(q);        //2、根据评分域和原有的query创建自定义的query对象    //MyCustomScoreQuery query = new MyCustomScoreQuery(q, fd);        TopDocs tds = null;    tds = searcher.search(query, 100);        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");for (ScoreDoc sd : tds.scoreDocs) {Document d = searcher.doc(sd.doc);System.out.println(sd.doc + ":(" + sd.score + ")["+ d.get("filename") + "【" + d.get("path") + "】---"+ d.get("size") + "----" + sdf.format(Long.valueOf(d.get("date")))+"自定义评分:"+d.get("score"));}        searcher.close();} catch (CorruptIndexException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}@SuppressWarnings("serial")private class MyCustomScoreQuery extends CustomScoreQuery {public MyCustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {super(subQuery, valSrcQuery);}@Overrideprotected CustomScoreProvider getCustomScoreProvider(IndexReader reader)throws IOException {//默认情况实现的评分是通过原有的评分*传入进来的评分域所获取的评分来确定最终评分的//为了根据不同的需求进行评分,需要自己进行评分的设定/** * 自定评分的步骤 * 创建一个类继承于CustomScoreProvider * 覆盖customScore方法 *///return super.getCustomScoreProvider(reader);return new MyCustomScoreProvider(reader);}}private class MyCustomScoreProvider extends CustomScoreProvider {public MyCustomScoreProvider(IndexReader reader) {super(reader);}        /**         * subQueryScore表示默认文档的打分         * valSrcScore表示评分域的打分         */@Overridepublic float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException {//return super.customScore(doc, subQueryScore, valSrcScore);return subQueryScore/valSrcScore;}}@SuppressWarnings("serial")private class FilenameScoreQuery extends CustomScoreQuery {public FilenameScoreQuery(Query subQuery) {super(subQuery);}@Overrideprotected CustomScoreProvider getCustomScoreProvider(IndexReader reader)throws IOException {//return super.getCustomScoreProvider(reader);return new FilenameScoreProvider(reader);}}private class 
FilenameScoreProvider extends CustomScoreProvider {String [] filenames = null;public FilenameScoreProvider(IndexReader reader) {super(reader);try {filenames = FieldCache.DEFAULT.getStrings(reader, "filename");} catch (IOException e) {e.printStackTrace();}}@Overridepublic float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException {//如何根据doc获取相应的field的值/** * 在reader没有关闭之前,所有的数据会存储在一个缓存域中,可以通过缓存获取很多有用的信息 * filenames = FieldCache.DEFAULT.getStrings(reader, "filename");可以获取所有的filename域的信息 */String filename = filenames[doc];if(filename.endsWith(".txt")||filename.endsWith(".ini")) {return subQueryScore*1.5f;}//return super.customScore(doc, subQueryScore, valSrcScore);return subQueryScore/1.5f;}}@SuppressWarnings("unused")private class DateScoreProvider extends CustomScoreProvider {long[] dates = null;public DateScoreProvider(IndexReader reader) {super(reader);try {dates = FieldCache.DEFAULT.getLongs(reader, "date");} catch (IOException e) {e.printStackTrace();}}@Overridepublic float customScore(int doc, float subQueryScore, float valSrcScore)throws IOException {long date = dates[doc];long today = new Date().getTime();long year = 1000*60*60*365;if(today - date <= year) {//为其加分}return super.customScore(doc, subQueryScore, valSrcScore);}}}
3、测试类
package com.dhb.test;

import org.junit.Test;

import com.dhb.util.MyScoreQuery;

/**
 * Manual smoke tests for the custom-scoring examples; each test runs one
 * search and relies on the printed output for inspection.
 */
public class TestCustomScore {

    /** Runs the search that re-scores hits with the "score" field. */
    @Test
    public void test01() {
        new MyScoreQuery().searchByScoreQuery();
    }

    /** Runs the search that re-scores hits by file extension. */
    @Test
    public void test02() {
        new MyScoreQuery().searchByFileScoreQuery();
    }
}








0 0
原创粉丝点击