多属性TopK——NRA算法(JAVA)实现

来源:互联网 发布:python 淘宝联盟 爬虫 编辑:程序博客网 时间:2024/05/21 09:37

这两天看了一下多属性的TopK,有关NRA,google之。

试着用Java实现了一下,可能有错误,欢迎指出错误。

写的英文注解也不好,凑合着。

算法中使用几种操作:And Or 和加权。

(1)Result的数据结构,这里的Result其实是上次搜索的输出(项目的一个数据结构)

public class Result implements Cloneable{private String id = null;private float score = 0;public Result(String id){this(id, 0);}public Result(String id, float score){this.id = id;this.score = score;}public String getId(){return this.id;}public void setId(String id){this.id = id;}public float getScore(){return this.score;}public void setScore(float score){this.score = score;}@Overridepublic boolean equals(Object obj) {if (obj instanceof Result) {Result name = (Result) obj;return (id.equals(name.id)&&Math.abs(score - name.score) < 0.001);}return super.equals(obj);}@Overrideprotected Object clone() throws CloneNotSupportedException {return super.clone();}@Overridepublic String toString(){return this.id+"="+this.score;}}

 

(2)多个Result组成的List

import java.util.ArrayList;import java.util.List;public class ResultList implements Cloneable{private List<Result> resultList;private float weight;public ResultList(){this.resultList = new ArrayList<Result>();this.weight = 1;}public ResultList(int num){this(num,1);}public ResultList(int num, float weight){ this.resultList = new ArrayList<Result>(num); this.weight = weight;}public List<Result> getResultList(){return this.resultList;}public Result getResult(int i){return this.resultList.get(i);}public float getWeight(){return this.weight;}public void setWeight(float weight){this.weight = weight;}public void setResult(Result result, int i){Result temp = this.resultList.get(i);temp.setId(result.getId());temp.setScore(result.getScore());}public void addResult(Result result){this.resultList.add(result);}public int getSize(){return this.resultList.size();}@Overridepublic boolean equals(Object obj) {if (obj instanceof ResultList) {ResultList name = (ResultList) obj;boolean flags = true;for(int i=0;i<resultList.size();i++){flags = flags&&resultList.get(i).equals(name.resultList.get(i));}return (flags&&Math.abs(resultList.size()-name.resultList.size())<0.001);}return super.equals(obj);}@Overrideprotected Object clone() throws CloneNotSupportedException {ResultList resultListCloneList = null;if(resultList != null){resultListCloneList = new ResultList();for(Result result:resultList){resultListCloneList.resultList.add((Result) result.clone());}}return resultListCloneList;}@Overridepublic String toString(){return this.resultList.toString()+"\n"+this.weight;}}

 

(3)Operation数据结构


public enum Operation {AND,OR,WEIGHT}


(4)算法的真正实现

import java.util.ArrayList;import java.util.BitSet;import java.util.HashMap;import java.util.Iterator;import java.util.Random;import java.util.TreeSet;import java.util.Map.Entry;import org.junit.Test;import process.search.engine.Operation;import process.search.engine.Result;import process.search.engine.ResultList;public class TestNRA {class Node implements Comparable<TestNRA.Node>{private String id;private float worst;private float best;private BitSet visited;public Node(String id){this(id, 0, 0);}public Node(String id, float worst, float best){this.id = id;this.worst = worst;this.best = best;this.visited = new BitSet();}public Node(Result r){this(r.getId(),r.getScore(),r.getScore());}public void setVisit(int i){this.visited.set(i, true);}public boolean getVisit(int i){return this.visited.get(i);}public BitSet getVisits(){return this.visited;}public String getId(){return this.id;}public void setId(String id ){this.id = id;}public float getWorst(){return this.worst;}public void setWorst(float minValue){this.worst = minValue;}public float getBest(){return this.best;}public void setBest(float best){this.best = best;}@Overridepublic int compareTo(Node node) {if(getWorst() > node.getWorst())return 1;else if( getWorst() == node.getWorst()){return this.getId().compareTo(node.getId());}elsereturn -1;}@Overridepublic boolean equals(Object o){if(o instanceof Node)return this.id.equals(((Node)o).id);if(o instanceof Result)return this.id.equals(((Result)o).getId());return false;}@Overridepublic String toString(){return this.id +"=[" + this.worst + "," + this.best + "] : " + this.visited;}}public Node findWorstNode(ArrayList<Node> list, boolean direction){Node node = null;if(list.size()>0)node = list.get(0);for(int i = 1 ; i < list.size(); i++){if(direction){if(1==node.compareTo(list.get(i)))node = list.get(i);}else{if(-1==node.compareTo(list.get(i)))node = list.get(i);}}return node;}public Node findBestNode(ArrayList<Node> list){Node node = null;if(list.size()>0)node = list.get(0);for(int i = 1; i < list.size(); i++){if(node.getBest()<list.get(i).getBest())node = list.get(i);}return node;}private float findMax(ArrayList<Float> list, BitSet visited){float ff = 0;for(int i = 0; i < list.size() ; i++){if ( ff < list.get(i) && !visited.get(i)){ff = list.get(i);}}return ff;}private float findMin(ArrayList<Float> list, BitSet visited){float ff = Float.MAX_VALUE;for(int i = 0 ; i < list.size(); i++){if( ff > list.get(i) && !visited.get(i)){ff = list.get(i);}}return ff;}public ResultList execute(ResultList[] resultSet, Operation op, int topk){// the column number of resultint colNum = resultSet.length;// result list of top KResultList result = new ResultList(topk);// hash map source record all element and there attributes' range [worst, best]HashMap<String,Node> source = new HashMap<String,Node>();// tree set top records the element of highest [worst] value //TreeSet<Node> top = new TreeSet<Node>();ArrayList<Node> top = new ArrayList<Node>();// tree set candidate records the source of element apart from the top set //TreeSet<Node> candidate = new TreeSet<Node>();ArrayList<Node> candidate = new ArrayList<Node>();// if the operation is weight, limits records the the attribute value of the elements in the last time.ArrayList<Float> limits = new ArrayList<Float>(colNum);// local temple variableResult r = null;Node node = null, tnode = null;Node topWorstNode=null,canWorstNode=null,canBestNode = null;float best = 0, max = 0, min = 0;for(int i = 0; i < colNum; i++){best += (resultSet[i].getResult(0).getScore()*resultSet[i].getWeight());limits.add(resultSet[i].getResult(0).getScore());}boolean stop = true;// start the algorithm, j stands for the line of the result setfor(int j = 0; ; j++){// i stands for the column of the result setfor(int i = 0; i < colNum; i++){// the element equals to resultSet[j][i];if(j>=resultSet[i].getSize()){stop = true;continue;}stop = false;r = resultSet[i].getResult(j);if(op==Operation.WEIGHT)best = best - (limits.get(i)-r.getScore())*resultSet[i].getWeight();elselimits.set(i,r.getScore());// if the element is in the source mapif(source.containsKey(r.getId())){// get the element from the sourcenode = source.get(r.getId());// set the attribute of the element has been visitednode.setVisit(i);// base different operationif(op == Operation.OR){// or operation need to assure the largest valueif(node.getBest()<r.getScore()){node.setBest(r.getScore());node.setWorst(r.getScore());}else if(node.getWorst()<r.getScore())node.setWorst(r.getScore());}else if(op == Operation.AND){// and operation need to assure the smallest valueif(node.getBest()>r.getScore()){node.setWorst(r.getScore());node.setBest(r.getScore());}}else {// weight operation need to assure all the attribute value with weightnode.setWorst(node.getWorst()+r.getScore()*resultSet[i].getWeight());}}else{// if the element is not in the source map, add a new node into sourcenode = new Node(r);// if the operation is weight, set the best value of the element to bestif(op==Operation.WEIGHT){node.setBest(best);}else if(op==Operation.OR){max = findMax(limits,node.getVisits());if(max>r.getScore())node.setBest(max);}else{min = findMin(limits,node.getVisits());node.setWorst(0);node.setBest(min);}node.setVisit(i);source.put(r.getId(), node);// if top size less than topkif(top.size()<topk){top.add(node);}// else add the node into candidateselse{candidate.add(node);}}// reset the [worst, best] range of all the element in the source map according// to each operation.if(op==Operation.OR){for(Entry<String,Node> element : source.entrySet()){tnode = element.getValue();if(!tnode.getVisit(i)){if(tnode.equals(node)){if(tnode.getBest()<node.getWorst()){tnode.setBest(node.getWorst());}if(tnode.getWorst()<node.getWorst()){tnode.setWorst(node.getWorst());}}else{if(tnode.getBest()>node.getBest()&&tnode.getWorst()<node.getBest())tnode.setBest(node.getBest());}}}} else if( op == Operation.AND){for(Entry<String,Node> element : source.entrySet()){tnode = element.getValue();if(!tnode.getVisit(i)){if(tnode.getBest()>node.getBest()){tnode.setBest(node.getBest());tnode.setWorst(0);}}}} else {for(Entry<String,Node> element : source.entrySet()){node = element.getValue();if(!node.getVisit(i)){if(node.equals(r)){node.setWorst(node.getWorst()+r.getScore()*resultSet[i].getWeight());}else{node.setBest(node.getBest()-(limits.get(i)-r.getScore())*resultSet[i].getWeight());}}else{if(node.equals(r)&&node.getVisits().length()!=1)node.setBest(node.getBest()-(limits.get(i)-r.getScore())*resultSet[i].getWeight());}}}// if candidate size > 0, compare the node of candidate with the largest worst value // to the node of top with smallest value. if it is larger, swap two node. if(candidate.size()>0){topWorstNode = findWorstNode(top,true);canWorstNode = findWorstNode(candidate,false);canBestNode = findBestNode(candidate);if(topWorstNode.getWorst()<canWorstNode.getWorst()){top.remove(topWorstNode);top.add(canWorstNode);candidate.remove(canWorstNode);candidate.add(topWorstNode);}}}if(stop)break;// record the values of element last visited.if(op == Operation.WEIGHT){limits.clear();for(int i = 0; i < colNum; i++)limits.add(resultSet[i].getResult(j).getScore());}// if the largest best value of candidate smaller than the smallest worst value if(candidate.size() != 0){// find the largest best value of the element in the candidateif(canBestNode.getBest()<topWorstNode.getWorst()){System.out.println("top : "+top);System.out.println("candidate: " +candidate);System.out.println("===================================================");System.out.println("execute at the line : " + j);break;//}}}}Iterator<Node> it = top.iterator();while(it.hasNext()){Node tn = it.next();Result rr = new Result(tn.getId(),tn.getWorst());result.addResult(rr);}return result;}@Testpublic void test(){ResultList[] resultSet = new ResultList[3];String[][] idList = {{"a","b","c","d","e","f","g","h","i","j"},{"e","i","b","a","f","d","g","h","j","c"},{"g","j","b","f","i","h","c","d","a","e"}};float[][] score = {{100,98,82,73,45,32,21,14,11,6},{80,75,62,55,53,45,39,37,26,19},{93,70,61,57,44,36,26,19,17,3}};for(int i = 0 ; i < 3 ; i++){resultSet[i] = new ResultList();for(int j = 0 ; j < 10; j++){Result r = new Result(idList[i][j],score[i][j]);resultSet[i].addResult(r);}resultSet[i].setWeight(1);}ResultList rl = execute(resultSet,Operation.WEIGHT,3);System.out.println(rl);}}


 

原创粉丝点击