match vertex

来源:互联网 发布:学哪种编程语言好? 编辑:程序博客网 时间:2024/06/05 03:43

数据的输入(数据图),每行以空格分隔,格式为:顶点ID 标签 邻接顶点ID…:

1 A 2 3 4 5 14 19
2 B 8 9
3 C 8
4 B 5 7
5 E 6 7
6 F
7 G
8 E
9 D 8 10
10 E 11 12
11 F
12 G
13 D 5
14 H 15 16 17 18
15 I
16 J
17 K
18 M
19 C 20
20 H 21 22
21 I
22 J

要查询的数据(查询图),每行以空格分隔,格式为:节点编号 标签 邻接节点编号|边权重…:

1 A 2|1 7|1
2 B 3|1 4|1
3 D 4|1
4 E 5|1 6|1
5 F
6 G
7 C 8|1
8 H 9|1 10|1 11|1 12|1
9 I
10 J
11 K
12 M


package test;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;

import org.apache.commons.cli.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.HashPartitioner;
import org.apache.hama.bsp.TextInputFormat;
import org.apache.hama.bsp.TextOutputFormat;
import org.apache.hama.graph.Edge;
import org.apache.hama.graph.GraphJob;
import org.apache.hama.graph.Vertex;
import org.apache.hama.graph.VertexInputReader;

import com.google.common.base.Objects;

/**
 * Hama graph job that matches the vertices of a data graph against a small
 * labelled query graph.
 *
 * <p>The query graph is read from a text file, joined into a single string
 * (lines separated by {@code "-"}) and handed to every vertex through the
 * configuration key {@code hama.probmatch.querygraph}.
 */
public class ProbMatch {

    /**
     * Vertex program. The vertex value is a {@link TriTextPair} whose first
     * component is the vertex label.
     */
    public static class ProbMatchVertex extends Vertex<Text, NullWritable, TriTextPair> {

        /** Whether this vertex's label occurs in the query graph. */
        private boolean match;
        private Graph queryGraph = null;
        private double alpha = 0.8;
        /** Messages received in superstep 1 (senders that have an edge to this vertex). */
        private List<TriTextPair> parents = null;
        /** Messages received in superstep 2 (replies from the vertices this vertex wrote to). */
        private List<TriTextPair> children = null;
        /** For every query label: the map produced by {@link #getMinDist} for that node. */
        private HashMap<Text, HashMap<Text, MinDist>> queryMap = null;

        /**
         * Parses the query graph from the configuration and pre-computes the
         * hop distances from every query node.
         */
        @Override
        public void setup(HamaConfiguration conf) {
            String queryStr = conf.get("hama.probmatch.querygraph");
            queryGraph = str2graph(queryStr);
            queryMap = new HashMap<Text, HashMap<Text, MinDist>>();
            List<GraphNode> queryNodes = queryGraph.getNodes();
            for (int i = 0; i < queryNodes.size(); i++) {
                // Nodes sharing a label overwrite each other; the last one wins.
                queryMap.put(new Text(queryNodes.get(i).getLabel()), getMinDist(queryGraph, i));
            }
        }

        /**
         * Runs one superstep.
         *
         * <ul>
         *   <li>0: vertices whose label is not in the query graph mark themselves
         *       {@code False|0} and halt; the others announce themselves to their neighbours.</li>
         *   <li>1: record the announcing vertices as parents and reply to each of them.</li>
         *   <li>2: record the replying vertices as children (third component set to "1").</li>
         *   <li>&gt;2: merge child updates, compute the match value, send it to the parents, halt.</li>
         * </ul>
         */
        @Override
        public void compute(Iterable<TriTextPair> messages) throws IOException {
            if (getSuperstepCount() == 0) {
                System.out.println("step0:" + getSuperstepCount());
                match = false;
                for (GraphNode queryNode : queryGraph.getNodes()) {
                    if (Objects.equal(this.getVertexLabel(), new Text(queryNode.getLabel()))) {
                        match = true;
                        break;
                    }
                }
                if (!match) {
                    setValue(new TriTextPair(getVertexLabel(),
                            new Text("False" + "|" + getSuperstepCount()), new Text("")));
                    voteToHalt();
                    return;
                }
                sendMessageToNeighbors(new TriTextPair(getVertexID(), getVertexLabel(), new Text("")));
            } else if (getSuperstepCount() == 1) {
                System.out.println("step1:" + getSuperstepCount());
                System.out.println("====" + getValue());
                parents = new ArrayList<TriTextPair>();
                for (TriTextPair msg : messages) {
                    parents.add(msg);
                    sendMessage(msg.getFirst(),
                            new TriTextPair(getVertexID(), getVertexLabel(), new Text("")));
                }
            } else if (getSuperstepCount() == 2) {
                System.out.println("step2:" + getSuperstepCount());
                children = new ArrayList<TriTextPair>();
                for (TriTextPair msg : messages) {
                    msg.setThird(new Text("1"));
                    children.add(msg);
                }
                // Nothing is sent in this superstep; match values are only
                // propagated to the parents from superstep 3 onwards.
                System.out.println("====" + getValue());
            } else if (getSuperstepCount() > 2) {
                System.out.println("step3:" + getSuperstepCount());
                for (TriTextPair msg : messages) {
                    // TriTextPair.equals compares the first component only, so this
                    // finds the child entry that has the sender's vertex id.
                    int pos = children.indexOf(msg);
                    if (pos >= 0) {
                        children.get(pos).setThird(msg.getThird());
                    }
                    // A message from an unknown sender is ignored instead of
                    // failing with an index of -1.
                }
                double matchPer = calculateValue(this.getVertexLabel());
                for (TriTextPair parent : parents) {
                    sendMessage(parent.getFirst(), new TriTextPair(getVertexID(), getVertexLabel(),
                            new Text(String.valueOf(matchPer))));
                }
                voteToHalt();
            }
        }

        /**
         * Computes the match value of this vertex for the given label.
         *
         * <p>NOTE(review): not implemented yet; always returns 0.
         */
        private double calculateValue(Text label) {
            return 0;
        }

        /**
         * Breadth-first search over {@code graph} starting at node {@code index},
         * counting every arc as one hop.
         *
         * @param graph graph to search; arc targets are 1-based node numbers
         * @param index 0-based index of the start node
         * @return map from the label of each reached node to its id (1-based),
         *         hop distance and predecessor id (1-based); the start node itself
         *         is not included, and nodes sharing a label overwrite each other
         */
        public HashMap<Text, MinDist> getMinDist(Graph graph, int index) {
            final int unreached = Integer.MAX_VALUE;
            List<GraphNode> nodes = graph.getNodes();
            int[] dist = new int[nodes.size()];
            boolean[] visit = new boolean[nodes.size()];
            HashMap<Text, MinDist> map = new HashMap<Text, MinDist>();
            for (int i = 0; i < nodes.size(); i++) {
                dist[i] = unreached;
            }
            dist[index] = 0;

            Node start = new Node();
            start.id = index;
            start.cost = 0;
            Queue<Node> queue = new ArrayDeque<Node>();
            queue.add(start);

            while (!queue.isEmpty()) {
                Node u = queue.remove();
                if (visit[u.id]) {
                    continue;
                }
                visit[u.id] = true;
                for (Arc arc = nodes.get(u.id).getFirstArc(); arc != null; arc = arc.getNextArc()) {
                    int v = arc.getNodeid() - 1;
                    if (v < 0 || v >= nodes.size()) {
                        // Arc points at a node that is not part of the graph; skip it.
                        continue;
                    }
                    if (!visit[v] && dist[v] > u.cost + 1) {
                        dist[v] = u.cost + 1;
                        Node next = new Node();
                        next.id = v;
                        next.cost = dist[v];
                        queue.add(next);
                        map.put(new Text(nodes.get(v).getLabel()), new MinDist(v + 1, dist[v], u.id + 1));
                    }
                }
            }
            return map;
        }

        /** Queue entry of the breadth-first search: node index and hops from the start. */
        private static class Node {
            public int id;
            public int cost;
        }

        /** Result entry of {@link #getMinDist}: node id, hop distance and predecessor id. */
        public static class MinDist {
            public int id;
            private int dist;
            private int parent;

            public MinDist(int id, int dist, int parent) {
                this.id = id;
                this.dist = dist;
                this.parent = parent;
            }

            public int getId() {
                return id;
            }

            public void setId(int id) {
                this.id = id;
            }

            public int getDist() {
                return dist;
            }

            public void setDist(int dist) {
                this.dist = dist;
            }

            public int getParent() {
                return parent;
            }

            public void setParent(int parent) {
                this.parent = parent;
            }
        }

        /**
         * Returns the label of this vertex (the first component of its value).
         */
        private Text getVertexLabel() {
            return getValue().getFirst();
        }

        /**
         * Serialises a graph, one node per line: {@code <number> <label> <target>|<weight> ...}.
         */
        public String graph2str(Graph graph) {
            StringBuilder str = new StringBuilder();
            List<GraphNode> nodes = graph.getNodes();
            for (int i = 0; i < nodes.size(); i++) {
                str.append(i + 1).append(" ").append(nodes.get(i).getLabel()).append(" ");
                for (Arc arc = nodes.get(i).getFirstArc(); arc != null; arc = arc.getNextArc()) {
                    str.append(arc.getNodeid()).append("|").append(arc.getWeight()).append(" ");
                }
                str.append("\n");
            }
            return str.toString();
        }

        /**
         * Parses a query graph from lines joined with {@code "-"}. Each line is
         * {@code <number> <label> <target>|<weight> ...}; a node's id is its
         * position in the input, the leading number is not read.
         *
         * @param queryStr the joined query text; {@code null} yields an empty graph
         * @return the parsed graph
         */
        public Graph str2graph(String queryStr) {
            Graph g = new Graph();
            if (queryStr == null) {
                return g;
            }
            String[] queryArr = queryStr.split("-");
            for (int i = 0; i < queryArr.length; i++) {
                if (queryArr[i].length() < 3) {
                    // Parsing stops at the first line too short to hold "<number> <label>".
                    break;
                }
                String[] arcs = queryArr[i].split(" ");
                GraphNode node = new GraphNode();
                node.setLabel(arcs[1]);
                node.setFirstArc(null);
                // Keep a tail pointer so appending does not re-walk the list.
                Arc tail = null;
                for (int j = 2; j < arcs.length; j++) {
                    Arc arc = getArcByStr(arcs[j]);
                    if (tail == null) {
                        node.setFirstArc(arc);
                    } else {
                        tail.setNextArc(arc);
                    }
                    tail = arc;
                }
                g.nodes.add(node);
            }
            return g;
        }

        /**
         * Parses one arc written as {@code <target>|<weight>}.
         */
        public static Arc getArcByStr(String arcStr) {
            String[] arcArr = arcStr.split("\\|");
            Arc arc = new Arc();
            arc.setNodeid(Integer.parseInt(arcArr[0]));
            arc.setWeight(Double.parseDouble(arcArr[1]));
            arc.setNextArc(null);
            return arc;
        }

        /** Adjacency-list graph; node numbers used by arcs are 1-based list positions. */
        public static class Graph {
            private List<GraphNode> nodes;

            public Graph() {
                this.nodes = new ArrayList<GraphNode>();
            }

            public List<GraphNode> getNodes() {
                return nodes;
            }

            public void setNodes(List<GraphNode> nodes) {
                this.nodes = nodes;
            }
        }

        /** Graph node: a label plus a singly linked list of outgoing arcs. */
        public static class GraphNode {
            private String label;
            private Arc firstArc;
            private double prob;

            /**
             * Returns the last arc of this node, or {@code null} when it has none.
             */
            public Arc getLastArc() {
                Arc arc = this.firstArc;
                if (arc == null) {
                    return null;
                }
                while (arc.nextArc != null) {
                    arc = arc.nextArc;
                }
                return arc;
            }

            public String getLabel() {
                return label;
            }

            public void setLabel(String label) {
                this.label = label;
            }

            public Arc getFirstArc() {
                return firstArc;
            }

            public void setFirstArc(Arc firstArc) {
                this.firstArc = firstArc;
            }

            public double getProb() {
                return prob;
            }

            public void setProb(double prob) {
                this.prob = prob;
            }
        }

        /** Outgoing arc: target node number, weight and link to the next arc. */
        private static class Arc {
            private int nodeid;
            private Arc nextArc;
            private double weight;

            public int getNodeid() {
                return nodeid;
            }

            public void setNodeid(int nodeid) {
                this.nodeid = nodeid;
            }

            public Arc getNextArc() {
                return nextArc;
            }

            public void setNextArc(Arc nextArc) {
                this.nextArc = nextArc;
            }

            public double getWeight() {
                return weight;
            }

            public void setWeight(double weight) {
                this.weight = weight;
            }
        }
    }

    /**
     * Turns one input line into a vertex. Two layouts are accepted:
     * {@code <id>\t<label> <neighbour> ...} and {@code <id> <label> <neighbour> ...}.
     */
    public static class ProbMatchTextReader
            extends VertexInputReader<LongWritable, Text, Text, NullWritable, TriTextPair> {

        @Override
        public boolean parseVertex(LongWritable key, Text value,
                Vertex<Text, NullWritable, TriTextPair> vertex) throws Exception {
            if (value.toString().lastIndexOf("\t") > 0) {
                String[] array = value.toString().split("\t");
                // The first element of the line is the vertex id.
                vertex.setVertexID(new Text(array[0]));
                String[] tokenArray = array[1].split(" ");
                // The vertex value starts out as (label, "", "").
                vertex.setValue(new TriTextPair(new Text(tokenArray[0]), new Text(""), new Text("")));
                for (int i = 1; i < tokenArray.length; i++) {
                    // Every remaining token is the target of an outgoing edge.
                    vertex.addEdge(new Edge<Text, NullWritable>(new Text(tokenArray[i]), null));
                }
            } else {
                String[] tokenArray = value.toString().split(" ");
                // The first element of the line is the vertex id.
                vertex.setVertexID(new Text(tokenArray[0]));
                // The vertex value starts out as (label, "", "").
                vertex.setValue(new TriTextPair(new Text(tokenArray[1]), new Text(""), new Text("")));
                for (int i = 2; i < tokenArray.length; i++) {
                    // Every remaining token is the target of an outgoing edge.
                    vertex.addEdge(new Edge<Text, NullWritable>(new Text(tokenArray[i]), null));
                }
            }
            return true;
        }
    }

    /**
     * Builds the graph job from the command line.
     *
     * @param args command-line arguments ({@code -i}, {@code -o}, {@code -q}, optional {@code -t})
     * @param conf Hama configuration for the job
     * @param opts option definitions used to parse {@code args}
     * @return the configured job
     * @throws ParseException if the arguments cannot be parsed or {@code -i}/{@code -o} is missing
     */
    public static GraphJob createJob(String[] args, HamaConfiguration conf, Options opts)
            throws IOException, ParseException {
        CommandLine cliParser = new GnuParser().parse(opts, args);
        if (!cliParser.hasOption("i") || !cliParser.hasOption("o")) {
            // Previously this only printed a message and then failed later inside new Path(null).
            throw new ParseException("No input or output path specified for ProbMatch.");
        }

        GraphJob job = new GraphJob(conf, ProbMatch.class);
        job.setJobName("ProbMatch");
        job.setVertexClass(ProbMatchVertex.class);
        job.setInputPath(new Path(cliParser.getOptionValue("i")));
        job.setOutputPath(new Path(cliParser.getOptionValue("o")));

        String queryPathStr = cliParser.getOptionValue("q");
        String queryStr = readFileFromHdfs(queryPathStr);
        job.set("hama.probmatch.querygraph", queryStr);

        // set the defaults
        job.setMaxIteration(30);
        if (cliParser.hasOption("t")) {
            job.setNumBspTask(Integer.parseInt(cliParser.getOptionValue("t")));
        }

        job.setVertexInputReaderClass(ProbMatchTextReader.class);
        job.setVertexIDClass(Text.class);
        job.setVertexValueClass(TriTextPair.class);
        job.setEdgeValueClass(NullWritable.class);
        job.setInputFormat(TextInputFormat.class);
        job.setInputKeyClass(LongWritable.class);
        job.setInputValueClass(Text.class);
        job.setPartitioner(HashPartitioner.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TriTextPair.class);
        return job;
    }

    /**
     * Reads a text file and joins its lines, appending {@code "-"} after each one.
     *
     * <p>Best effort: on any failure the stack trace is printed and whatever
     * was read so far (possibly the empty string) is returned.
     *
     * @param inputPath URI of the file to read
     * @return the joined file content
     */
    public static String readFileFromHdfs(String inputPath) {
        Configuration conf = new Configuration();
        StringBuilder fileStr = new StringBuilder();
        try {
            FileSystem hdfs = FileSystem.get(URI.create(inputPath), conf);
            try {
                FSDataInputStream getIt = hdfs.open(new Path(inputPath));
                BufferedReader d = new BufferedReader(new InputStreamReader(getIt));
                try {
                    String s;
                    while ((s = d.readLine()) != null) {
                        fileStr.append(s).append("-");
                    }
                } finally {
                    d.close();
                }
            } finally {
                hdfs.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        // Returning here rather than from a finally block lets Errors propagate.
        return fileStr.toString();
    }

    /**
     * Reads a stream to its end and returns the content decoded with the
     * platform default charset. The stream is not closed.
     */
    public static String inputStream2String(InputStream in) throws IOException {
        // Decoding through a single reader keeps multi-byte characters intact
        // when they straddle two reads.
        InputStreamReader reader = new InputStreamReader(in);
        StringBuilder out = new StringBuilder();
        char[] buf = new char[4096];
        for (int n; (n = reader.read(buf)) != -1;) {
            out.append(buf, 0, n);
        }
        return out.toString();
    }

    /**
     * Command-line entry point: parses the options, runs the job and prints
     * the elapsed time when it succeeds.
     */
    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException, ParseException {
        Options opts = new Options();
        opts.addOption("i", "input_path", true, "The Location of input path.");
        opts.addOption("o", "output_path", true, "The Location of output path.");
        opts.addOption("q", "query_graph_path", true, "The Location of query graph path.");
        opts.addOption("h", "help", false, "Print usage");
        opts.addOption("t", "task_num", true, "The number of tasks.");

        if (args.length < 3) {
            new HelpFormatter().printHelp(
                    "probmatch -i INPUT_PATH -o OUTPUT_PATH -q QUERY_GRAPH_PATH " + "[-t NUM_TASKS]", opts);
            System.exit(-1);
        }

        HamaConfiguration conf = new HamaConfiguration();
        GraphJob job = createJob(args, conf, opts);
        long startTime = System.currentTimeMillis();
        if (job.waitForCompletion(true)) {
            System.out.println("Job Finished in "
                    + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
        }
    }
}


package test;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

/**
 * A writable triple of {@link Text} values.
 *
 * <p>Equality and hashing are based on the FIRST component only, so two
 * triples with the same first value are equal whatever their second and
 * third values are.
 */
public final class TriTextPair implements Writable {

    Text first;
    Text second;
    Text third;

    /** Creates a triple of three empty texts (needed for deserialisation). */
    public TriTextPair() {
        first = new Text();
        second = new Text();
        third = new Text();
    }

    /** Creates a triple that holds the given texts (no copies are made). */
    public TriTextPair(Text first, Text second, Text third) {
        this.first = first;
        this.second = second;
        this.third = third;
    }

    public Text getFirst() {
        return first;
    }

    public void setFirst(Text first) {
        this.first = first;
    }

    public Text getSecond() {
        return second;
    }

    public void setSecond(Text second) {
        this.second = second;
    }

    public Text getThird() {
        return third;
    }

    public void setThird(Text third) {
        this.third = third;
    }

    /** Writes the three components in order: first, second, third. */
    @Override
    public void write(DataOutput out) throws IOException {
        first.write(out);
        second.write(out);
        third.write(out);
    }

    /** Reads the three components in the order they were written. */
    @Override
    public void readFields(DataInput in) throws IOException {
        first.readFields(in);
        second.readFields(in);
        third.readFields(in);
    }

    /** Returns the three components separated by single spaces. */
    @Override
    public String toString() {
        return first + " " + second + " " + third;
    }

    /**
     * Compares by the string form of the first component only.
     *
     * @return {@code false} for {@code null} or an object of another type
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof TriTextPair)) {
            return false;
        }
        String mine = keyOf(first);
        String theirs = keyOf(((TriTextPair) obj).getFirst());
        return mine == null ? theirs == null : mine.equals(theirs);
    }

    /** Hash of the first component, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        String key = keyOf(first);
        return key == null ? 0 : key.hashCode();
    }

    /** String form used for equality and hashing; {@code null} stays {@code null}. */
    private static String keyOf(Text text) {
        return text == null ? null : text.toString();
    }
}




1 0
原创粉丝点击