
来源:互联网 发布:手机加密软件哪个好 编辑:程序博客网 时间:2024/06/05 08:35



Edge Triplet


/**
 * @author xubo
 * ref
 * time 20160503
 */
package org.apache.spark.graphx.learning

import org.apache.spark._
import org.apache.spark.graphx._
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD

/**
 * Small GraphX walkthrough: builds a four-vertex property graph of users and
 * their relationships, prints a few vertex/edge counts, and then prints one
 * sentence per edge using the triplet view (edge attribute plus the attributes
 * of its source and destination vertices).
 */
object gettingStart {

  /**
   * Runs the example on a local Spark master with 4 threads and prints the
   * results to standard output.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("gettingStart").setMaster("local[4]")
    val sc = new SparkContext(conf)

    try {
      // Create an RDD for the vertices: (id, (name, position))
      val users: RDD[(VertexId, (String, String))] =
        sc.parallelize(Array(
          (3L, ("rxin", "student")),
          (7L, ("jgonzal", "postdoc")),
          (5L, ("franklin", "prof")),
          (2L, ("istoica", "prof"))))

      // Create an RDD for edges: Edge(srcId, dstId, relationship)
      val relationships: RDD[Edge[String]] =
        sc.parallelize(Array(
          Edge(3L, 7L, "collab"),
          Edge(5L, 3L, "advisor"),
          Edge(2L, 5L, "colleague"),
          Edge(5L, 7L, "pi")))

      // Define a default user in case there are relationships with a missing user
      val defaultUser = ("John Doe", "Missing")

      // Build the initial Graph
      val graph = Graph(users, relationships, defaultUser)

      // Count all users which are postdocs
      println(graph.vertices.filter { case (_, (_, pos)) => pos == "postdoc" }.count())

      // Count all the edges where src > dst
      println(graph.edges.filter(e => e.srcId > e.dstId).count())

      // Same count, written with a pattern match on Edge
      println(graph.edges.filter { case Edge(src, dst, _) => src > dst }.count())

      // The reverse condition: edges where src < dst
      println(graph.edges.filter { case Edge(src, dst, _) => src < dst }.count())

      // Use the triplets view to create an RDD of facts (vertex names only).
      val facts: RDD[String] =
        graph.triplets.map(triplet =>
          s"${triplet.srcAttr._1} is the ${triplet.attr} of ${triplet.dstAttr._1}")
      facts.collect.foreach(println(_))

      // Use the triplets view again, this time including ids and both vertex attributes.
      println("\ntriplets:")
      // NOTE(review): there is no space between "the" and the edge attribute here;
      // the text is kept as-is because the recorded run output prints e.g. "is thecollab".
      val facts2: RDD[String] =
        graph.triplets.map(triplet =>
          s"${triplet.srcId}(${triplet.srcAttr._1} ${triplet.srcAttr._2})" +
            s" is the${triplet.attr} of " +
            s"${triplet.dstId}(${triplet.dstAttr._1} ${triplet.dstAttr._2})")
      facts2.collect.foreach(println(_))
    } finally {
      // Release the SparkContext even if one of the jobs above fails.
      sc.stop()
    }
  }
}


2016-05-03 19:18:48 WARN  MetricsSystem:71 - Using default name DAGScheduler for source because spark.app.id is not set.
1
1
1
3
rxin is the collab of jgonzal
franklin is the advisor of rxin
istoica is the colleague of franklin
franklin is the pi of jgonzal

triplets:
3(rxin student) is thecollab of 7(jgonzal postdoc)
5(franklin prof) is theadvisor of 3(rxin student)
2(istoica prof) is thecolleague of 5(franklin prof)
5(franklin prof) is thepi of 7(jgonzal postdoc)




0 0