
来源:互联网 发布:重庆西南大学网络学费 编辑:程序博客网 时间:2024/05/17 07:13



// Property graph with (String, String) vertex attributes and String edge attributes.
// Declaration only: the initializer is omitted in this excerpt.
val graph: Graph[(String, String), String]
// Use the implicit GraphOps.inDegrees operator
val inDegrees: VertexRDD[Int] = graph.inDegrees



/**
 * Summary of the functionality in the property graph.
 *
 * This is a signature-only listing: every member is declared without a body.
 *
 * @tparam VD type of the attribute attached to each vertex (see `vertices`)
 * @tparam ED type of the attribute attached to each edge (see `edges`)
 */
class Graph[VD, ED] {
  // Information about the Graph ===================================================================
  val numEdges: Long
  val numVertices: Long
  val inDegrees: VertexRDD[Int]
  val outDegrees: VertexRDD[Int]
  val degrees: VertexRDD[Int]
  // Views of the graph as collections =============================================================
  val vertices: VertexRDD[VD]
  val edges: EdgeRDD[ED]
  val triplets: RDD[EdgeTriplet[VD, ED]]
  // Functions for caching graphs ==================================================================
  def persist(newLevel: StorageLevel = StorageLevel.MEMORY_ONLY): Graph[VD, ED]
  def cache(): Graph[VD, ED]
  def unpersistVertices(blocking: Boolean = true): Graph[VD, ED]
  // Change the partitioning heuristic  ============================================================
  def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED]
  // Transform vertex and edge attributes ==========================================================
  def mapVertices[VD2](map: (VertexId, VD) => VD2): Graph[VD2, ED]
  def mapEdges[ED2](map: Edge[ED] => ED2): Graph[VD, ED2]
  def mapEdges[ED2](map: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2]
  def mapTriplets[ED2](map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2]
  def mapTriplets[ED2](map: (PartitionID, Iterator[EdgeTriplet[VD, ED]]) => Iterator[ED2])
    : Graph[VD, ED2]
  // Modify the graph structure ====================================================================
  def reverse: Graph[VD, ED]
  def subgraph(
      epred: EdgeTriplet[VD,ED] => Boolean = (x => true),
      vpred: (VertexId, VD) => Boolean = ((v, d) => true))
    : Graph[VD, ED]
  def mask[VD2, ED2](other: Graph[VD2, ED2]): Graph[VD, ED]
  def groupEdges(merge: (ED, ED) => ED): Graph[VD, ED]
  // Join RDDs with the graph ======================================================================
  def joinVertices[U](table: RDD[(VertexId, U)])(mapFunc: (VertexId, VD, U) => VD): Graph[VD, ED]
  def outerJoinVertices[U, VD2](other: RDD[(VertexId, U)])
      (mapFunc: (VertexId, VD, Option[U]) => VD2)
    : Graph[VD2, ED]
  // Aggregate information about adjacent triplets =================================================
  def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]]
  def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexId, VD)]]
  // The result is keyed by this method's own message type parameter, Msg.
  def aggregateMessages[Msg: ClassTag](
      sendMsg: EdgeContext[VD, ED, Msg] => Unit,
      mergeMsg: (Msg, Msg) => Msg,
      tripletFields: TripletFields = TripletFields.All)
    : VertexRDD[Msg]
  // Iterative graph-parallel computation ==========================================================
  def pregel[A](initialMsg: A, maxIterations: Int, activeDirection: EdgeDirection)(
      vprog: (VertexId, VD, A) => VD,
      sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId,A)],
      mergeMsg: (A, A) => A)
    : Graph[VD, ED]
  // Basic graph algorithms ========================================================================
  def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double]
  def connectedComponents(): Graph[VertexId, ED]
  def triangleCount(): Graph[Int, ED]
  def stronglyConnectedComponents(numIter: Int): Graph[VertexId, ED]
}



/**
 * Excerpt of the property-graph operators that transform attributes.
 * Signature-only listing: members are declared without bodies.
 *
 * Each operator returns a graph whose vertex or edge attribute type is
 * replaced by the result type of the supplied `map` function.
 */
class Graph[VD, ED] {
  def mapVertices[VD2](map: (VertexId, VD) => VD2): Graph[VD2, ED]
  def mapEdges[ED2](map: Edge[ED] => ED2): Graph[VD, ED2]
  def mapTriplets[ED2](map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2]
}



// Map every (id, attribute) vertex pair through mapUdf, then build a new graph
// from the transformed vertices and the original edges.
val newVertices = graph.vertices.map { case (id, attr) => (id, mapUdf(id, attr)) }
val newGraph = Graph(newVertices, graph.edges)

// Apply mapUdf to each vertex's id and attribute through the mapVertices operator.
val newGraph = graph.mapVertices { (vertexId, vertexAttr) =>
  mapUdf(vertexId, vertexAttr)
}


// Given a graph where the vertex property is the out degree
// (vertices with no entry in graph.outDegrees get 0).
val inputGraph: Graph[Int, String] =
  graph.outerJoinVertices(graph.outDegrees)((vid, _, degOpt) => degOpt.getOrElse(0))
// Construct a graph where each edge contains the weight
// and each vertex is the initial PageRank
val outputGraph: Graph[Double, Double] =
  inputGraph.mapTriplets(triplet => 1.0 / triplet.srcAttr).mapVertices((id, _) => 1.0)



/**
 * Excerpt of the property-graph operators that modify graph structure.
 * Signature-only listing: members are declared without bodies.
 *
 * Every operator returns a `Graph[VD, ED]`, i.e. the attribute types are unchanged.
 */
class Graph[VD, ED] {
  def reverse: Graph[VD, ED]
  def subgraph(epred: EdgeTriplet[VD,ED] => Boolean,
               vpred: (VertexId, VD) => Boolean): Graph[VD, ED]
  def mask[VD2, ED2](other: Graph[VD2, ED2]): Graph[VD, ED]
  def groupEdges(merge: (ED, ED) => ED): Graph[VD,ED]
}


// Create an RDD for the vertices
val users: RDD[(VertexId, (String, String))] =
  sc.parallelize(Array((3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
                       (5L, ("franklin", "prof")), (2L, ("istoica", "prof")),
                       (4L, ("peter", "student"))))
// Create an RDD for edges
val relationships: RDD[Edge[String]] =
  sc.parallelize(Array(Edge(3L, 7L, "collab"),    Edge(5L, 3L, "advisor"),
                       Edge(2L, 5L, "colleague"), Edge(5L, 7L, "pi"),
                       Edge(4L, 0L, "student"),   Edge(5L, 0L, "colleague")))
// Define a default user in case there are relationships that reference a missing user
val defaultUser = ("John Doe", "Missing")
// Build the initial Graph
val graph = Graph(users, relationships, defaultUser)
// Notice that there is a user 0 (for which we have no information) connected to users
// 4 (peter) and 5 (franklin)
graph.triplets.map(
  triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
).collect.foreach(println(_))
// Remove missing vertices as well as the edges connected to them
val validGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
// The valid subgraph will disconnect users 4 and 5 by removing user 0
validGraph.vertices.collect.foreach(println(_))
validGraph.triplets.map(
  triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
).collect.foreach(println(_))

// Run Connected Components
val ccGraph = graph.connectedComponents() // No longer contains missing field
// Remove missing vertices as well as the edges connected to them
val validGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
// Restrict the answer to the valid subgraph
val validCCGraph = ccGraph.mask(validGraph)



/**
 * Excerpt of the property-graph operators that join an RDD with the graph's vertices.
 * Signature-only listing: members are declared without bodies.
 *
 * Both operators take the table and the `map` function in separate parameter lists.
 * `joinVertices` keeps the vertex attribute type `VD`; `outerJoinVertices` receives
 * the joined value as an `Option[U]` and may change the vertex attribute type to `VD2`.
 */
class Graph[VD, ED] {
  def joinVertices[U](table: RDD[(VertexId, U)])(map: (VertexId, VD, U) => VD)
    : Graph[VD, ED]
  def outerJoinVertices[U, VD2](table: RDD[(VertexId, U)])(map: (VertexId, VD, Option[U]) => VD2)
    : Graph[VD2, ED]
}



// Costs keyed by vertex id. Declaration only: the initializer is omitted in this excerpt.
val nonUniqueCosts: RDD[(VertexId, Double)]
// Combine the costs that share a vertex id by summing them.
val uniqueCosts: VertexRDD[Double] =
  graph.vertices.aggregateUsingIndex(nonUniqueCosts, (a, b) => a + b)
// Add the aggregated extra cost to the existing attribute of each matching vertex.
val joinedGraph = graph.joinVertices(uniqueCosts)(
  (id, oldCost, extraCost) => oldCost + extraCost)


val outDegrees: VertexRDD[Int] = graph.outDegrees
// Replace each vertex attribute with the vertex's out degree.
val degreeGraph = graph.outerJoinVertices(outDegrees) { (id, oldAttr, outDegOpt) =>
  // No outDegree means zero outDegree
  outDegOpt.getOrElse(0)
}


// joinVertices is declared with two parameter lists (table, then map function),
// so the function goes in its own argument list. The explicit parameter types
// from the original snippet are kept.
val joinedGraph = graph.joinVertices(uniqueCosts)(
  (id: VertexId, oldCost: Double, extraCost: Double) => oldCost + extraCost)
