spark MLlib 学习
来源:互联网 发布:淘宝助理ipad版 编辑:程序博客网 时间:2024/04/18 10:00
Spark GraphX - 用户关系
import org.apache.spark.graphx.{VertexRDD, VertexId, Graph, Edge}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Small GraphX walkthrough on a six-person "user relationship" graph.
 *
 * Vertices carry `(name, age)`; edges carry an `Int` attribute. The program
 * prints the results of a handful of basic graph operations: vertex/edge
 * filtering, triplet views, degree computation, attribute mapping, sub-graph
 * extraction and joining degree information back onto the vertices.
 */
object UserRelationship {

  /**
   * Vertex attribute enriched with degree information.
   *
   * Declared at object level (not inside a method) so it does not capture an
   * enclosing method scope when shipped inside Spark closures.
   */
  final case class User(name: String, age: Int, inDeg: Int, outDeg: Int)

  /**
   * Entry point: creates a local single-threaded SparkContext, runs the demo
   * and always stops the context afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[1]")
      .setAppName(" user relationship ")
    val sc = new SparkContext(conf)
    // Release the context even if one of the demo steps throws.
    try runDemo(sc)
    finally sc.stop()
  }

  /** Returns whichever `(vertexId, degree)` pair has the larger degree; `b` wins ties. */
  private def maxByDegree(a: (VertexId, Int), b: (VertexId, Int)): (VertexId, Int) =
    if (a._2 > b._2) a else b

  /** Builds the sample graph and prints the result of each demonstrated operation. */
  private def runDemo(sc: SparkContext): Unit = {
    // id, (name, age)
    val vertexArray = Array(
      (1L, ("Alice", 28)),
      (2L, ("Bob", 27)),
      (3L, ("Charlie", 65)),
      (4L, ("David", 42)),
      (5L, ("Ed", 55)),
      (6L, ("Fran", 50)))

    // Each Edge has a srcId and a dstId referring to vertex ids in vertexArray,
    // plus an Int attribute.
    val edgeArray = Array(
      Edge(2L, 1L, 7),
      Edge(2L, 4L, 2),
      Edge(3L, 2L, 4),
      Edge(3L, 6L, 3),
      Edge(4L, 1L, 1),
      Edge(5L, 2L, 2),
      Edge(5L, 3L, 8),
      Edge(5L, 6L, 3))

    val vertexRDD: RDD[(Long, (String, Int))] = sc.parallelize(vertexArray)
    val edgeRDD: RDD[Edge[Int]] = sc.parallelize(edgeArray)
    val graph: Graph[(String, Int), Int] = Graph(vertexRDD, edgeRDD)

    // Vertices whose age is greater than 30.
    println("find whose age > 30")
    graph.vertices
      .filter { case (_, (_, age)) => age > 30 }
      .collect
      .foreach { case (_, (name, age)) => println(s"$name is $age") }

    // Edges whose attribute is greater than 5.
    println(" find the attribute > 5 ")
    graph.edges
      .filter(e => e.attr > 5)
      .collect
      .foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

    // Triplets (edge plus both endpoint attributes) whose edge attribute is greater than 5.
    println("edge attr >5")
    for (triple <- graph.triplets.filter(t => t.attr > 5).collect) {
      println(s"${triple.srcAttr._1} likes ${triple.dstAttr._1}")
    }

    // Degree operations: the vertices with the largest out-degree, in-degree and total degree.
    println("the max outDegrees and inDegress Degress is :")
    println("max of outDegrees:" + graph.outDegrees.reduce(maxByDegree) +
      "max of inDegrees:" + graph.inDegrees.reduce(maxByDegree) +
      "max of Degrees:" + graph.degrees.reduce(maxByDegree))

    // Vertex transformation: age + 10. The function returns only the new vertex
    // attribute (name, age); mapVertices keeps the vertex id itself.
    graph
      .mapVertices { case (_, (name, age)) => (name, age + 10) }
      .vertices
      .collect
      .foreach(v => println(s"${v._2._1} is ${v._2._2}"))

    // Edge transformation: attribute * 2.
    graph
      .mapEdges(e => e.attr * 2)
      .edges
      .collect
      .foreach(e => println(s"${e.srcId} to ${e.dstId} attr ${e.attr}"))

    // Sub-graph restricted to vertices whose age is at least 30.
    println("顶点年纪>=30的子图:")
    val subGraph = graph.subgraph(vpred = (_, vd) => vd._2 >= 30)

    println("子图所有顶点:")
    subGraph.vertices.collect.foreach(v => println(s"${v._2._1} is ${v._2._2}"))

    println("subGraph edges:")
    subGraph.edges.collect.foreach(e => println(s"${e.srcId} to ${e.dstId} att ${e.attr}"))

    println("collection:")
    // New graph whose vertex attribute type is User, converted from `graph`.
    val initialUserGraph: Graph[User, Int] =
      graph.mapVertices { case (_, (name, age)) => User(name, age, 0, 0) }

    // Fill in the degree information; a vertex missing from a degree RDD gets 0.
    val inDegrees: VertexRDD[Int] = initialUserGraph.inDegrees
    val outDegrees: VertexRDD[Int] = initialUserGraph.outDegrees
    val userGraph: Graph[User, Int] = initialUserGraph
      .outerJoinVertices(inDegrees) {
        case (_, u, inDegOpt) => u.copy(inDeg = inDegOpt.getOrElse(0))
      }
      .outerJoinVertices(outDegrees) {
        case (_, u, outDegOpt) => u.copy(outDeg = outDegOpt.getOrElse(0))
      }

    // Print the joined result so the "collection:" section actually shows output.
    userGraph.vertices.collect.foreach { case (_, u) =>
      println(s"${u.name} inDeg: ${u.inDeg} outDeg: ${u.outDeg}")
    }
  }
}
0 0
- Spark MLlib 学习资料
- spark MLlib 学习
- spark MLlib 学习
- Spark MLlib知识点学习整理
- Spark MLlib 入门学习笔记
- Spark MLlib 入门学习笔记
- Spark MLlib 入门学习笔记
- Spark MLlib 入门学习笔记
- Spark MLlib 入门学习笔记
- Spark MLlib 入门学习笔记
- Spark MLlib 入门学习笔记
- Spark MLlib 入门学习笔记
- Spark MLlib 入门学习笔记
- Spark MLlib
- spark MLlib
- Spark MLLib
- Spark MLlib
- Spark 0.9.1 MLLib 机器学习库
- 1016. Phone Bills (25)
- 《跨终端 Web》- 读书笔记(一)
- [LeetCode]242. Valid Anagram
- 紫书章四 习题4 Cube painting UVA
- 一篇文章教你如何协调投资理念跟量化模型的关系
- spark MLlib 学习
- intelliJ IDEA 编写邮箱注册功能报错 AddressException
- 在VirtualBox中使用Docker Machine管理主机
- 在BIOS中开启虚拟化技术
- Linux下安装PyCharm
- 1019. 数字黑洞 (20)-PAT乙级A1069
- SSD: Single Shot MultiBox Detector in TensorFlow(翻译)
- ubuntu下 google gmock使用
- Composer移除依赖