Spark 中各种连接操作以及实用方法

来源:互联网 发布:数据库如何自动生成id 编辑:程序博客网 时间:2024/05/16 05:44
 // Demonstrates the pair-RDD combination operators (join, cogroup, leftOuterJoin,
 // fullOuterJoin, cartesian) and keyBy on two small keyed data sets.
 // Expects a SparkContext named `sc` to be in scope (e.g. inside spark-shell).
 //
 // Results are brought back with collect() before printing so that the output
 // appears on the driver; rdd.foreach(println) prints on the executors instead.
 // Row order in the sample outputs below is illustrative and may vary.

 // Both value columns are Double so the pair RDDs are RDD[(String, Double)]
 // rather than being widened to (String, AnyVal).
 val a = sc.parallelize(Array(("123", 4.0), ("456", 9.0), ("789", 9.0)))
 val b = sc.parallelize(Array(("123", 8.0), ("789", 10.0)))

 // join: only keys present in both RDDs are kept.
 val c = a.join(b)
 c.collect().foreach(println)
 // (123,(4.0,8.0))
 // (789,(9.0,10.0))

 // cogroup: every key from either side, with the values of each side grouped.
 val d = a.cogroup(b)
 d.collect().foreach(println)
 // (456,(CompactBuffer(9.0),CompactBuffer()))
 // (123,(CompactBuffer(4.0),CompactBuffer(8.0)))
 // (789,(CompactBuffer(9.0),CompactBuffer(10.0)))

 // leftOuterJoin: every key of `a`; the value from `b` is wrapped in an Option.
 val e = a.leftOuterJoin(b)
 e.collect().foreach(println)
 // (456,(9.0,None))
 // (123,(4.0,Some(8.0)))
 // (789,(9.0,Some(10.0)))

 // fullOuterJoin: keys from both sides; both values are wrapped in an Option.
 val f = a.fullOuterJoin(b)
 f.collect().foreach(println)
 // (456,(Some(9.0),None))
 // (123,(Some(4.0),Some(8.0)))
 // (789,(Some(9.0),Some(10.0)))

 // cartesian: every element of `a` paired with every element of `b`.
 val g = a.cartesian(b)
 g.collect().foreach(println)
 // ((123,4.0),(123,8.0))
 // ((123,4.0),(789,10.0))
 // ((456,9.0),(123,8.0))
 // ((456,9.0),(789,10.0))
 // ((789,9.0),(123,8.0))
 // ((789,9.0),(789,10.0))

 // Disabled in the original example; kept for reference.
 // val h = a.coalesce(6,true)
 // h.foreach(println)
 // a.dependencies.foreach(println)

 // keyBy: pairs each element with the key computed by the function; the key
 // here is a constant, so every element receives the same key.
 val i = a.keyBy(_ => ("haha", 234))
 i.collect().foreach(println)
 // ((haha,234),(123,4.0))
 // ((haha,234),(456,9.0))
 // ((haha,234),(789,9.0))

0 0
原创粉丝点击