Spark 2.0从入门到精通245讲——操作RDD(transformation案例实战)
来源:互联网 发布:民国报纸数据库 编辑:程序博客网 时间:2024/06/05 16:39
package cn.spark.study.core

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

/**
 * Demonstrates common RDD transformation operators in Spark core:
 * map, filter, flatMap, groupByKey, reduceByKey, sortByKey and join.
 *
 * Each demo method builds its own local-mode SparkContext, applies a
 * single transformation, prints the result via foreach, and stops the
 * context when done so the JVM does not leak it.
 *
 * @author Administrator
 */
object TransformationOperation {

  def main(args: Array[String]): Unit = {
    // Only one SparkContext may be active per JVM — enable one demo at a time.
    // map()
    // filter()
    // flatMap()
    // groupByKey()
    // reduceByKey()
    // sortByKey()
    join()
  }

  /** Builds a local-mode SparkContext named after the demo being run. */
  private def createContext(appName: String): SparkContext = {
    val conf = new SparkConf()
      .setAppName(appName)
      .setMaster("local")
    new SparkContext(conf)
  }

  /** map: multiply every element by 2 and print the doubled values. */
  def map(): Unit = {
    val sc = createContext("map")
    try {
      val numbers = Array(1, 2, 3, 4, 5)
      val numberRDD = sc.parallelize(numbers, 1)
      val multipleNumberRDD = numberRDD.map(num => num * 2)
      multipleNumberRDD.foreach(num => println(num))
    } finally {
      sc.stop() // release the context even if the job fails
    }
  }

  /** filter: keep only the even numbers and print them. */
  def filter(): Unit = {
    val sc = createContext("filter")
    try {
      val numbers = Array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
      val numberRDD = sc.parallelize(numbers, 1)
      val evenNumberRDD = numberRDD.filter(num => num % 2 == 0)
      evenNumberRDD.foreach(num => println(num))
    } finally {
      sc.stop()
    }
  }

  /** flatMap: split each line into words and print every word. */
  def flatMap(): Unit = {
    val sc = createContext("flatMap")
    try {
      val lineArray = Array("hello you", "hello me", "hello world")
      val lines = sc.parallelize(lineArray, 1)
      val words = lines.flatMap(line => line.split(" "))
      words.foreach(word => println(word))
    } finally {
      sc.stop()
    }
  }

  /** groupByKey: group scores by class name and print each group. */
  def groupByKey(): Unit = {
    val sc = createContext("groupByKey")
    try {
      val scoreList = Array(
        ("class1", 80), ("class2", 75),
        ("class1", 90), ("class2", 60))
      val scores = sc.parallelize(scoreList, 1)
      val groupedScores = scores.groupByKey()
      groupedScores.foreach { score =>
        println(score._1)
        score._2.foreach(singleScore => println(singleScore))
        println("=============================")
      }
    } finally {
      sc.stop()
    }
  }

  /** reduceByKey: sum the scores of each class and print the totals. */
  def reduceByKey(): Unit = {
    // Fixed: app name was incorrectly "groupByKey" in the original.
    val sc = createContext("reduceByKey")
    try {
      val scoreList = Array(
        ("class1", 80), ("class2", 75),
        ("class1", 90), ("class2", 60))
      val scores = sc.parallelize(scoreList, 1)
      val totalScores = scores.reduceByKey(_ + _)
      totalScores.foreach(classScore =>
        println(classScore._1 + ": " + classScore._2))
    } finally {
      sc.stop()
    }
  }

  /** sortByKey: sort (score, name) pairs by score, descending, and print. */
  def sortByKey(): Unit = {
    val sc = createContext("sortByKey")
    try {
      val scoreList = Array(
        (65, "leo"), (50, "tom"), (100, "marry"), (85, "jack"))
      val scores = sc.parallelize(scoreList, 1)
      // false => descending order
      val sortedScores = scores.sortByKey(false)
      sortedScores.foreach(studentScore =>
        println(studentScore._1 + ": " + studentScore._2))
    } finally {
      sc.stop()
    }
  }

  /** join: inner-join student names with scores on student id and print. */
  def join(): Unit = {
    val sc = createContext("join")
    try {
      val studentList = Array(
        (1, "leo"), (2, "jack"), (3, "tom"))
      val scoreList = Array(
        (1, 100), (2, 90), (3, 60))
      val students = sc.parallelize(studentList)
      val scores = sc.parallelize(scoreList)
      // (id, (name, score)) for every id present in both RDDs
      val studentScores = students.join(scores)
      studentScores.foreach { studentScore =>
        println("student id: " + studentScore._1)
        println("student name: " + studentScore._2._1)
        // Fixed: output said "socre" in the original.
        println("student score: " + studentScore._2._2)
        println("=======================================")
      }
    } finally {
      sc.stop()
    }
  }

  /** cogroup: left as a stub in the original source. TODO: implement demo. */
  def cogroup(): Unit = {
  }
}
阅读全文
0 0
- Spark 2.0从入门到精通245讲——操作RDD(transformation案例实战)
- Spark 2.0从入门到精通245讲——操作RDD(action案例实战)
- Spark核心编程:操作RDD(transformation和action案例实战)
- Spark修炼之道(进阶篇)——Spark入门到精通:第十节 Spark SQL案例实战(一)
- Spark总结(二)——RDD的Transformation操作
- Spark修炼之道(进阶篇)——Spark入门到精通:第十一节 Spark Streaming—— DStream Transformation操作
- Spark RDD transformation操作
- spark RDD transformation操作
- Spark Streaming 实战案例(二) Transformation操作
- 【Spark】RDD操作详解2——值型Transformation算子
- 【Spark】RDD操作详解2——值型Transformation算子
- (升级版)Spark从入门到精通(Scala编程、案例实战、高级特性、Spark内核源码剖析、Hadoop高端)
- Spark入门到精通视频学习资料--第八章:项目实战(2讲)
- [1.4]Spark RDD经典Transformation算子实战
- spark rdd详解二(transformation与action操作)
- JSP从入门到精通(一)——第一个servlet案例
- 【Spark】RDD操作详解1——Transformation和Actions概况
- 【Spark】RDD操作详解3——键值型Transformation算子
- flask 中使用 装饰器
- Spring Cloud | 第二篇:服务消费者(Ribbon)
- html5 postMessage解决跨域、跨窗口消息传递
- java多线程 关于synchronized wait notify CountDownLatch CyclicBarrier Semaphore
- 邂逅ReactNative(二):使用Eslint规范项目代码
- Spark 2.0从入门到精通245讲——操作RDD(transformation案例实战)
- java后台发送请求并获取返回值
- python 数值类型转换
- Ajax中什么时候用同步,什么时候用异步?
- 出现错误提示弹框: “Building workspace has encountered a problem. Errors during build.”
- ApkTool 反编译 重新打包
- AndroidStudio SVN插件配置与使用
- 正则表达式
- unix编程艺术读书笔记