Spark---Datasource(JSON)---Scala
来源:互联网 发布:windows 调试 safari 编辑:程序博客网 时间:2024/06/07 23:32
package com.spark.sparksql.datasource.scala

import org.apache.spark.sql.types.{StructType, IntegerType, StringType, StructField}
import org.apache.spark.sql.{SaveMode, Row, SQLContext}
import org.apache.spark.{SparkContext, SparkConf}

/**
 * Created by root on 2017/8/9.
 *
 * Example of the Spark SQL JSON data source:
 *
 *  1. reads student scores from the JSON file "students.json",
 *  2. keeps the students whose score is at least 80,
 *  3. looks up name/age records for those students in an in-memory JSON data set,
 *  4. joins score and age by name and writes the result as JSON to "goodStudentJson".
 */
object JSONDataSource {

  /**
   * Program entry point. Creates a local SparkContext, runs the job and always
   * stops the context afterwards, even when the job fails.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("JSONDataSource").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      run(sc)
    } finally {
      sc.stop()
    }
  }

  /** Runs the whole read / filter / join / write pipeline on the given context. */
  private def run(sc: SparkContext): Unit = {
    val sqlContext = new SQLContext(sc)

    // Scores come from a JSON file; register it so it can be queried with SQL.
    val studentScoresDF = sqlContext.read.json("students.json")
    studentScoresDF.registerTempTable("student_scores")

    val goodStudentScoresDF =
      sqlContext.sql("select name, score from student_scores where score >= 80")
    val goodStudentNames = goodStudentScoresDF.map(row => row(0)).collect()

    // Name/age records are built from JSON strings held in memory.
    val studentInfoJSONs = List(
      "{\"name\":\"Liangyongqi\",\"age\":19}",
      "{\"name\":\"Xuruyun\",\"age\":17}",
      "{\"name\":\"Yasaka\",\"age\":18}")
    val studentInfosDF = sqlContext.read.json(sc.parallelize(studentInfoJSONs))
    studentInfosDF.registerTempTable("student_infos")

    val goodStudentInfosDF =
      if (goodStudentNames.isEmpty) {
        // "name in ()" is not valid SQL, so produce an empty result with the
        // same two columns instead of issuing the query.
        studentInfosDF.select("name", "age").limit(0)
      } else {
        // NOTE(review): names are spliced into the SQL text without escaping; a
        // name containing a single quote would break the query. Acceptable for
        // this demo data only.
        val quotedNames = goodStudentNames.map(name => "'" + name + "'").mkString(",")
        sqlContext.sql("select name, age from student_infos where name in (" + quotedNames + ")")
      }

    // Join on name. The score side is the filtered (score >= 80) data set, so
    // lower scores of the same student cannot leak into the output.
    val scoresByName = goodStudentScoresDF.map(row => (row(0), row(1)))
    val agesByName = goodStudentInfosDF.map(row => (row(0), row(1)))

    // Row values are untyped here, so score and age are converted through their
    // string form. This assumes both are whole numbers -- TODO confirm for
    // students.json.
    val goodStudentRows = scoresByName.join(agesByName).map {
      case (name, (score, age)) =>
        Row(name.toString, score.toString.toInt, age.toString.toInt)
    }

    val structType = StructType(Array(
      StructField("name", StringType, true),
      StructField("score", IntegerType, true),
      StructField("age", IntegerType, true)))

    val goodStudentDF = sqlContext.createDataFrame(goodStudentRows, structType)
    goodStudentDF.write.format("json").mode(SaveMode.Overwrite).save("goodStudentJson")
  }
}
阅读全文
0 0
- Spark---Datasource(JSON)---Scala
- Spark---Datasource(JDBC)---Scala
- Spark---Datasource(JSON)---java
- Spark--Datasource(Hive)
- Spark---Datasource(JDBC)---java
- spark,scala
- scala 开发spark程序
- scala for spark
- scala spark开发模式
- spark scala wordcout
- Scala and Spark Begin
- Spark+Scala课程包
- 初学spark--scala--45
- Spark与Scala学习
- 实战Scala & Spark (1)
- 实战Scala & Spark (2)
- 实战Scala & Spark (3)
- Scala && Spark 安装
- React-Native与原生之间的交互简介
- 集成学习
- A
- (四)linux进程通讯、信号量
- android下挂http服务器Tomcat
- Spark---Datasource(JSON)---Scala
- 行人重识别之重排序(re-ranking)
- elasticsearch5.2集群部署
- 在Vue2.0中使用less做准备
- LeetCode
- 关于HTML5部分功能浏览器支持汇总
- linux gitlab 部署
- iOS invalid bitcode signature等常见错误
- numpy入门——数组属性操作