Spark---Datasource(JSON)---Scala

来源:互联网 发布:windows 调试 safari 编辑:程序博客网 时间:2024/06/07 23:32
package com.spark.sparksql.datasource.scalaimport org.apache.spark.sql.types.{StructType, IntegerType, StringType, StructField}import org.apache.spark.sql.{SaveMode, Row, SQLContext}import org.apache.spark.{SparkContext, SparkConf}/**  * Created by root on 2017/8/9.  */object JSONDataSource {  def main(args: Array[String]) {    val conf = new SparkConf().setAppName("JSONDataSource").setMaster("local")    val sc = new SparkContext(conf)    val sqlContext = new SQLContext(sc)    import sqlContext.implicits._    val studentScoresDF = sqlContext.read.json("students.json")    studentScoresDF.registerTempTable("student_scores")    val goodStudentNamesDF = sqlContext.sql("select name, score from student_scores where score >= 80")    val goodStudentNames = goodStudentNamesDF.map(x => x(0)).collect()    var studentInfoJSONs = List[String]()    studentInfoJSONs = studentInfoJSONs.::("{\"name\":\"Yasaka\",\"age\":18}")    studentInfoJSONs = studentInfoJSONs.::("{\"name\":\"Xuruyun\",\"age\":17}")    studentInfoJSONs = studentInfoJSONs.::("{\"name\":\"Liangyongqi\",\"age\":19}")    val studentInfosRDD = sc.parallelize(studentInfoJSONs)    val studentInfosDF = sqlContext.read.json(studentInfosRDD)    studentInfosDF.registerTempTable("student_infos")    var sql = "select name, age from student_infos where name in ("    var i=0    for(name <- goodStudentNames){      sql += "'" + name + "'"      if (i < goodStudentNames.length - 1){        sql += ","      }      i += 1    }    sql = sql + ")"    val goodStudentInfosDF = sqlContext.sql(sql)    val goodStudentsRDD = studentScoresDF.map(x => (x(0),x(1))).join(goodStudentInfosDF.map(x => (x(0),x(1))))    val goodStudentsRDDRow = goodStudentsRDD.map(x=> Row(x._1.toString, x._2._1.toString.toInt, x._2._2.toString.toInt))    var arr = Array(StructField("name",StringType,true)                    ,StructField("score",IntegerType,true)                    ,StructField("age",IntegerType,true))    val structType = StructType(arr)    val goodStudentDF = sqlContext.createDataFrame(goodStudentsRDDRow, structType)    goodStudentDF.write.format("json").mode(SaveMode.Overwrite).save("goodStudentJson")  }}
原创粉丝点击