Spark SQL
来源:互联网 发布:淘宝手机端发布详情页 编辑:程序博客网 时间:2024/06/03 20:12
package com.qh

import java.util.Properties

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.types._

/**
 * Spark SQL examples:
 *   - read from / write to SQL Server over JDBC,
 *   - query a JSON file stored on HDFS,
 *   - build a DataFrame from case-class instances and query it with SQL.
 *
 * Created by hadoop on 8/19/16.
 * Pattern-matching / case-class reference: http://www.yiibai.com/scala/scala_pattern_matching.html
 */
// NOTE(review): Scala convention is UpperCamelCase for case classes ("People");
// the lowercase name is kept to preserve the existing public identifier.
case class people(id: Int, name: String, age: Int)

object SparkSQL {

  // JDBC connection settings for the SQL Server instance.
  // NOTE(review): credentials are hard-coded — move them to configuration or a
  // secrets store before using this outside of a demo.
  private val url = "jdbc:sqlserver://172.16.177.45:1433;DataBase=CE_DQ"
  private val user = "sa"
  private val password = "123456"
  private val tableName = "SparkSQLTest"

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("ScalaTest")
      .setMaster("spark://master:7077")
    val sc = new SparkContext(conf)

    /*
     * Since Spark 2.0.0, SQLContext and HiveContext are deprecated (they still
     * work for backward compatibility):
     *   val sqlContext  = new SQLContext(sc)
     *   val hiveContext = new HiveContext(sc)
     * Use SparkSession instead, e.g. with Hive support:
     *   val hiveContext = SparkSession.builder().enableHiveSupport().getOrCreate()
     */
    val sqlContext = SparkSession.builder().getOrCreate().sqlContext

    // --------------------------------------------------------------------
    // Spark SQL -> SQL Server: read data.
    // No JDBC driver class needs to be specified — Spark selects one from the
    // URL — but the matching JDBC jar must be present in $SPARK_HOME/jars.
    // --------------------------------------------------------------------
    val connProps = new Properties()
    connProps.put("user", user)
    connProps.put("password", password)

    val jdbcReadDF = sqlContext.read.jdbc(url, tableName, connProps)
    jdbcReadDF.show()

    // --------------------------------------------------------------------
    // Spark SQL -> SQL Server: write data.
    // --------------------------------------------------------------------
    val data = sc.parallelize(Seq(Row(1, null), Row(2, "b"), Row(3, "c")))

    // Schema for the rows above. StructField arguments:
    //   1. column name
    //   2. column data type
    //   3. whether the column may be null
    val schema = StructType(
      List(
        StructField("ID", DataTypes.IntegerType, nullable = false),
        StructField("Name", DataTypes.StringType, nullable = true)
      )
    )

    val jdbcWriteDF = sqlContext.createDataFrame(data, schema)
    // SaveMode.Append appends to the existing table; without it the default
    // mode creates the table and writes the data.
    jdbcWriteDF.write.mode(SaveMode.Append).jdbc(url, tableName, connProps)

    // --------------------------------------------------------------------
    // Spark SQL over a JSON file on HDFS. Expected file format, one JSON
    // object per line:
    //   {"name":"Michael"}
    //   {"name":"Andy", "age":30}
    //   {"name":"Justin", "age":19}
    // --------------------------------------------------------------------
    val jsonDF = sqlContext.read.json("hdfs://master:9000/Spark/sqljson")

    jsonDF.show()
    /*
     +----+-------+
     | age|   name|
     +----+-------+
     |null|Michael|
     |  30|   Andy|
     |  19| Justin|
     +----+-------+
    */

    jsonDF.printSchema()
    /*
     root
      |-- age: long (nullable = true)
      |-- name: string (nullable = true)
    */

    jsonDF.select("name").show()
    /*
     +-------+
     |   name|
     +-------+
     |Michael|
     |   Andy|
     | Justin|
     +-------+
    */

    jsonDF.select(jsonDF("name"), jsonDF("age") + 10).show()
    /*
     +-------+----------+
     |   name|(age + 10)|
     +-------+----------+
     |Michael|      null|
     |   Andy|        40|
     | Justin|        29|
     +-------+----------+
    */

    jsonDF.filter(jsonDF("age") > 10).show()
    /*
     +---+------+
     |age|  name|
     +---+------+
     | 30|  Andy|
     | 19|Justin|
     +---+------+
    */

    // --------------------------------------------------------------------
    // Build a DataFrame from case-class instances and query it with SQL.
    // (An immutable Seq replaces the original mutable ListBuffer — nothing
    // here needs in-place mutation.)
    // --------------------------------------------------------------------
    val peopleList = Seq(
      people(0, "Michael", 18),
      people(1, "Tom", 32),
      people(2, "Andy", 20),
      people(3, "Justin", 40),
      people(4, "Miss", 17)
    )
    peopleList.foreach(println(_))

    // Create the DataFrame; the schema is inferred from the case class.
    val rawDF = sqlContext.createDataFrame(peopleList)
    rawDF.printSchema()
    /*
     root
      |-- id: integer (nullable = false)
      |-- name: string (nullable = true)
      |-- age: integer (nullable = false)
    */

    // Rename the "id" column to "ID" (reusing rawDF instead of building a
    // second identical DataFrame as the original code did).
    val df = rawDF.withColumnRenamed("id", "ID")
    df.printSchema()
    df.show()
    /*
     root
      |-- ID: integer (nullable = false)
      |-- name: string (nullable = true)
      |-- age: integer (nullable = false)

     +---+-------+---+
     | ID|   name|age|
     +---+-------+---+
     |  0|Michael| 18|
     |  1|    Tom| 32|
     |  2|   Andy| 20|
     |  3| Justin| 40|
     |  4|   Miss| 17|
     +---+-------+---+
    */

    // Register the DataFrame as a temporary table/view:
    //   - on SQLContext use registerTempTable,
    //   - on SparkSession use createOrReplaceTempView.
    df.createOrReplaceTempView("peopleTable")

    // Query the temp view with SQL.
    val sqlDF = sqlContext.sql("select * from peopleTable order by name desc")
    sqlDF.show()
    /*
     +---+-------+---+
     | ID|   name|age|
     +---+-------+---+
     |  1|    Tom| 32|
     |  4|   Miss| 17|
     |  0|Michael| 18|
     |  3| Justin| 40|
     |  2|   Andy| 20|
     +---+-------+---+
    */

    sc.stop()
  }
}
后续完善。。。
1 0
- Spark Streaming+Spark SQL
- spark sql
- Spark SQL
- Spark SQL
- spark-sql
- spark sql
- spark sql
- spark sql
- Spark-Sql
- Spark SQL
- Spark SQL
- spark Sql
- spark-sql
- spark sql
- Spark Sql
- spark sql
- spark sql
- spark sql
- HTML5-----学习笔记1
- Linux下gitblit的安装和配置
- Oracle 11G R2 用exp无法导出空表解决方法
- [leetcode]113. Path Sum II -- JavaScript 代码
- 多线程编程学习(001)
- Spark SQL
- mdev的使用以及mdev.conf的规则配置--busybox
- WebService Xml 字符串形式传值
- php jQuery把后台返回的json数据赋值给页面标签
- 顺序表和链表
- 机器学习(1)机器学习的范围
- 给beanutils注册转换器
- 查找各大视频网站真实视频地址方法(乐视优酷土豆腾讯56酷6pptv激动网)
- Android SurfaceView使用详解