Spark SQL 常用操作
来源:互联网 发布:干支纪年法简便算法 编辑:程序博客网 时间:2024/05/21 17:59
package sqlText

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.Row

/**
 * Demonstrates common Spark SQL (1.x API) operations: reading JSON,
 * building DataFrames from text files (both programmatic-schema and
 * reflection-inferred schema), and reading/writing Parquet.
 *
 * Created by xiaoxu
 */
object SparkSQL {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Simple Application").setMaster("local")
    val sparkContext = new SparkContext(conf)
    val sqlContext = new SQLContext(sparkContext)

    // Read a JSON file with Spark SQL.
    val jsonPath = "e:/data/text.txt"
    val dataFrame = sparkSqlReadJson(sqlContext, jsonPath)

    val txtPath = "e:/data/tt.txt"
    sparkSQLReadTxt(sqlContext, sparkContext, txtPath)
    sparkSQLInferrSchemaReadTxt(sqlContext, sparkContext, txtPath)

    val parquetPath = "e:/data/user.parquet"
    sparkSqlWriteParquet(sqlContext, parquetPath, jsonPath)
    sparkSQLReadParquet(sqlContext, parquetPath)
  }

  /**
   * Schema model class used for reflection-based schema inference.
   *
   * @param name person's name
   * @param age  person's age (kept as String to match the raw text input)
   */
  case class Person(name: String, age: String)

  /**
   * Reads a JSON file into a DataFrame.
   *
   * @param sqlContext the SQLContext to read with
   * @param path       path to the JSON file
   * @return the loaded DataFrame
   */
  def sparkSqlReadJson(sqlContext: SQLContext, path: String): DataFrame =
    // The last expression is the return value; no `return` keyword needed.
    sqlContext.read.json(path)

  /**
   * Programmatically Specifying the Schema: builds a StructType from a
   * schema string at runtime, associates it with an RDD[Row] via
   * createDataFrame, then registers a temp table for SQL queries.
   *
   * @param sqlContext   the SQLContext to query with
   * @param sparkContext the SparkContext used to read the text file
   * @param path         path to a comma-separated "name,age" text file
   */
  def sparkSQLReadTxt(sqlContext: SQLContext, sparkContext: SparkContext, path: String): Unit = {
    val schemaString = "name age"
    // Every field is a nullable StringType column named after a schema-string token.
    val schema = StructType(
      schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, nullable = true)))
    val people = sparkContext.textFile(path).map(_.split(",")).map(p => Row(p(0), p(1).trim))
    val peopleDataFrame = sqlContext.createDataFrame(people, schema)
    peopleDataFrame.registerTempTable("Person")
    val results = sqlContext.sql("SELECT name, age FROM Person WHERE age >= 13")
    // collect() brings the rows to the driver so println runs locally.
    results.map(t => "Name: " + t(0)).collect().foreach(println)
  }

  /**
   * Inferring the Schema Using Reflection: maps each line onto the Person
   * case class, converts to a DataFrame with toDF(), and registers a temp
   * table for SQL queries.
   *
   * @param sqlContext   the SQLContext to query with
   * @param sparkContext the SparkContext used to read the text file
   * @param path         path to a comma-separated "name,age" text file
   */
  def sparkSQLInferrSchemaReadTxt(sqlContext: SQLContext, sparkContext: SparkContext, path: String): Unit = {
    // Needed for the implicit RDD -> DataFrame conversion (toDF).
    import sqlContext.implicits._
    val peoples = sparkContext.textFile(path)
      .map(_.split(","))
      .map(people => Person(people(0), people(1)))
      .toDF()
    peoples.registerTempTable("people")
    val result = sqlContext.sql("select * from people")
    result.collect().foreach(println)
  }

  /**
   * Reads data from a JSON file and writes the selected columns out in
   * Parquet format.
   *
   * @param sqlContext  the SQLContext to read/write with
   * @param parquetPath destination path for the Parquet output
   * @param jsonPath    path to the source JSON file
   */
  def sparkSqlWriteParquet(sqlContext: SQLContext, parquetPath: String, jsonPath: String): Unit = {
    val dataFrame = sqlContext.read.json(jsonPath)
    dataFrame.select("name", "age").write.format("parquet").save(parquetPath)
  }

  /**
   * Reads Parquet data, registers it as a temp table, and prints every row.
   *
   * @param sqlContext the SQLContext to read with
   * @param path       path to the Parquet data
   */
  def sparkSQLReadParquet(sqlContext: SQLContext, path: String): Unit = {
    val parquetData = sqlContext.read.parquet(path)
    parquetData.registerTempTable("parquetData")
    val result = sqlContext.sql("select * from parquetData")
    // Fix: collect() first so println executes on the driver; a bare
    // foreach(println) would print on the executors, not locally.
    result.collect().foreach(println)
  }
}
0 0
- Spark SQL 常用操作
- Spark编程常用操作
- spark sql 操作mysql
- spark sql DataFrame操作
- spark sql dataframe操作
- Spark SQL 函数操作
- Spark SQL 函数操作
- Spark-SQL DataFrame操作
- spark sql 入门操作
- Kafka-Spark Streaming-Spark SQL操作笔记
- java spark(spark sql)操作Cassandra
- Spark常用RDD操作汇总
- spark/hdfs..操作命令,常用
- spark RDD常用函数/操作
- Spark SQL系列------2. Spark SQL Aggregate操作的实现
- Spark SQL概述,DataFrames,创建DataFrames的案例,DataFrame常用操作(DSL风格语法),sql风格语法
- my sql常用操作
- Sql 常用表操作
- 二叉树的编号
- 抽象工厂
- 要啥自行车,共享单车都来了
- iOS--音频播放、录音、视频播放、拍照、视频录制
- 不错的IT学习网站跟论坛
- Spark SQL 常用操作
- 腾讯2017暑期实习生编程题-有趣的数字
- PAT算法笔记(八)————有几个PAT*
- Windows 安装 Python2.7.9全解,包含错误:2503
- 凹凸多边形的拆分
- 修 改自己的电 脑动 态I.P
- sscanf函数
- git常用命令
- leetcode_3_Longest_Substring_Without_Repeating_Characters