sparksql 简单使用
来源:互联网 发布:阿里云服务器建站 编辑:程序博客网 时间:2024/06/06 07:10
标记一个博客怕找不到了:http://www.cnblogs.com/shishanyuan/p/4723713.html
第一种:
通过case class
package cn.lijieimport org.apache.spark.{SparkConf, SparkContext}import org.apache.spark.sql.SQLContext/** * Created by jie on 2017/7/31. */object SparkSql01 { def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("sparksql").setMaster("local[2]") val sc = new SparkContext(conf) val sqlContext = new SQLContext(sc) System.setProperty("user.name","bigdata") val rdd = sc.textFile("C:\\Users\\jie\\Desktop\\game.txt").map{ x =>{ val split = x.split(",") Game(split(0).toLong,split(1),split(2).toInt) }} import sqlContext.implicits._ val df = rdd.toDF df.registerTempTable("t_game") sqlContext.sql("select * from t_game where id <= 2").write.json("C:\\Users\\jie\\Desktop\\game.json")val newDf = sqlContext.load("C:\\Users\\jie\\Desktop\\game.json","json") newDf.show }}case class Game(id:Long,name:String,level:Int)
第二种:
通过StructType
package cn.lijieimport org.apache.spark.sql.{Row, SQLContext}import org.apache.spark.sql.types._import org.apache.spark.{SparkConf, SparkContext}/** * Created by jie on 2017/7/31. */object SparkSql01 { def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("sparksql").setMaster("local[2]") System.setProperty("user.name","bigdata") val sc = new SparkContext(conf) val sqlContext = new SQLContext(sc) val rdd = sc.textFile("C:\\Users\\jie\\Desktop\\game.txt") val schema = StructType( List( StructField("id", LongType, true), StructField("name", StringType, true), StructField("age", IntegerType, true) ) ) val rowRDD = rdd.map(x => { Row(x(0).toLong, x(1), x(2).toInt) }) val df = sqlContext.createDataFrame(rowRDD, schema) df.registerTempTable("t_game") sqlContext.sql("select * from t_game where id <= 2").write.json("C:\\Users\\jie\\Desktop\\game.json") val newDf = sqlContext.load("C:\\Users\\jie\\Desktop\\game.json","json") newDf.show }}case class Game(id: Long, name: String, level: Int)
pom
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build definition for the Spark SQL examples: Scala 2.10 / Spark 1.6.1,
     compiled with scala-maven-plugin and packaged as an uber-jar via shade. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>spark-sparksql</groupId>
  <artifactId>spark-sql</artifactId>
  <version>1.0-SNAPSHOT</version>

  <properties>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
    <encoding>UTF-8</encoding>
    <scala.version>2.10.6</scala.version>
    <spark.version>1.6.1</spark.version>
    <hadoop.version>2.6.4</hadoop.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.10</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>3.2.2</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
            <configuration>
              <args>
                <arg>-make:transitive</arg>
                <arg>-dependencyfile</arg>
                <arg>${project.build.directory}/.scala_dependencies</arg>
              </args>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>2.4.3</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <!-- Strip signature files so the shaded jar is not rejected
                   with a SecurityException at runtime. -->
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>cn.lijie.SparkSql01</mainClass>
                </transformer>
              </transformers>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
阅读全文
0 0
- sparksql 简单使用
- SparkSQL使用的简单例子
- SparkSql 使用
- sparkSQL使用
- SparkSQL简单测试
- SparkSQL 简单测试
- SparkSql 函数的使用
- sparksql的基本使用
- 一个简单的sparkSQL案例
- sparkSQL官网简单解析
- sparkSQL
- SparkSQL
- SparkSQL
- SparkSQL
- spark1.1.0下使用SparkSQL
- SparkSQL配置和使用初探
- SparkSQL 使用jdbcRDD访问数据库
- sparkSQL中UDF的使用
- Mongodb与Elasticsearch实时同步
- android高级组件(2)AutoCompleteTextView,MutiAutoCompleteTextView,Spinner,ListView
- 图像处理18:透视变换
- Angular 实现类似博客评论的递归显示
- Vue之路之--Vue实例的一些理解(未完待续)
- sparksql 简单使用
- LiME + volatility2.4进行内存读取
- Android Pair记录
- Android关于中文url编码类似于%E5%8F%8C%E5%AD%90%E5%BA%A7
- poj 1033 Defragment 模拟+递归
- Neutron总结-Firewall as a Service(FWaaS)
- python爬虫学习第七天
- 第一章 实用R内置数据进行数据处理--笔记
- HDU-1848-组合博弈