SparkSQL 简单使用

来源:互联网 发布:阿里云服务器建站 编辑:程序博客网 时间:2024/06/06 07:10

标记一个博客怕找不到了:http://www.cnblogs.com/shishanyuan/p/4723713.html

第一种:
通过case class

package cn.lijie

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SQLContext, SaveMode}

/**
 * SparkSQL example 1: build a DataFrame from an RDD of case-class instances.
 *
 * Reads comma-separated lines (id,name,level) from a local text file,
 * converts them to a DataFrame via `rdd.toDF`, registers a temp table,
 * runs a SQL filter, writes the result as JSON, then reads it back and shows it.
 *
 * Created by jie on 2017/7/31.
 */
object SparkSql01 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("sparksql").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    // Hadoop user name used for local filesystem access on Windows.
    System.setProperty("user.name", "bigdata")

    // Each input line is "id,name,level" — parse into the Game case class.
    val rdd = sc.textFile("C:\\Users\\jie\\Desktop\\game.txt").map { line =>
      val fields = line.split(",")
      Game(fields(0).toLong, fields(1), fields(2).toInt)
    }

    import sqlContext.implicits._
    val df = rdd.toDF
    df.registerTempTable("t_game")

    // Overwrite so a second run does not fail with "path already exists".
    sqlContext.sql("select * from t_game where id <= 2")
      .write.mode(SaveMode.Overwrite).json("C:\\Users\\jie\\Desktop\\game.json")

    // sqlContext.load(path, format) is deprecated since Spark 1.4 — use the
    // DataFrameReader API instead.
    val newDf = sqlContext.read.json("C:\\Users\\jie\\Desktop\\game.json")
    newDf.show()
  }
}

/** One record of the input file: id,name,level. */
case class Game(id: Long, name: String, level: Int)

第二种:
通过StructType

package cn.lijie

import org.apache.spark.sql.{Row, SQLContext, SaveMode}
import org.apache.spark.sql.types._
import org.apache.spark.{SparkConf, SparkContext}

/**
 * SparkSQL example 2: build a DataFrame from an RDD[Row] plus an explicit
 * StructType schema (no case class needed for the conversion).
 *
 * Reads comma-separated lines (id,name,level) from a local text file,
 * creates a DataFrame with `createDataFrame(rowRDD, schema)`, runs a SQL
 * filter, writes the result as JSON, then reads it back and shows it.
 *
 * Created by jie on 2017/7/31.
 */
object SparkSql01 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("sparksql").setMaster("local[2]")
    System.setProperty("user.name", "bigdata")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    val rdd = sc.textFile("C:\\Users\\jie\\Desktop\\game.txt")

    // Schema must match the parsed field types below. The third column is the
    // game "level" (consistent with the Game case class), not an age.
    val schema = StructType(
      List(
        StructField("id", LongType, true),
        StructField("name", StringType, true),
        StructField("level", IntegerType, true)
      )
    )

    // BUG FIX: the original indexed characters of each line (x(0) is a Char,
    // so .toLong returned the character code). Split on "," first so the Row
    // fields actually match the schema.
    val rowRDD = rdd.map { line =>
      val fields = line.split(",")
      Row(fields(0).toLong, fields(1), fields(2).toInt)
    }

    val df = sqlContext.createDataFrame(rowRDD, schema)
    df.registerTempTable("t_game")

    // Overwrite so a second run does not fail with "path already exists".
    sqlContext.sql("select * from t_game where id <= 2")
      .write.mode(SaveMode.Overwrite).json("C:\\Users\\jie\\Desktop\\game.json")

    // sqlContext.load(path, format) is deprecated since Spark 1.4 — use the
    // DataFrameReader API instead.
    val newDf = sqlContext.read.json("C:\\Users\\jie\\Desktop\\game.json")
    newDf.show()
  }
}

/** One record of the input file: id,name,level. */
case class Game(id: Long, name: String, level: Int)

pom

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>spark-sparksql</groupId>
    <artifactId>spark-sql</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Spark 1.6.1 on Scala 2.10; compile for Java 7. -->
    <properties>
        <maven.compiler.source>1.7</maven.compiler.source>
        <maven.compiler.target>1.7</maven.compiler.target>
        <encoding>UTF-8</encoding>
        <scala.version>2.10.6</scala.version>
        <spark.version>1.6.1</spark.version>
        <hadoop.version>2.6.4</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.10</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
            <!-- Compile Scala sources. -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-make:transitive</arg>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- Build a runnable fat jar; strip signature files so the
                 merged jar is not rejected as tampered. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>cn.lijie.SparkSql01</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
原创粉丝点击