Spark SQL 语法:通过编程方式指定 schema 读取 txt 文件

来源:互联网 发布:qq远程监控软件 编辑:程序博客网 时间:2024/06/03 17:26
Programmatically Specifying the Schema(以编程的方式指定schema)
scala> val people = sc.textFile("hdfs://node1:8020/test/input/people.txt")
people: org.apache.spark.rdd.RDD[String] = MapPartitionsRDD[1] at textFile at <console>:21
scala> val schemaString = "name age"
schemaString: String = name age
scala> import org.apache.spark.sql.types.{StructType,StructField,StringType};
import org.apache.spark.sql.types.{StructType, StructField, StringType}
scala> import org.apache.spark.sql.Row;
import org.apache.spark.sql.Row
scala> val schema = StructType(schemaString.split(" ").map(fieldName => StructField(fieldName, StringType, true)))
schema: org.apache.spark.sql.types.StructType = StructType(StructField(name,StringType,true), StructField(age,StringType,true))
scala> val rowRDD = people.map(_.split(",")).map(p => Row(p(0), p(1).trim))
rowRDD: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[3] at map at <console>:25
scala> val peopleDataFrame = sqlContext.createDataFrame(rowRDD, schema)
peopleDataFrame: org.apache.spark.sql.DataFrame = [name: string, age: string]
scala> peopleDataFrame.registerTempTable("people")
scala> val results = sqlContext.sql("SELECT name FROM people")
15/12/15 09:46:17 INFO parse.ParseDriver: Parsing command: SELECT name FROM people
15/12/15 09:46:18 INFO parse.ParseDriver: Parse Completed
results: org.apache.spark.sql.DataFrame = [name: string]
scala> results.map(t => "Name: " + t(0)).collect().foreach(println)
Name: Michael
Name: Andy
Name: Justin

0 0
原创粉丝点击