
来源:互联网 发布:java api安卓版 编辑:程序博客网 时间:2024/06/08 00:59



/**
 * A field inside a StructType.
 *
 * @param name     The name of this field.
 * @param dataType The data type of this field.
 * @param nullable Indicates if values of this field can be null values.
 * @param metadata The metadata of this field. The metadata should be preserved during
 *                 transformation if the content of the column is not modified,
 *                 e.g., in selection.
 */
case class StructField(
    name: String,
    dataType: DataType,
    nullable: Boolean = true,
    metadata: Metadata = Metadata.empty)

-----A field inside a StructType
name:The name of this field.
dataType:The data type of this field.
nullable:Indicates if values of this field can be null values.
metadata:The metadata of this field. The metadata should be preserved during transformation if the content of the column is not modified, e.g., in selection.

在一个结构体(StructType)内部,一个 StructField 就像 SQL 中的一个字段,它包含了这个字段的具体信息,可以看如下例子:
/**
 * Builds a single StructField and prints it.
 *
 * StructField is a case class: `nullable` defaults to true and `metadata` defaults to
 * empty metadata. It describes one field of a StructType.
 */
def schema_StructField(): Unit = {
  val sf = StructField("b", IntegerType)
  // NOTE(review): the argument of the original println was lost when the article was
  // scraped (the line read `println({}}`); printing the field itself is a reconstruction.
  println(sf)
  // StructField(b,IntegerType,true)
}

A StructType object can be constructed by
StructType(fields: Seq[StructField])
在源码中 StructType 是一个 case class,如下:
/**
 * StructType declaration as quoted from the Spark source, shown with an empty body.
 * It wraps an array of StructField and is itself a Seq[StructField].
 */
case class StructType(fields: Array[StructField])
  extends DataType
  with Seq[StructField] {}
它是继承Seq的,也就是说Seq的操作,它都拥有,但是从形式上来说,每个元素是用  StructField包住的。
package Dataset

import org.apache.spark.sql.types._

/**
  * Created by root on 9/21/16.
  *
  * Small runnable tour of StructType: how to build one, how to extract fields from it,
  * and which Seq-style operations it supports.
  */
object schemaAnalysis {

  //--------------------------------------------------StructType analysis---------------------------------------

  /** Example schema shared by the demos below: fields a (Integer), b (Long), c (Boolean). */
  val struct: StructType = StructType(
    StructField("a", IntegerType) ::
      StructField("b", LongType, false) ::
      StructField("c", BooleanType, false) :: Nil)

  /** Builds a schema incrementally with `add`, using type names given as strings, and prints each field. */
  def schema_StructType(): Unit = {
    val schemaTyped = new StructType()
      .add("a", "int")
      .add("b", "string")
    schemaTyped.foreach(println)
    // StructField(a,IntegerType,true)
    // StructField(b,StringType,true)
  }

  /**
    * Shows the two lookup forms on a StructType: by a single field name and by a set of names.
    *
    * The lookup with an unknown name in the set is wrapped in `Try` so that the failure is
    * printed instead of aborting the demo.
    */
  def structType_extracted(): Unit = {
    // Extract a single StructField.
    val singleField_a = struct("a")
    println(singleField_a)
    // `nullable` was omitted when the field was declared, so it is true:
    // StructField(a,IntegerType,true)

    val singleField_b = struct("b")
    println(singleField_b)
    // StructField(b,LongType,false)

    // A single-name lookup of a missing field throws:
    // val nonExisting = struct("d")
    // println(nonExisting)
    // java.lang.IllegalArgumentException: Field "d" does not exist.

    // Extract multiple StructFields. Field names are provided in a set.
    // A StructType object will be returned.
    val twoFields = struct(Set("b", "c"))
    println(twoFields)
    // StructType(StructField(b,LongType,false), StructField(c,BooleanType,false))

    // The official documentation of the time said that names without matching fields are
    // ignored (and that a single-name lookup returns null), i.e. that this call would behave
    // like struct(Set("b", "c")). When the author ran it on the initial Spark 2.0 release it
    // failed instead with "Field d does not exist": the apply method does not implement the
    // documented behaviour. Capture the outcome rather than letting the exception escape.
    val withMissingField = scala.util.Try(struct(Set("b", "c", "d")))
    println(withMissingField)
    // Expected on Spark 2.0, per the author's run: a Failure wrapping
    // java.lang.IllegalArgumentException (Field d does not exist.)
  }

  /**
    * Exercises Seq-style operations on a StructType.
    *
    * In the Spark source quoted by the article, StructType is declared as
    * `case class StructType(fields: Array[StructField]) extends DataType with Seq[StructField]`,
    * so the operations of Scala's Seq are available on it.
    */
  def structType_opration(): Unit = {
    val tmpStruct = StructType(StructField("d", IntegerType) :: Nil)

    // Collection-with-collection operation.
    println(struct ++ tmpStruct)
    // println(struct++:tmpStruct)
    // List(StructField(a,IntegerType,true), StructField(b,LongType,false), StructField(c,BooleanType,false), StructField(d,IntegerType,true))

    // Collection-with-element operation.
    println(struct :+ StructField("d", IntegerType))

    // `add` can be used for the same purpose.
    println(struct.add("e", IntegerType))
    // StructType(StructField(a,IntegerType,true), StructField(b,LongType,false), StructField(c,BooleanType,false), StructField(e,IntegerType,true))

    // First element.
    println(struct.head)
    // StructField(a,IntegerType,true)

    // Last element.
    println(struct.last)
    // StructField(c,BooleanType,false)

    println(struct.apply("a"))
    // StructField(a,IntegerType,true)

    println(struct.treeString)
    /*
     * root
     *  |-- a: integer (nullable = true)
     *  |-- b: long (nullable = false)
     *  |-- c: boolean (nullable = false)
     */

    println(struct.contains(StructField("f", IntegerType)))
    // false

    println(struct.mkString)
    // StructField(a,IntegerType,true)StructField(b,LongType,false)StructField(c,BooleanType,false)

    println(struct.prettyJson)
    /*
     * {
     *   "type" : "struct",
     *   "fields" : [ {
     *     "name" : "a",
     *     "type" : "integer",
     *     "nullable" : true,
     *     "metadata" : { }
     *   }, {
     *     "name" : "b",
     *     "type" : "long",
     *     "nullable" : false,
     *     "metadata" : { }
     *   }, {
     *     "name" : "c",
     *     "type" : "boolean",
     *     "nullable" : false,
     *     "metadata" : { }
     *   } ]
     * }
     */
    // More operations can be found in the API documentation.
  }

  /** Entry point; only the Seq-operations demo is enabled. */
  def main(args: Array[String]): Unit = {
    //schema_StructType()
    //structType_extracted()
    structType_opration()
  }
}


/**
 * Reads a comma-separated text file into a Dataset[Person] and inspects the schema
 * Spark derives for it, then looks up the "name" field and prints its attributes.
 */
def schema_op(): Unit = {
  // NOTE(review): a case class declared inside a method can prevent Spark from deriving an
  // encoder for toDS(); if this fails to compile, move Person to the top level. TODO confirm.
  case class Person(name: String, age: Long)

  val sparkSession = SparkSession.builder().appName("data set example")
    .master("local").getOrCreate()
  import sparkSession.implicits._

  val rdd = sparkSession.sparkContext.textFile("hdfs://master:9000/src/main/resources/people.txt")
  // NOTE(review): the start of this expression was lost when the article was scraped
  // (only `",")).map(...` survived); splitting each line on "," is the reconstruction
  // implied by the p(0) / p(1) accesses that follow.
  val dataSet = rdd.map(_.split(",")).map(p => Person(p(0), p(1).trim.toLong)).toDS()

  println(dataSet.schema)
  // StructType(StructField(name,StringType,true), StructField(age,LongType,false))

  /*
   * Relevant Spark source, as quoted by the article:
   *
   * def schema: StructType = queryExecution.analyzed.schema
   *
   * def apply(name: String): StructField = {
   *   nameToField.getOrElse(name,
   *     throw new IllegalArgumentException(s"""Field "$name" does not exist."""))
   * }
   */
  val tmp: StructField = dataSet.schema("name")
  println(tmp)
  // StructField(name,StringType,true)

  // NOTE(review): the argument of this println was lost in scraping; tmp.name is assumed
  // because the remaining three StructField attributes are printed right after it.
  println(tmp.name)
  println(tmp.dataType) // StringType
  println(tmp.nullable) // true
  println(tmp.metadata) // {}
}

0 0