Two ways to create a DataFrame in Spark SQL.


Dynamic creation (programmatic schema): build a StructType by hand and pair it with an RDD of Row objects. This is useful when the schema is only known at runtime.

package com.sparkproject.abc;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.*;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

import java.util.Arrays;
import java.util.UUID;

public class DataFrame1 {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("DataFrame").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // The input path is left empty in the original; point it at a space-separated "name id" text file.
        JavaRDD<String> javaRDD = sc.textFile("");

        // Build the schema programmatically: three nullable fields.
        StructType schema = DataTypes.createStructType(Arrays.asList(
                DataTypes.createStructField("name", DataTypes.StringType, true),
                DataTypes.createStructField("id", DataTypes.IntegerType, true),
                DataTypes.createStructField("uuid", DataTypes.StringType, true)
        ));

        SQLContext sqlContext = new SQLContext(sc.sc());

        // Turn each input line into a Row that matches the schema above,
        // generating a random UUID for the third column.
        JavaRDD<Row> rowJavaRDD = javaRDD.map(new Function<String, Row>() {
            @Override
            public Row call(String v1) throws Exception {
                String[] fields = v1.split(" ");
                return RowFactory.create(fields[0], Integer.valueOf(fields[1]), UUID.randomUUID().toString());
            }
        });

        // Pair the RDD of Rows with the schema to get a DataFrame.
        DataFrame df = sqlContext.createDataFrame(rowJavaRDD, schema);
        df.registerTempTable("mes");

        DataFrame df2 = sqlContext.sql("select * from mes");
        df2.show();
    }
}
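For reference (not in the original post), the parsing logic above expects space-separated "name id" lines. A hypothetical input file:

zhangsan 1
lisi 2

With that input, df2.show() prints roughly the following; the uuid values are freshly generated on every run, and show() truncates long values:

+--------+---+--------------------+
|    name| id|                uuid|
+--------+---+--------------------+
|zhangsan|  1|1b4e28ba-2fa1-11d...|
|    lisi|  2|6fa459ea-ee8a-3ca...|
+--------+---+--------------------+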

Reflection: let Spark infer the schema from a JavaBean class by reading its getters and setters.

package com.sparkproject.abc;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class DataFrame2 {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("DataFrame2").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sc);

        // The input path is left empty in the original; point it at a space-separated "name age" text file.
        JavaRDD<String> javaRDD = sc.textFile("");

        // Map each line onto the JavaBean; Spark infers the schema from the bean via reflection.
        JavaRDD<DataFrame2_Class> javaRDD1 = javaRDD.map(new Function<String, DataFrame2_Class>() {
            @Override
            public DataFrame2_Class call(String v1) throws Exception {
                String[] fields = v1.split(" ");
                DataFrame2_Class d1 = new DataFrame2_Class();
                d1.setName(fields[0]);
                d1.setAge(Integer.valueOf(fields[1]));
                return d1;
            }
        });

        DataFrame df = sqlContext.createDataFrame(javaRDD1, DataFrame2_Class.class);
        df.registerTempTable("mess");

        // Register a UDF that returns the length of a string, then call it from SQL.
        sqlContext.udf().register("strLength", new UDF1<String, Integer>() {
            @Override
            public Integer call(String s) throws Exception {
                return s.length();
            }
        }, DataTypes.IntegerType);

        sqlContext.sql("select name, strLength(name) from mess").show();
        df.select("name").show();
    }
}
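Usage note (not in the original post): once registered, the UDF can also be invoked from the DataFrame API via org.apache.spark.sql.functions.callUDF, which has been available since Spark 1.5. A sketch:

import static org.apache.spark.sql.functions.callUDF;

// Equivalent to the SQL query above, expressed with the DataFrame API:
df.select(df.col("name"), callUDF("strLength", df.col("name"))).show();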

The JavaBean used by the reflection example:
package com.sparkproject.abc;

import java.io.Serializable;

// JavaBean used for schema inference. It needs public getters/setters for every column,
// and should implement Serializable so instances can be shipped across the cluster.
public class DataFrame2_Class implements Serializable {
    private String name;
    private int age;

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getAge() {
        return age;
    }

    public void setAge(int age) {
        this.age = age;
    }
}
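Both examples use the Spark 1.x API. For readers on Spark 2.x, here is a minimal sketch of the same reflection-based flow (the class name DataFrame2Spark2 is hypothetical): DataFrame became Dataset<Row>, SparkSession replaced SQLContext, and registerTempTable was superseded by createOrReplaceTempView.

package com.sparkproject.abc;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

public class DataFrame2Spark2 {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
                .appName("DataFrame2")
                .master("local")
                .getOrCreate();

        // The path is a placeholder, as in the original examples.
        Dataset<Row> df = spark.createDataFrame(
                spark.read().textFile("").javaRDD().map(line -> {
                    String[] fields = line.split(" ");
                    DataFrame2_Class d = new DataFrame2_Class();
                    d.setName(fields[0]);
                    d.setAge(Integer.valueOf(fields[1]));
                    return d;
                }),
                DataFrame2_Class.class);

        df.createOrReplaceTempView("mess");

        // UDF registration is the same apart from the entry point.
        spark.udf().register("strLength", (UDF1<String, Integer>) s -> s.length(), DataTypes.IntegerType);
        spark.sql("select name, strLength(name) from mess").show();
    }
}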