Spark Parquet: reading from and writing to HDFS (Scala version)

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SaveMode

object GenericLoadSave {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("GenericLoadSave")
      .setMaster("local")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Read a Parquet file from HDFS
    val usersDF = sqlContext.read.format("parquet").load("hdfs://hadoop1:9000/input/users.parquet")

    // Write the DataFrame back to HDFS as Parquet, overwriting any existing output
    usersDF.write.mode(SaveMode.Overwrite).format("parquet").save("hdfs://hadoop1:9000/output/namesAndFavColors_scala")

    // Read the freshly written Parquet data back and print it to verify the round trip
    val tDF = sqlContext.read.format("parquet").load("hdfs://hadoop1:9000/output/namesAndFavColors_scala")
    tDF.show()
  }
}
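
SQLContext is the Spark 1.x entry point. On Spark 2.x and later the same load/save flow is usually written against SparkSession instead. The following is a minimal sketch under that assumption, reusing the HDFS paths and host name from the example above (adjust them to your cluster):

import org.apache.spark.sql.{SaveMode, SparkSession}

object GenericLoadSaveSession {

  def main(args: Array[String]): Unit = {
    // SparkSession replaces the SparkContext + SQLContext pair as the single entry point in Spark 2.x+
    val spark = SparkSession.builder()
      .appName("GenericLoadSave")
      .master("local")
      .getOrCreate()

    // Read the Parquet file from HDFS (same example path as above)
    val usersDF = spark.read.parquet("hdfs://hadoop1:9000/input/users.parquet")

    // Write it back as Parquet, overwriting any existing output
    usersDF.write.mode(SaveMode.Overwrite).parquet("hdfs://hadoop1:9000/output/namesAndFavColors_scala")

    // Re-read and show the written data to verify the round trip
    spark.read.parquet("hdfs://hadoop1:9000/output/namesAndFavColors_scala").show()

    spark.stop()
  }
}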
