PySpark 执行 Hive SQL 字符串(hivesqlstr)

来源:互联网 发布:苹果微信网络出错1202 编辑:程序博客网 时间:2024/06/06 01:48

在 Spark 中执行 Hive SQL 字符串(hivesqlstr)的两种方法

方法1:使用 SparkSession(Spark 2.x 及以上)

# Build (or reuse) a SparkSession with Hive support enabled, then run a
# Hive SQL string through it.
#
# The builder chain is wrapped in parentheses instead of using backslash
# continuations: a blank line after a trailing backslash ends the statement,
# which makes the following ".builder" / ".appName(...)" lines a syntax error.
#
# NOTE: assumes `from pyspark.sql import SparkSession` and the query string
# `sql` are defined earlier in the script.
spark = (
    SparkSession.builder
    .appName("app_level")
    .enableHiveSupport()
    .getOrCreate()
)

# Execute the SQL string; spark.sql() returns the result as a DataFrame.
spark.sql(sql)

 

方法2:使用 SparkContext + HiveContext(Spark 1.x 的写法)

# Select the Python interpreter PySpark should use.
# This must be written into the current process's environment before the
# SparkContext is created: os.system("export ...") runs in a throw-away child
# shell, so the variable never reaches this process or the Spark job.
# NOTE(review): confirm that an executable named "python2.7.5" actually exists
# on the driver and worker nodes; the value is kept as originally written.
os.environ["PYSPARK_PYTHON"] = "python2.7.5"

appName = "VL_caltion"
mode = "yarn-client"

# Use the `mode` variable for the master URL instead of repeating the literal.
conf = SparkConf().setAppName(appName).setMaster(mode)
sc = SparkContext(conf=conf)

# HiveContext wraps the SparkContext so Hive SQL can be run via hiveContext.sql().
hiveContext = HiveContext(sc)

 

# Join SKU-level and dept-level results on (dc_id, seller_code, dept_code) and
# pick, per row, which level's statistics to keep.
#
# Rows whose SKU-level `amt` is below 10 take the dept-level mean/std and are
# tagged dim_type=2; all other rows keep the SKU-level values with dim_type=1.
use_dept_stats = sku_result.amt < 10

# The chain is parenthesized rather than continued with a backslash: a blank
# line after a trailing backslash terminates the statement and breaks the chain.
# Only the final coalesce(10) is kept; an earlier coalesce(10) directly after
# the join was redundant because the result is coalesced to 10 partitions anyway.
self.result = (
    sku_result.join(dept_result, ["dc_id", "seller_code", "dept_code"])
    .select(
        "seller_code",
        "dept_code",
        "sku_code",
        "dc_id",
        F.when(use_dept_stats, dept_result.vl_mean_dept)
        .otherwise(sku_result.vl_mean)
        .alias("vl_mean"),
        F.when(use_dept_stats, dept_result.vl_std_dept)
        .otherwise(sku_result.vl_std)
        .alias("vl_std"),
        col("amt").alias("sku_order_count"),
        col("amt_dept").alias("dept_order_count"),
        F.when(use_dept_stats, 2).otherwise(1).alias("dim_type"),
    )
    # Constant `dt` column carrying endDate, added as the last column.
    .withColumn("dt", lit(endDate))
    .coalesce(10)
)

 

 

# Enable Hive dynamic partitioning for the insert below.
# Each setting is issued in its own sql() call: sql() handles a single
# statement, so packing "set a=...;set b=...;" into one string does not apply
# the two settings independently.
hiveContext.sql("set hive.exec.dynamic.partition.mode=nonstrict")
hiveContext.sql("set hive.exec.dynamic.partition=true")

# Drop the dt=endDate partition first so the append below starts from an empty
# partition. The original text read "drop if existspartition(...)" (missing
# space), which is not valid DDL.
# NOTE(review): endDate is concatenated into the statement; make sure it is a
# trusted, internally generated date string.
hiveContext.sql(
    "alter table app.app_ivlt drop if exists partition(dt='" + endDate + "')"
)

# insertInto() matches DataFrame columns to table columns by position, so the
# column order of self.result must match the table definition.
# The stray trailing space in the table name has been removed.
self.result.write.mode("append").insertInto("app.app_ivlt")

原创粉丝点击