使用 PySpark 执行 Hive SQL 字符串

来源：互联网　发布：苹果微信网络出错1202　编辑：程序博客网　时间：2024/06/06 01:48

spark执行hivesqlstr

方法一：通过启用了 Hive 支持的 SparkSession 执行 SQL：

# Method 1: execute a Hive SQL string through a SparkSession that has Hive
# support enabled.
#
# The builder chain is wrapped in parentheses instead of using trailing
# backslashes: a backslash continuation breaks as soon as a blank line (or
# trailing whitespace) follows it, whereas a parenthesised expression does not.
spark = (
    SparkSession.builder
    .appName("app_level")
    .enableHiveSupport()  # required so spark.sql() can resolve Hive tables
    .getOrCreate()
)

# `sql` is the SQL statement text; it is not defined in this snippet and is
# expected to be supplied by the surrounding code.
spark.sql(sql)

方法二：通过 SparkContext 创建 HiveContext 执行 SQL：

# Method 2: build a HiveContext on top of an explicitly configured
# SparkContext.

# Select the Python interpreter PySpark should use. This must be set in the
# current process environment: os.system("export ...") only changes a
# short-lived subshell and never reaches this process or Spark.
# NOTE(review): "python2.7.5" is kept from the original; confirm it is the
# actual executable name on the cluster nodes.
os.environ["PYSPARK_PYTHON"] = "python2.7.5"

appName = "VL_caltion"
mode = "yarn-client"

# Use `mode` for the master so the setting lives in exactly one place
# (the original assigned `mode` but hard-coded the same literal here).
conf = SparkConf().setAppName(appName).setMaster(mode)
sc = SparkContext(conf=conf)
hiveContext = HiveContext(sc)

# Build the output DataFrame by joining SKU-level rows with department-level
# rows on (dc_id, seller_code, dept_code).
#
# For SKUs whose `amt` is below 10, the department-level mean/std
# (vl_mean_dept / vl_std_dept) replace the SKU-level values and dim_type is
# set to 2; otherwise the SKU's own values are kept and dim_type is 1.
# `sku_result`, `dept_result`, `endDate`, `F`, `col` and `lit` come from code
# outside this snippet.
#
# The expression is parenthesised rather than backslash-continued so that
# blank lines or trailing whitespace cannot break it.
use_dept_stats = sku_result.amt < 10

self.result = (
    sku_result.join(dept_result, ["dc_id", "seller_code", "dept_code"])
    .select(
        "seller_code",
        "dept_code",
        "sku_code",
        "dc_id",
        F.when(use_dept_stats, dept_result.vl_mean_dept)
        .otherwise(sku_result.vl_mean)
        .alias("vl_mean"),
        F.when(use_dept_stats, dept_result.vl_std_dept)
        .otherwise(sku_result.vl_std)
        .alias("vl_std"),
        col("amt").alias("sku_order_count"),
        col("amt_dept").alias("dept_order_count"),
        F.when(use_dept_stats, 2).otherwise(1).alias("dim_type"),
    )
    # Partition column value for the insert into the dt-partitioned table.
    .withColumn("dt", lit(endDate))
    # A single coalesce at the end is sufficient: select/withColumn do not
    # change the partition count, so the original's extra coalesce(10) right
    # after the join was redundant.
    .coalesce(10)
)

# Refresh the `dt = endDate` partition of app.app_ivlt with self.result.

# Enable dynamic partitioning. Each sql() call executes ONE statement, so the
# two settings must be issued separately; combined into a single
# ';'-separated string, the second setting is not applied as intended.
hiveContext.sql("set hive.exec.dynamic.partition.mode=nonstrict")
hiveContext.sql("set hive.exec.dynamic.partition=true")

# Drop the target partition first so re-running for the same date does not
# append duplicate rows. (The original statement read "if existspartition",
# which is not valid SQL - a space is required between the two keywords.)
# NOTE(review): endDate is concatenated into the statement; make sure it is a
# trusted, validated date string.
hiveContext.sql(
    "alter table app.app_ivlt drop if exists partition(dt='" + endDate + "')"
)

# insertInto matches columns by position, so the column order of self.result
# must match the table definition, with the partition column `dt` last.
# NOTE(review): the table-name literal carries a trailing space; it is kept
# byte-identical here - confirm and remove if unintended.
self.result.write.mode("append").insertInto("app.app_ivlt ")

阅读全文

0 0