一个稍微复杂点的hive sql

来源:互联网 发布:礼仪卡片制作软件 编辑:程序博客网 时间:2024/05/16 06:17
with abc as(select substring(a.pay_time,1,10) dt,((if(oobi_consume>0,(oobi_consume*1.00/100), 0))+ (if(giftcard_consume>0,(giftcard_consume*1.00/100), 0)))pay_fee,a.xxx_id as uidfrom order_all a inner join oobi_trade b on a.order_id = b.order_idwhere (oobi_consume> 0 or giftcard_consume>0) and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01')union allselect substring(a.pay_time,1,10) dt,(b.pay_fee*1.00/100) pay_fee,a.uidfrom unidied_orders a inner join payments b on a.order_id=b.order_idwhere pay_fee>0 and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01'))select *, round((total_pay_fee/total_pay_users),2) as avg_pay,round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30from (select 1 as dt, count(distinct if(pay_fee>0 and dt='2016-03-01', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-01', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-24', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-24', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-02', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-02', pay_fee, 0)) as total_pay_fee_30from abc) aunion allselect *, round((total_pay_fee/total_pay_users),2) as avg_pay,round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30from (select 2 as dt, count(distinct if(pay_fee>0 and dt='2016-03-02', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-02', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-25', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-25', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-03', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-03', pay_fee, 0)) as total_pay_fee_30from abc) aunion allselect *, round((total_pay_fee/total_pay_users),2) as avg_pay,round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30from (select 3 as dt, count(distinct if(pay_fee>0 and dt='2016-03-03', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-03', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-26', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-26', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-04', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-04', pay_fee, 0)) as total_pay_fee_30from abc) aunion allselect *, round((total_pay_fee/total_pay_users),2) as avg_pay,round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30from (select 4 as dt, count(distinct if(pay_fee>0 and dt='2016-03-04', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-04', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-27', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-27', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-05', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-05', pay_fee, 0)) as total_pay_fee_30from abc) a;


以上查询数据量太大,hive跑不起来,优化之后的查询如下:


with abc as(select substring(a.pay_time,1,10) dt,(sum(if(oobi_consume>0,(oobi_consume*1.00/100), 0))+ sum(if(giftcard_consume>0,(giftcard_consume*1.00/100), 0)))pay_fee,a.user_id as uidfrom order_all a inner join oobi_trade b on a.order_id = b.order_idwhere (oobi_consume> 0 or giftcard_consume>0) and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01')group by substring(a.pay_time,1,10), a.user_idunion allselect substring(a.pay_time,1,10) dt,sum(b.pay_fee*1.00/100) pay_fee,a.uidfrom unidied_orders a inner join payments b on a.order_id=b.order_idwhere pay_fee>0 and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-03-01')group by substring(a.pay_time,1,10), a.uid) select '2016-03-01' as dd, count(distinct if(pay_fee>0 and dt='2016-03-01', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-01', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-24', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-24', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-02', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-02', pay_fee, 0)) as total_pay_fee_30from abcunion allselect '2016-03-02' as dd, count(distinct if(pay_fee>0 and dt='2016-03-02', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-02', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-25', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-25', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-03', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-03', pay_fee, 0)) as total_pay_fee_30from abcunion allselect '2016-03-03' as dd,count(distinct if(pay_fee>0 and dt='2016-03-03', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-03', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-26', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-26', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-04', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-04', pay_fee, 0)) as total_pay_fee_30from abcunion allselect '2016-03-04' as dd, count(distinct if(pay_fee>0 and dt='2016-03-04', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-04', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-27', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-27', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-05', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-05', pay_fee, 0)) as total_pay_fee_30from abc;

最后改进:

with t0 as (select substring(pay_time,1,10) dt,user_id,sum(b.xxbi_consume+b.giftcard_consume) pay_feefrom order_all a inner join xxbi_trade b on a.order_id = b.order_id where substring(pay_time,1,10)>='2016-04-17' and substring(pay_time,1,10)<='2016-04-30'group by substring(pay_time,1,10),user_idunion allselect substring(a.pay_time,1,10) dt,a.uid user_id,sum(b.pay_fee) pay_feefrom unified_orders a inner join payments b on a.order_id=b.order_idwhere pay_type in ('WXAPP','WXWAP')and substring(b.pay_time,1,10)>='2016-04-17' and substring(b.pay_time,1,10)<='2016-04-30'group by substring(a.pay_time,1,10),a.uid),t1 as (select distinct substring(pay_time,1,10) dt from order_all where substring(pay_time,1,10)>='2016-04-17' and substring(pay_time,1,10)<='2016-04-30'),t2 as (select dt,user_id,sum(pay_fee) pay_fee from t0 group by dt,user_id),t3 as (select t2.*,t1.dt zdt from t1 join t2 on 1=1)select dt,sum(case when (datediff(dt,zdt)=0) then pay_fee end) pay,count(distinct case when (datediff(dt,zdt)=0) then user_id end) user_cnt,sum(case when datediff(dt,zdt)>0 and datediff(dt,zdt)<=7 then pay_fee end ) pay_seven,count(distinct case when datediff(dt,zdt)>0 and datediff(dt,zdt)<=7 then user_id end) user_sevenfrom t3 where dt >= '2016-04-24'and dt<= '2016-04-30' group by dt;






0 0
原创粉丝点击