一个稍微复杂点的hive sql
来源:互联网 发布:礼仪卡片制作软件 编辑:程序博客网 时间:2024/05/16 06:17
with abc as(select substring(a.pay_time,1,10) dt,((if(oobi_consume>0,(oobi_consume*1.00/100), 0))+ (if(giftcard_consume>0,(giftcard_consume*1.00/100), 0)))pay_fee,a.xxx_id as uidfrom order_all a inner join oobi_trade b on a.order_id = b.order_idwhere (oobi_consume> 0 or giftcard_consume>0) and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01')union allselect substring(a.pay_time,1,10) dt,(b.pay_fee*1.00/100) pay_fee,a.uidfrom unidied_orders a inner join payments b on a.order_id=b.order_idwhere pay_fee>0 and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01'))select *, round((total_pay_fee/total_pay_users),2) as avg_pay,round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30from (select 1 as dt, count(distinct if(pay_fee>0 and dt='2016-03-01', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-01', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-24', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-24', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-02', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-02', pay_fee, 0)) as total_pay_fee_30from abc) aunion allselect *, round((total_pay_fee/total_pay_users),2) as avg_pay,round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30from (select 2 as dt, count(distinct if(pay_fee>0 and dt='2016-03-02', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-02', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-25', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-25', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-03', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-03', pay_fee, 0)) as total_pay_fee_30from abc) aunion allselect *, round((total_pay_fee/total_pay_users),2) as avg_pay,round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30from (select 3 as dt, count(distinct if(pay_fee>0 and dt='2016-03-03', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-03', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-26', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-26', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-04', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-04', pay_fee, 0)) as total_pay_fee_30from abc) aunion allselect *, round((total_pay_fee/total_pay_users),2) as avg_pay,round((total_pay_fee_7/total_pay_users_7),2) as avg_pay_7,round((total_pay_fee_30/total_pay_users_30),2) as avg_pay_30from (select 4 as dt, count(distinct if(pay_fee>0 and dt='2016-03-04', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-04', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-27', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-27', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-05', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-05', pay_fee, 0)) as total_pay_fee_30from abc) a;
以上查询数据量太大,hive跑不起来,优化之后的查询如下:
with abc as(select substring(a.pay_time,1,10) dt,(sum(if(oobi_consume>0,(oobi_consume*1.00/100), 0))+ sum(if(giftcard_consume>0,(giftcard_consume*1.00/100), 0)))pay_fee,a.user_id as uidfrom order_all a inner join oobi_trade b on a.order_id = b.order_idwhere (oobi_consume> 0 or giftcard_consume>0) and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-02-01')group by substring(a.pay_time,1,10), a.user_idunion allselect substring(a.pay_time,1,10) dt,sum(b.pay_fee*1.00/100) pay_fee,a.uidfrom unidied_orders a inner join payments b on a.order_id=b.order_idwhere pay_fee>0 and (substring(a.pay_time,1,10)<= '2016-03-04' and substring(a.pay_time,1,10)>= '2016-03-01')group by substring(a.pay_time,1,10), a.uid) select '2016-03-01' as dd, count(distinct if(pay_fee>0 and dt='2016-03-01', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-01', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-24', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-24', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-02', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-02', pay_fee, 0)) as total_pay_fee_30from abcunion allselect '2016-03-02' as dd, count(distinct if(pay_fee>0 and dt='2016-03-02', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-02', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-25', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-25', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-03', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-03', pay_fee, 0)) as total_pay_fee_30from abcunion allselect '2016-03-03' as dd,count(distinct if(pay_fee>0 and dt='2016-03-03', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-03', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-26', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-26', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-04', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-04', pay_fee, 0)) as total_pay_fee_30from abcunion allselect '2016-03-04' as dd, count(distinct if(pay_fee>0 and dt='2016-03-04', uid, 0)) as total_pay_users,sum(if(pay_fee>0 and dt='2016-03-04', pay_fee, 0)) as total_pay_fee,count(distinct if(pay_fee>0 and dt>='2016-02-27', uid, 0)) as total_pay_users_7,sum(if(pay_fee>0 and dt>='2016-02-27', pay_fee, 0)) as total_pay_fee_7,count(distinct if(pay_fee>0 and dt>='2016-02-05', uid, 0)) as total_pay_users_30,sum(if(pay_fee>0 and dt>='2016-02-05', pay_fee, 0)) as total_pay_fee_30from abc;
最后改进:
with t0 as (select substring(pay_time,1,10) dt,user_id,sum(b.xxbi_consume+b.giftcard_consume) pay_feefrom order_all a inner join xxbi_trade b on a.order_id = b.order_id where substring(pay_time,1,10)>='2016-04-17' and substring(pay_time,1,10)<='2016-04-30'group by substring(pay_time,1,10),user_idunion allselect substring(a.pay_time,1,10) dt,a.uid user_id,sum(b.pay_fee) pay_feefrom unified_orders a inner join payments b on a.order_id=b.order_idwhere pay_type in ('WXAPP','WXWAP')and substring(b.pay_time,1,10)>='2016-04-17' and substring(b.pay_time,1,10)<='2016-04-30'group by substring(a.pay_time,1,10),a.uid),t1 as (select distinct substring(pay_time,1,10) dt from order_all where substring(pay_time,1,10)>='2016-04-17' and substring(pay_time,1,10)<='2016-04-30'),t2 as (select dt,user_id,sum(pay_fee) pay_fee from t0 group by dt,user_id),t3 as (select t2.*,t1.dt zdt from t1 join t2 on 1=1)select dt,sum(case when (datediff(dt,zdt)=0) then pay_fee end) pay,count(distinct case when (datediff(dt,zdt)=0) then user_id end) user_cnt,sum(case when datediff(dt,zdt)>0 and datediff(dt,zdt)<=7 then pay_fee end ) pay_seven,count(distinct case when datediff(dt,zdt)>0 and datediff(dt,zdt)<=7 then user_id end) user_sevenfrom t3 where dt >= '2016-04-24'and dt<= '2016-04-30' group by dt;
0 0
- 一个稍微复杂点的hive sql
- 一个稍微复杂的awk & sed应用
- Annotation(二) 一个稍微复杂的示例
- 一个稍微复杂的VTK程序
- 今天终于调好了一个稍微复杂点的程序,很值!
- java构建一个格式稍微复杂点的JSON对象附查看json格式的小工具
- 拆分一个稍微有点复杂的字符串,用c#实现
- 一个复杂的sql
- 一个“复杂的”SQL
- 使用with语句来写一个稍微复杂sql语句(经典,转载)
- SQL点滴10—使用with语句来写一个稍微复杂sql语句,附加和子查询的性能对比
- SQL点滴10—使用with语句来写一个稍微复杂sql语句,附加和子查询的性能对比
- cocos2dX Action 稍微复杂些的动作
- js做的稍微复杂地计算器
- Orleans稍微复杂的例子—互动
- Orleans稍微复杂的例子—互动
- 一个比较复杂的sql
- 一个复杂的SQL函数
- 百度登录算法分析和实现(下 - 实现篇)
- poj2685——Numeral System新进制系统
- Android.mk详解
- 关于文件上传踩过的坑multipart/form-data
- [Sencha ExtJS] 异步 JavaScript: Promises
- 一个稍微复杂点的hive sql
- 引导页_视差显示
- button贴图,点击事件带参数和不带参数连接
- 五一的任务
- mac的apache配置
- [iOS]修改UILabel的行间距/段间距/缩进
- Android ListView监听上下滑动(判断是否显示返回顶部按钮)
- nyoj_77 开灯问题
- @Repository、@Service、@Controller 和 @Component