hive sql解决关联条件中不等值连接问题及累计值的计算

来源:互联网 发布:淘宝贷款怎么回事 编辑:程序博客网 时间:2024/04/27 08:51

工作中写的sql,本次的sql可以拓展出一些新内容,留下笔记

with a as (select  interface,case when action is NUll then 'null' else action end as action, request_time*1000 as request_time    ,count(request_time) as pvfrom xx_xxwhere ds >= %YYYYMMDD% || '00' and ds <= %YYYYMMDD% || '23'group by  interface,action,request_time * 1000),b as ( select  a1.interface ,a1.action,a1.request_time,a1.pv ,sum(case when  a1.request_time>= a2.request_time then a2.pv end) as sum_cnt    --解决hive sql关联条件中不等值连接问题 from   a a1 join   a a2on a1.interface = a2.interface and   a1.action = a2.action   group by  a1.interface ,a1.action,a1.request_time,a1.pv ),c as (select   b.interface  ,b.action  ,case when b.sum_cnt / t.total_pv >=0.90 then b.request_time end as geo_time_90  ,case when b.sum_cnt / t.total_pv >=0.95 then b.request_time end as geo_time_95  ,case when b.sum_cnt / t.total_pv >=0.99 then b.request_time end as geo_time_99from bjoin  (select interface,action,sum(pv) as total_pvfrom                 a             group by                   interface                          ,action)t  on b.interface = t.interfaceand b.action = t.action)select c.interface,c.action,min(geo_time_90) as geo_time_90,min(geo_time_95) as geo_time_95,min(geo_time_99) as geo_time_99from cgroup by c.interface,c.action