hive ETL之业绩报表sql
来源:互联网 发布:t95e6数据 编辑:程序博客网 时间:2024/06/06 03:34
-- case4 ----========== rates ==========--app0 1app1 2app2 2app3 3app4 3app5 3app6 5app7 5app8 5app9 5CREATE EXTERNAL TABLE rates ( app_name STRING , star_rates STRING)ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'LOCATION '/tmp/db/rates';create table app_ranks as select app_name as app , star_rates as stars , NTILE(3) OVER (ORDER BY star_rates DESC) as nt , row_number() OVER (ORDER BY star_rates DESC) as rn , rank() OVER (ORDER BY star_rates DESC) as rk , dense_rank() OVER (ORDER BY star_rates DESC) as drk , CUME_DIST() OVER (ORDER BY star_rates) as cd , PERCENT_RANK() OVER (ORDER BY star_rates) as pr from rates order by stars desc;select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) from app_ranks;select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) from app_ranks;select app, stars, cd, sum(cd) OVER (PARTITION BY stars ORDER BY rn ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) from app_ranks;select app, stars, rn, lead(rn) OVER (PARTITION BY stars ORDER BY rn), lag(rn) OVER (PARTITION BY stars ORDER BY rn) from app_ranks;--========== visitors ==========--d001 201301 101d002 201301 102d003 201301 103d001 201302 111d002 201302 112d003 201302 113d001 201303 121d002 201303 122d003 201303 123d001 201304 131d002 201304 132d003 201304 133d001 201305 141d002 201305 142d003 201305 143d001 201306 151d002 201306 152d003 201306 153d001 201307 201d002 201307 202d003 201307 203d001 201308 211d002 201308 212d003 201308 213d001 201309 221d002 201309 222d003 201309 223d001 201310 231d002 201310 232d003 201310 233d001 201311 241d002 201311 242d003 201311 243d001 201312 301d002 201312 302d003 201312 303d001 201401 301d002 201401 302d003 201401 303d001 201402 211d002 201402 212d003 201402 213d001 201403 271d002 201403 272d003 201403 273d001 201404 331d002 201404 332d003 201404 333d001 201405 351d002 201405 352d003 201405 353CREATE EXTERNAL TABLE visitors ( domain STRING , month STRING , visitor STRING)ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'LOCATION '/tmp/db/visitors';select * from visitors where domain = 'd001';select domain , month , visitor , first_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC) , last_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC) , lead(visitor) OVER (PARTITION BY domain ORDER BY month DESC) , lag(visitor) OVER (PARTITION BY domain ORDER BY month DESC)from visitorswhere domain = 'd001';select domain , month , visitor , first_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC) , last_value(visitor) OVER (PARTITION BY domain ORDER BY month DESC) , lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC) , lag(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC) , lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC) , lag(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC)from visitorswhere domain = 'd001';create table visitors_report as select domain , month , visitor , lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC) as last_mon , visitor - lead(visitor, 1, 0) OVER (PARTITION BY domain ORDER BY month DESC) as delta_mon , lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC) as last_year , visitor - lead(visitor, 12, 0) OVER (PARTITION BY domain ORDER BY month DESC) as delta_year from visitors;select * from visitors_report where domain = 'd001' and month > '2014';select month , domain , visitor , last_mon , last_yearfrom visitors_reportwhere (domain = 'd001' or domain = 'd002') and month > '2014'order by month desc, domain asclimit 100;select month , domain , visitor , max(visitor) OVER (PARTITION BY month) as max_visitors , min(visitor) OVER (PARTITION BY month) as min_visitorsfrom visitorswhere month > '2014'order by month desc, domain asc;select *from (select month , domain , visitor , max(visitor) OVER (PARTITION BY domain ORDER BY month DESC ROWS BETWEEN CURRENT ROW AND 12 FOLLOWING) as max_visitors_last_12_mon , min(visitor) OVER (PARTITION BY domain ORDER BY month DESC ROWS BETWEEN CURRENT ROW AND 12 FOLLOWING) as min_visitors_last_12_monfrom visitors) vwhere month > '20131'order by month desc, domain asc;
本文出自 “点滴积累” 博客,请务必保留此出处http://tianxingzhe.blog.51cto.com/3390077/1717581
0 0
- hive ETL之业绩报表sql
- hive ETL之物流行业-订单跟踪SLA sql
- hive ETL之广告行业-用户行为归类sql
- hive ETL之电商零售行业-推荐系统sql
- HIVE数仓ETL之MongoDB
- sql 显示当日业绩和累计业绩
- ETL之三 MS SQL DTS
- Hive入门 准备用于ETL
- ETL学习笔记之三:MS SQL DTS
- ETL学习笔记之三:MS SQL DTS
- ETL学习之四:SQL Server Integration Services入门
- ETL学习笔记之三:MS SQL DTS
- ETL学习笔记之三:MS SQL DTS
- SQL报表之行列变换
- SQL报表之户籍管理实例
- HappyBI(报表/ETL/打印)实践网应用
- ETL技术入门之ETL初认识
- ETL技术入门之ETL初认识
- hive与hbase整合
- hive ETL之物流行业-订单跟踪SLA sql
- 碾压【路径问题】这条小臭虫!
- hive ETL之广告行业-用户行为归类sql
- hive ETL之电商零售行业-推荐系统sql
- hive ETL之业绩报表sql
- hbase REST API
- hbase手动compact与split
- spark访问hbase
- spark Sql
- 匿名函数和闭包
- crontab/cron详解
- java动态代理实现Proxy和InvocationHandler cglib
- Java String.intern()方法学习