hive基本操作2

来源:互联网 发布:免费海关数据查询 编辑:程序博客网 时间:2024/06/15 18:41

//a) 统计WIFI环境下用户最多的5个城市
select city_code,count(access) as cnt from user where access=’WIFI’ group by city_code order by cnt desc limit 5;

//b) 统计用户数>=3的access和city_code组合
select collect_set(uid),count(1) as ct from user group by access,city_code having ct>=3;
//或者
select access,city_code,count(1) as cnt from user
group by access,city_code having cnt >= 3 order by cnt desc;

//c) 机型中带有ne(不区分大小写),不同access及对应用户数
select access,count(1) as cnt from user where lower(model) like “%ne%” group by access order by cnt desc;


//找尾数 分别是0-3,4-7,8-9的uid数量
select
case
when uid % 10 in (0, 1, 2, 3) then ‘0-3’
when uid % 10 in (4, 5, 6, 7) then ‘4-7’
else ‘8-9’
end as interval,
count(*) as cnt
from user
group by
case
when uid % 10 in (0, 1, 2, 3) then ‘0-3’
when uid % 10 in (4, 5, 6, 7) then ‘4-7’
else ‘8-9’
end;


//查看占有率
select sum(if(access=’2G’,1,0))/count(1) from user;

//去重 显示集合
select collect_set(access) from user;

//不去重 显示集合
select collect_list(access) from user;


//左连接 以左边的表为主
select user.uid, user.city_code, city.city_name
from
(select * from user where uid <= 100) user
left join
(select * from city where province_code <= 30) city
on (user.city_code = city.city_code)
limit 20;

//右连接 以右边的表为主
select user.uid, user.city_code, city.city_name
from
(select * from user where uid <= 100) user
right join
(select * from city where province_code <= 30) city
on (user.city_code = city.city_code)
limit 20;

//内连接 求的并集 都有值
select user.uid, user.city_code, city.city_name
from
(select * from user where uid <= 100) user
inner join
(select * from city where province_code <= 30) city
on (user.city_code = city.city_code)
limit 20;

//全连接 求的交集 求的公共部分 (两边都有可能为空)
select user.uid, user.city_code, city.city_name
from
(select * from user where uid <= 100) user
full join
(select * from city where province_code <= 30) city
on (user.city_code = city.city_code)
limit 20;


select access,city_code, uid
from
(
select uid, access, city_code,
row_number() over (partition by access order by city_code desc) as row_num
from user
) a
where row_num = 1;

原创粉丝点击