hive数据库基础学习

来源:互联网 发布:金相分析软件下载 编辑:程序博客网 时间:2024/06/06 02:44
which hive
cd /usr/local/hive/
ls
cd conf
ls
vi hive-site.xml
查找mysql中的内容
ssh bigdata003
mysql -u hive -phive2017 -D hive
show tables;
 select * from DBS \G;
exit
ssh bigdata001
//创建自己的数据库
create databases luolin;
use luolin;
//创建一个citys表
CREATE TABLE citys(
  province_code INT,
  province_name string,
  city_code INT,
  city_name string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
//加载需要处理的数据文件
load data local inpath '/home/bigdata/hive/city.txt' into table citys;
//sql基础回顾
select province_name,count(city_name) as total from citys group by province_name order by total desc limit 10;

select distinct province_name from citys;

select count(*) from (  select province_name from citys group by province_name ) a;

select count(distinct province_name) from citys;
//创建一个city_ex表
CREATE TABLE city_ex(  
province_code int,
province_name string,
city_code int,  
city_name string
)
ROW FORMAT DELIMITED  FIELDS
TERMINATED BY ','
LINES TERMINATED BY '\n'
LOCATION '/user/luo/city/';
//检查四川的数据
select * from city_ex where province_name = '四川';
// 创建一个user表
CREATE TABLE user(
  uid INT,
  city_code INT,
  model string,
  access string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
//加载远程数据
load data local inpath '/home/bigdata/tanqi/hive/user.txt' into table user;
//创建分区表
CREATE TABLE user_daily(
  uid INT,
  city_code INT,
  model string,
  access string
)
partitioned by (p_date string);
//插入分区
INSERT OVERWRITE TABLE user_daily PARTITION (p_date='2017-09-01')  SELECT * FROM user;
//显示分区
show partitions user_daily;
//动态插入分区
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table user_daily partition(p_date)
select *,'2017-09-02' from user union all select *,'2017-09-03' from user;
//修改分区
alter table user_daily partition (p_date = '2017-09-01') rename to partition (p_date = '20170901');
//删除分区
alter table user_daily drop partition ( p_date = '2017-09-02');
//作业解答
select * from user where access='WIFI';

select city_code,count(access) as total from user where access = 'WIFI' group by city_code order by total desc limit 5;


select access,city_code,count(*) as total from user group by access,city_code having total>= 3 order by total desc;

select access ,count(*) as total from user where lower (model) like '%ne%' group by access order by total desc;
//各中if case when collect
//求各占用的百分比
select sum(if(access=='WIFI' ,1,0)) / count(*) from user;
//各段号的分组统计
select
  case
    when uid % 10 in (0,1,2,3) then '0-3'
    when uid % 10 in (4,5,6,7) then '4-7'
    else '8-9'
    end as interval,
    count(*) as cnt
  from user
  group by
  case
    when uid % 10 in (0,1,2,3) then '0-3'
    when uid % 10 in (4,5,6,7) then '4-7'
    else '8-9'
  end;
//去重
select collect_set(access) from user;
//统计user表city_code最多的4个城市的access分布情况,存储为map
select city_code,count(*) as cnt,
  map(
    'WIFI',sum(if(access='WIFI',1,0)),
    '2G',sum(if(access='2G',1,0)),
    '3G',sum(if(access='3G',1,0)),
    '4G',sum(if(access='4G',1,0))
  ) as map_reten_acc
from user
group by city_code
order by cnt desc limit 4;
//左连接 left join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
left join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//右连接 right join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
right join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//内连接inner join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
inner join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//全连接 full join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
full join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//窗口函数
//分组topN
select access,city_code, uid
from
(
  select uid, access, city_code,
  row_number() over (partition by access order by city_code desc) as row_num
  from user
) a
where row_num = 1;
//累计和
select p_date,
  sum(cnt) over(order by p_date asc rows between unbounded preceding and current row)
from
(
  select p_date, count(*) as cnt
  from user_daily   
  where p_date between '2017-09-01' and '2017-09-30'
  group by p_date
)a
;

原创粉丝点击