Hive 基本操作

来源:互联网 发布:docking软件 编辑:程序博客网 时间:2024/05/22 05:10

需求

假设一批文件,内容格式

001,192.168.1.101

002,198.135.1.236

002,156.124.138.9

.......

其中第一列代表用户uid,第二列代表用户登陆ip。要对这些文件进行分析,计算共有多少人登陆,在一个ip下共有几人登陆,等等

hive解决

如果你不建立数据库,那么你的表放在默认数据库 default 中,你加载的数据在 HDFS 中的默认位置为 /user/hive/warehouse/login/ 目录下(由 hive.metastore.warehouse.dir 配置项决定)


1,建立表

-- Partitioned login table: one row per (uid, ip) login record,
-- partitioned by a date string dt, stored as comma-delimited text.
CREATE TABLE login (
    uid string,
    ip  string
)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;


2 ,加载数据


-- Bulk-load local log files into the dt='20141219' partition.
-- Fix: the keyword is INPATH, not "inputh".
LOAD DATA LOCAL INPATH '/home/hadoop/logs/*'
OVERWRITE INTO TABLE login PARTITION (dt = '20141219');


3.查询数据


-- Count distinct users that logged in on 2014-12-19.
-- Fixes: FROM (not "form") and ASCII single quotes (not curly quotes).
-- NOTE: running this query launches a MapReduce job.
SELECT count(DISTINCT uid)
FROM login
WHERE dt = '20141219';



hive基本操作

1 静音模式,可以不进入交互模式直接操作,也没有日志输出到控制台

  hive -S -e 'select uid from login' > /home/hadoop/result.csv;

2,不进入交互模式执行 hive script

  hive -f /home/hadoop/hive-script.sql

3 hive 中执行dfs操作

  hive> dfs  -ls /

4 创建一般表

-- Page-view fact table with a map column and two partition keys.
-- Fixes: TABLE (not "talbe"), FIELDS (not "fileds"), and
-- "COLLECTION ITEMS TERMINATED BY" is two words.
CREATE TABLE page_view (
    viewTime int,
    params   map<string, string>,
    userid   bigint,
    page_url string,
    ip       string COMMENT 'IP Address'
)
COMMENT 'this is a page_view table'
PARTITIONED BY (dt string, country string)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    COLLECTION ITEMS TERMINATED BY '\002'
    MAP KEYS TERMINATED BY '\003'
STORED AS TEXTFILE;

5 删除表

-- Drop the table only when it exists (idempotent cleanup).
DROP TABLE IF EXISTS table_name;

6 添加分区

-- Attach an existing HDFS directory as a new partition.
-- Fixes: ASCII quotes (was curly quotes), table_name (not "talbe_name"),
-- and the LOCATION string literal was missing its closing quote.
ALTER TABLE table_name ADD PARTITION (dt = '20141220')
LOCATION '/usr/hadoop/warehouse/table_name/dt=20141220';

7 删除分区

-- Remove the partition if present; IF EXISTS avoids an error
-- when the partition is already gone.
ALTER TABLE login DROP IF EXISTS PARTITION (dt = '20141220');

8 修改分区

-- Point an existing partition at a different HDFS directory.
-- Fix: the keyword is LOCATION, not "locaction".
ALTER TABLE login PARTITION (dt = '20141220') SET LOCATION '/hadoop/';

-- Rename a partition.
-- Fix: the source value was written as dt="20101220", which does not
-- match the dt='20141220' partition used throughout this example —
-- presumably a typo for 20141220.
ALTER TABLE login PARTITION (dt = '20141220')
RENAME TO PARTITION (dt = '20141221');

9 添加列

-- Append a column; it is placed after all existing data columns but
-- before the partition columns.
-- Fix: SQL line comments use "--", not "//".
ALTER TABLE table_name ADD COLUMNS (name string);

10 修改列

-- Rename column uid to usid and change its type to int.
ALTER TABLE login CHANGE uid usid int;

-- Rename/retype the column and move it after ip.
-- Fix: the terminating semicolon was missing.
ALTER TABLE login CHANGE uid usid string AFTER ip;

-- Rename/retype the column and move it to the first position.
-- Fix: the terminating semicolon was missing.
ALTER TABLE login CHANGE uid usid string FIRST;

11 修改表属性

-- Convert a managed (internal) table to an external one.
-- Fix: Hive matches this property value case-sensitively; it must be
-- 'EXTERNAL'='TRUE' / 'EXTERNAL'='FALSE' in uppercase to take effect.
ALTER TABLE table_name SET TBLPROPERTIES ('EXTERNAL' = 'TRUE');

-- Convert an external table back to a managed (internal) one.
ALTER TABLE table_name SET TBLPROPERTIES ('EXTERNAL' = 'FALSE');


12 查出a表数据添加到b表

-- Copy the distinct uids from login into login_user, replacing its
-- previous contents.
-- Fixes: FROM (not "form"), login_user (not "lgoin_user"),
-- PARTITION (not "patition").
INSERT OVERWRITE TABLE login_user
SELECT DISTINCT uid FROM login;

-- Same copy, but targeting a specific partition of login_user.
INSERT OVERWRITE TABLE login_user PARTITION (dt = '20141221')
SELECT DISTINCT uid FROM login;


13 从一张表中查出数据,分存到多张表中

-- Multi-table insert: scan userinfo once, write to two tables.
-- Fixes: FROM (not "form"); the second SELECT's column list was split
-- across lines and malformed — the non-aggregated column "name" must
-- appear in the select list together with the aggregate, and in GROUP BY.
FROM userinfo
INSERT OVERWRITE TABLE id_table
    SELECT id
INSERT OVERWRITE TABLE num
    SELECT name, count(DISTINCT id)
    GROUP BY name;


14.map

192.168.1.101,1001,z:123|q:4565|w:569

-- Load the map-typed sample data into the dt='2012456' partition.
-- Fixes: target table login_map (the query below reads login_map,
-- not "log_map") and the missing terminating semicolon.
LOAD DATA LOCAL INPATH '/hadoop/loginmap.txt'
OVERWRITE INTO TABLE login_map PARTITION (dt = '2012456');

-- Users whose gameinfo map contains the key 'wow'.
-- Fixes: the second predicate needs AND (the original had a garbled
-- second "wherw"), and the partition value should match the '2012456'
-- partition loaded above (was '2012446').
SELECT ip, uid
FROM login_map
WHERE dt = '2012456'
  AND array_contains(map_keys(gameinfo), 'wow');

15 struct

192.168.1.101,566488845|blue

-- Table with a STRUCT column; top-level fields in the raw file are
-- separated by ',' and struct members by '|'.
-- Fixes: STRUCT (not "stuct"), "PARTITIONED BY (dt string)" (not
-- "dt=string"), FIELDS (not "filelds"), TERMINATED (not "termimated"),
-- "COLLECTION ITEMS" as two words, and missing spaces around quotes.
CREATE TABLE login_struct (
    ip   string,
    user struct<uid:bigint, name:string>
)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '|'
STORED AS TEXTFILE;

-- Fixes: LOAD DATA (not "load date"), TABLE (not "talbe"); the table
-- is partitioned by dt, so the load must name a target partition —
-- presumably the example date; verify against your data.
LOAD DATA LOCAL INPATH '/hadoop/log-struct.txt'
OVERWRITE INTO TABLE login_struct PARTITION (dt = '20141219');

-- Read a struct member with dot notation.
-- Fix: SELECT (not "slelect").
SELECT ip, user.uid FROM login_struct;


16 嵌套复合类型

-- Nested complex type: map from a game id to a struct of game stats.
-- Fixes: CREATE (not "creat"), PARTITIONED BY (not "partition by"),
-- the struct member "level" was missing its ':' type separator, and
-- the statement must end with ';' rather than '.'.
CREATE TABLE login_game_complex (
    ip       string,
    uid      string,
    gameinfo map<bigint, struct<name:string, score:bigint, level:string>>
)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED
STORED AS TEXTFILE;

对于这种表插入数据时分隔符不好设计,我们采用先把数据插入到简单表,然后再把数据插入到复合表


创建简单表

-- Flat staging table: one game record per row; loaded first, then
-- re-shaped into the nested table by the INSERT below.
-- Fixes: PARTITIONED BY (not "partition by"), FORMAT (not "formate"),
-- FIELDS (not "fileds").
CREATE TABLE login_game_simple (
    ip        string,
    uid       string,
    gameid    bigint,
    gamename  string,
    gamescore bigint,
    gamelevel string
)
PARTITIONED BY (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

数据

192.168.1.101,001,124,'wow',54,3

-- Fixes: TABLE (not "tabel"), PARTITION (not "parttition").
LOAD DATA LOCAL INPATH '/hadoop/user/'
OVERWRITE INTO TABLE login_game_simple PARTITION (dt = '20141212');

插入数据

-- Re-shape the flat staging rows into the nested map<struct> column
-- using map() and named_struct().
INSERT OVERWRITE TABLE login_game_complex PARTITION (dt = '20140213')
SELECT ip,
       uid,
       map(gameid,
           named_struct('name',  gamename,
                        'score', gamescore,
                        'level', gamelevel))
FROM login_game_simple
WHERE dt = '20141212';


17 RegexSerDe

-- Register the contrib jar that provides RegexSerDe.
-- Fix: the terminating semicolon was missing.
ADD JAR /home/hive/lib/hive_contrib.jar;

-- Parse space-delimited rows with RegexSerDe: each capture group maps
-- to one column, and rows are serialized back as "%1$s %2$s %3$s".
-- Fixes: fully-qualified serde class name (dots, not a space),
-- property keys 'input.regex' and 'output.format.string' (dotted, not
-- spaced), a regex whose character classes actually exclude the space
-- separator, and balanced parentheses around SERDEPROPERTIES.
CREATE TABLE test_serde (
    co string,
    c1 string,
    c2 string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'input.regex' = '([^ ]*) ([^ ]*) ([^ ]*)',
    'output.format.string' = '%1$s %2$s %3$s'
)
STORED AS TEXTFILE;

数据  123 456 789

-- Fixes: TABLE (not "tabel"); path '/hadoop/hh/' to match the
-- directory convention used elsewhere in this article (was '/hadop').
LOAD DATA LOCAL INPATH '/hadoop/hh/'
OVERWRITE INTO TABLE test_serde;











































 




0 0
原创粉丝点击