hiveQL详解

来源：互联网 | 发布：秋水伊人地板知乎 | 编辑：程序博客网 | 时间：2024/04/19 13:21

一、创建表

1.创建内部表
-- Create the internal (managed) table tab_ip; rows are stored as comma-delimited text.
create table tab_ip (
    id      int,
    name    string,
    ip      string,
    country string
)
row format delimited
fields terminated by ','
stored as textfile;
2、创建外部表
-- External table: no folder is created for it under the database directory;
-- the data stays at the path given by LOCATION.
-- LOCATION must be a directory on HDFS, not an individual file.
-- NOTE(review): the original note adds that local files can be loaded into this table and
-- that the data can no longer be queried once the local file is deleted. LOAD DATA LOCAL
-- copies the file, so the second half looks doubtful -- confirm before relying on it.
CREATE EXTERNAL TABLE tab_ip_ext (
    id      INT,
    name    STRING,
    ip      STRING,
    country STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/dbdata/';
3、创建桶表
-- Bucketed table: rows are clustered by id into 3 buckets; comma-delimited text rows.
create table tab_ip_cluster (
    id      int,
    name    string,
    ip      string,
    country string
)
clustered by (id) into 3 buckets
row format delimited fields terminated by ',';
4、创建分区表

-- Partitioned table: year is the partition column and is declared separately
-- from the regular columns; comma-delimited text rows.
create table tab_ip_part (
    id      int,
    name    string,
    ip      string,
    country string
)
partitioned by (year string)
row format delimited fields terminated by ',';
5、根据select创建表
-- CTAS: the table structure is built from the SELECT statement;
-- the column aliases become the column names of the new table.
CREATE TABLE tab_ip_ctas
AS
SELECT
    id      AS new_id,
    name    AS new_name,
    ip      AS new_ip,
    country AS new_country
FROM tab_ip_ext
SORT BY new_id;
6、创建相似表
-- Create tab_ip_like from the definition of the existing table tab_ip.
CREATE TABLE tab_ip_like
LIKE tab_ip;

二、导入数据到表

1)从本地导入数据到hive的表中（实质就是将文件上传到hdfs中hive管理目录下）
-- Load a file from the local filesystem into tab_ext (LOCAL keyword present).
load data local inpath '/home/hadoop/ip.txt'
into table tab_ext;
2)从hdfs上导入数据到hive表中（实质就是将文件从原始目录移动到hive管理的目录下）
-- Load a file that is already on HDFS into tab_user (no LOCAL keyword).
load data inpath 'hdfs://ns1/aa/bb/data.log'
into table tab_user;
从hdfs导入时语句中没有local关键字，这是它与从本地导入在语法上的唯一区别（行为上：本地导入是把文件复制上传到hive目录，hdfs导入是把文件移动到hive目录）。
3）从其他表中导入
-- Replace the contents of tab_ip_like with all rows selected from tab_ip.
INSERT OVERWRITE TABLE tab_ip_like
SELECT *
FROM tab_ip;
或者
-- Append all rows selected from tab_ip to tab_ip_like (existing rows are kept).
INSERT INTO TABLE tab_ip_like
SELECT *
FROM tab_ip;

三、从表中导出数据

1、导出到本地
-- Write the query result to a directory on the LOCAL filesystem
-- (the LOCAL keyword selects the local filesystem rather than HDFS).
-- The target path is a directory, even though it is named test.txt here.
-- The filter uses year, the partition column tab_ip_part is declared with.
insert overwrite local directory '/home/hadoop/hivetemp/test.txt'
select *
from tab_ip_part
where year = 'part1';
2、导出到hdfs中
-- Write the query result to a directory on HDFS.
-- The DIRECTORY argument is a folder name, even though it is named hiveout.txt here.
-- The filter uses year, the partition column tab_ip_part is declared with.
insert overwrite directory '/hiveout.txt'
select *
from tab_ip_part
where year = 'part1';

四、修改语句

-- Change a column: rename id to id_alter and set its type to string.
alter table tab_ip change id id_alter string;
-- Add a partition whose data lives at an explicit location.
ALTER TABLE tab_cts ADD PARTITION (partCol = 'dt') location '/external/hive/dt';
-- List the partitions of tab_ip_part.
show partitions tab_ip_part;
-- Rename a table.
alter table tab_ip_ext rename to tab_ext;

五、增加语句

-- Add a new column dept, with a column comment, to employee.
ALTER TABLE employee ADD COLUMNS (
    dept STRING COMMENT 'Department name'
);

六、删除语句

-- Drop a partition.
alter table tab_partition drop partition (year = 'oo1');
-- Drop a table; IF EXISTS avoids an error when the table is absent.
DROP TABLE IF EXISTS employee;

七、查询语句

八、数据库操作

-- Create the database; IF NOT EXISTS is an optional clause
-- (the square brackets in the syntax description are not typed).
CREATE DATABASE IF NOT EXISTS userdb;
SHOW DATABASES;
-- Drop the database with CASCADE: the tables it contains are dropped
-- before the database itself is dropped.
DROP DATABASE IF EXISTS userdb CASCADE;

九、分区操作

添加分区
-- Add the partition year='2013' to employee.
ALTER TABLE employee
ADD PARTITION (year = '2013');
重命名分区：
-- Rename a partition: the target spec must name the same partition column (year)
-- and differ only in its value. The original target used a different column name (Yoj).
-- NOTE(review): '1204' is an illustrative new value -- substitute the intended one.
ALTER TABLE employee PARTITION (year = '1203')
RENAME TO PARTITION (year = '1204');
删除分区：
-- Drop the partition year='1203'; IF EXISTS is an optional clause
-- (the square brackets in the syntax description are not typed).
ALTER TABLE employee DROP IF EXISTS
PARTITION (year = '1203');

0 0