Hive脚本
来源:互联网 发布:水果蔬菜批发软件 编辑:程序博客网 时间:2024/06/06 08:07
http://www.powerxing.com/hadoop-build-project-using-eclipse/
/*第四章 HiveQL:数据定义*/
/*4.1 Hive中的数据库*/
create database financials;
create database if not exists financials;
show databases;
create database humnan_resources;
show databases;
show databases Like 'h.*';
Create databases financials Location '/my/preferred/directory';
Create Database financials Comment 'hold all financials tables ';
Describe Database financials;
Create Database financials With dbproperties ('creator'='Mark Moneybags','Date'='2012-01-02');
describe Database financials ;
describe Database extended financials ;
User financials ;
Set hive.cli.print.current.db=True;
Drop Database If Exists financials ;
Drop Database Is Exists financials Cascade;
/*4.2 修改数据库*/
Alter Database financials Set dbproperties ('edited-By'='Joe Dba');
/*4.3 创建表*/
Create Table If Not Exists mydb.employess (
Name String Comment 'Employess name',
salary Float Comment 'Emplyess salary',
subordinates Array<String> Comment 'Name of subordinates ',
deductions Map<String,Float> Comment 'keys are deductions name, vlues are percentages',
address struct <street:String,city:String,state:String,zip:Int> Comment 'home address')
Comment 'Description of the table'
tblproperties('creator'='me','created_at'='2012-01-02 10:00:00',...)
Location 'user/hive/warehouse/mydb.db/employess';
Create Table If Not Exists mydb.employess2 Like mydb.employees ;
/* 4.3.2 外部表*/
Create External Table If Not Exists stocks (
Exchange String ,
symbol String ,
ymd String,
price_open Float;
price_high Float,
price_loe Float,
price_close Float,
volume Int ,
price_adj_close Float)
Row pormat delimited fields terminated By ','
Location '/data/stocks;'
Create External Table If Not Exists mydb.employees3
Like mybd.employees
Location '/path/to/data';
/* 4.4 分区表、管理表*/
Create Table employess (
Name String ,
salary Float ,
suborinates Array<String>,
deductions Map<String ,Float>,
address struct <street:String,city:String,stats:String,zip:Int>)
Partitioned By (country String ,stats String) ;
Set hive.mapred.mode=strict;
Select e.name ,e.salary From employees e Limit 100;
Set hive.mapred.mode=nostrict;
Select e.name ,e.salary From employees e Limit 100;
show Partitions employees ;
show Partitions enployees Partition (country ='US');
show Partitions employees Partition (country='US',state='AK');
describe extended employee;
load Data Local inpath '${env:HOME}/california-employee'
Into Table employees
Partition (country ='US',state='CA')
/*4.4.1 外部分区表*/
Create External Table If Not Exists log_messages (
hms Int ,
severity String ,
server String ,
proess_id Int ,
message String )
partitioned By (Year Int ,Month Int ,Day Int )
Row format delimited fields terminated By '\t';
Alter Table log_messages Add Partition (Year=212,Month=1,Day=2)
Location 'hdfs://master_server/data/log_messages/2012/01/02';
/*4.4.2 自定义表的存储格式*/
Create Table employees (
Name String,
salary Float,
subordinates Array<String>,
deductions Map<String,Float>
address strunct<street:String ,city:String,state:String,zip:Int>)
Row format delimited fields terminated By '\001'
collection items terminated By '\002'
Map keys terminated By '\003'
lines terminated By '\n' stored As textfile;
/*4.5 删除表*/
Drop Table If Exists employess ;
/*4.6 修改表*/
/* 4.6.1 表重命名*/
Alter Table log_messages Rename To logmsgs ;
/*4.6.2 增加,修改和删除表分区*/
Alter Table log_message Add If Not Exists
Partition (Year=2011, Month=1,Day=1 ) Location '/logs/2011/01/01'
Partition (Year=2011, Month=1,Day=1 ) Location '/logs/2011/01/02'
Partition (Year=2011, Month=1,Day=1 ) Location '/logs/2011/01/02';
Alter Table log_messages partition(Year=2011,Month=12,Day=2)
Set Location 's3n://outbuncket/logs/2011/01/02';
Alter Table log_messages Drop If Exists Partition (Year=2011,Month=12,Day=2);
/*4.6 3 修改列信息*/
Alter Table log_messages Change Column hms hours_minutes_seconds Int
Comment 'The hours , minutes ,and seconds part of the timestamp' After severity;
/*4.6 4 增加列*/
Alter Table log_messages Add Columns (app_name String Comment 'Application name ',
seesion_id Long Comment 'then current seeion id ');
/*4.6.5 删除后者替换列*/
Alter Table log_messages Replace Column (
hour_mins_sces Int Comment 'hour,minute,seconds from timestamp',
severity String Comment 'then message severity',
mssage String Comment 'then rest of the message '
/*4.6.6 修改表属性*/
Alter Table log_messags Set tblproperties (
'notes'= 'the process id is no longer captured ;this column is always null ');
/*4.6.6 修改存储属性*/
Alter Table log_messages Partition (Year=2012,Month=1,Day=1)
Set fileformat sequencefile;
Alter Table table_using_json_storage Set serde 'com.example.JOSNSerDe'
whit serdeproperties ( 'prop1'='value1', 'prop2'='value1');
Alter Table table_using_json_storage Set serdeproperties
whit serdeproperties ( 'prop3'='value3', 'prop4'='value4');
Alter Table stocks clusered By (Exchange ,symbol) sorted (symbol) Into 48 buckets ;
/*4.6.8 众多的修改表语句 */
Alter Table log_messages touch Partition (Year=2012,Month =1 ,Day =1);
hive -e 'Alter Table log_messages touch Partition (Year=2012,Month =1 ,Day =1)';
/*第五章 HiveQL:数据操作*/
/*5.1 向管理表中转载数据*/
load Data Local inpath '${env:home}/california-employess'
overwrite Into Table employess Partition (country='USA',state='CA');
/*5.2 通过查询语句想表中插入数据*/
Insert overwrite Table employess Partition (country='US',state='OR');
Select * From staged_employess se Where se.cnty='US' And se.st='OR';
From staged_employess se
Insert overwrite Table employess Partition (country='US',syate='OR')
Select * Where se.cnty='US' And se.st='OR'
Insert overwrite Table employess Partition (country='US',syate='CA')
Select * Where se.cnty='US' And se.st='CA'
Insert overwrite Table employess Partition (country='US',syate='IL')
Select * Where se.cnty='US' And se.st='IL'
/*动态分区插入*/
Insert overwrite Table employess Partition (country ,state)
Select ...,se.cnty,se.st From staged_employess se ;
/*动态静态混合插入*/
Insert overwrite Table employees Partition (country='US',state)
Select ... ,se.cnty ,se.st From staged_employees se Where se.cnty='US';
hive> Set hive.exec.dynamic.partition=True ;
hive> Set hive.exec.dynamic.partition.mode = nostrict ;
hive> Set hive.exec.dynamic.partition.pernode =1000 ;
/*5.3 当查询语句中创建表并加载数据*/
Create Table ca_employees As Select Name ,salary ,address From employees Where se.state='CA';
/* 5.4 导出数据*/
hadoop fs -cp source_path targe_parth
或
Insert overwrite Local Directory '/emp/ca-employees'
Select Name ,salary ,address From employees Where se.state ='CA';
hive > ! ls /tmp/ca_employees ;
hive > ! cat /tmp/ca_employees/ ??? ;
/* 第六章 HiveQL:查询*/
/*6.1 Select ...from */
Create Table employees (
Name String ,salary Float ,subordinates Array<String>,
deductions Map<String ,Float>,
address struct<street:String,city:String,state:String,zip:Int>)
partitioned By (country String ,state String );
Hive> Select Name ,salary From emplyees;
Hive> Select e.Name ,e.salary From emplyees e;
Hive>
Select upper(Name),salary ,deuctions["Federal Taxes"],
round(salary * (1-deductions["Federal Taxes"])) From employess;
/* 6.1.5 Limit 语句*/
Hive>
Select upper(Name),salary ,deuctions["Federal Taxes"],
round(salary * (1-deductions["Federal Taxes"])) From employess Limit 2;
/*6.1.6 列别名*/
Hive>
Select upper(Name),salary ,deuctions["Federal Taxes"],
round(salary * (1-deductions["Federal Taxes"])) As salary_minus From employess Limit 2;
/*6.1.7 嵌套select语句*/
from(Select upper(Name),salary ,deuctions["Federal Taxes"] As fed_taxes,
round(salary * (1-deductions["Federal Taxes"])) As salary_minus_fed_taxes From employess)
e Select e.name,e.salary_minus_fed_taxes Where e.salary_minus_fed_taxes>70000;
/*6.1.8 CASE...WHEN...THEN 句式*/
Select Name ,salary ,Case When salary <50000 Then 'low'
When salary>=7000 And salary <70000 Then 'niddle'
When salary >-70000 And salasr <100000 Then 'high'
Else 'Very high0'
End As brack From employess;
/*6.3 GROUPP BY 语句*/
select year(ymd) ,Avg(price_close) From stocks
Where exchang = 'NASDAQ' And symbol ='AAPL' Group By Year(ymd);
Having y语句;
select year(ymd) ,Avg(price_close) From stocks
Where exchang = 'NASDAQ' And symbol ='AAPL' Group By Year(ymd)
Having Avg(price_close) > 50;
/*6.4 Join 语句*/
/*6.4.1 Inner Join */
select a.ymd ,a.price, b.price_close
from stocks a
join stocks b on a.ymd = b.ymd
where a.symbol = 'AAPL'
and b.symbol = 'IBM';
select s.ymd ,s.symbol, s.price_close, d.divdend
from stocks s
join dividends d Join s.ymd = d.ymd and s.symbl = d.symbol
where a.symbol = 'AAPL';
select s.ymd ,s.symbol, s.price_close, d.divdend
from stocks s
join dividends d on s.ymd = d.ymd and s.symbl = d.symbol
where a.symbol = 'AAPL';
select s.ymd ,s.symbol, s.price_close, d.divdend
from stocks a join dividends b on a.ymd = b.ymd
Join stocks c On a.ymd= c.ymd
where a.symbol = 'AAPL';
/* +streamtable(s) */
select /* +streamtable(s) */ s.ymd ,s.symbol, s.price_close, d.divdend
from stocks s
join dividends d on s.ymd = d.ymd
and s.ymd = d.ymd
where s.symbol = 'AAPL';
/*第四章 HiveQL:数据定义*/
/*4.1 Hive中的数据库*/
create database financials;
create database if not exists financials;
show databases;
create database humnan_resources;
show databases;
show databases Like 'h.*';
Create databases financials Location '/my/preferred/directory';
Create Database financials Comment 'hold all financials tables ';
Describe Database financials;
Create Database financials With dbproperties ('creator'='Mark Moneybags','Date'='2012-01-02');
describe Database financials ;
describe Database extended financials ;
User financials ;
Set hive.cli.print.current.db=True;
Drop Database If Exists financials ;
Drop Database Is Exists financials Cascade;
/*4.2 修改数据库*/
Alter Database financials Set dbproperties ('edited-By'='Joe Dba');
/*4.3 创建表*/
Create Table If Not Exists mydb.employess (
Name String Comment 'Employess name',
salary Float Comment 'Emplyess salary',
subordinates Array<String> Comment 'Name of subordinates ',
deductions Map<String,Float> Comment 'keys are deductions name, vlues are percentages',
address struct <street:String,city:String,state:String,zip:Int> Comment 'home address')
Comment 'Description of the table'
tblproperties('creator'='me','created_at'='2012-01-02 10:00:00',...)
Location 'user/hive/warehouse/mydb.db/employess';
Create Table If Not Exists mydb.employess2 Like mydb.employees ;
/* 4.3.2 外部表*/
Create External Table If Not Exists stocks (
Exchange String ,
symbol String ,
ymd String,
price_open Float;
price_high Float,
price_loe Float,
price_close Float,
volume Int ,
price_adj_close Float)
Row pormat delimited fields terminated By ','
Location '/data/stocks;'
Create External Table If Not Exists mydb.employees3
Like mybd.employees
Location '/path/to/data';
/* 4.4 分区表、管理表*/
Create Table employess (
Name String ,
salary Float ,
suborinates Array<String>,
deductions Map<String ,Float>,
address struct <street:String,city:String,stats:String,zip:Int>)
Partitioned By (country String ,stats String) ;
Set hive.mapred.mode=strict;
Select e.name ,e.salary From employees e Limit 100;
Set hive.mapred.mode=nostrict;
Select e.name ,e.salary From employees e Limit 100;
show Partitions employees ;
show Partitions enployees Partition (country ='US');
show Partitions employees Partition (country='US',state='AK');
describe extended employee;
load Data Local inpath '${env:HOME}/california-employee'
Into Table employees
Partition (country ='US',state='CA')
/*4.4.1 外部分区表*/
Create External Table If Not Exists log_messages (
hms Int ,
severity String ,
server String ,
proess_id Int ,
message String )
partitioned By (Year Int ,Month Int ,Day Int )
Row format delimited fields terminated By '\t';
Alter Table log_messages Add Partition (Year=212,Month=1,Day=2)
Location 'hdfs://master_server/data/log_messages/2012/01/02';
/*4.4.2 自定义表的存储格式*/
Create Table employees (
Name String,
salary Float,
subordinates Array<String>,
deductions Map<String,Float>
address strunct<street:String ,city:String,state:String,zip:Int>)
Row format delimited fields terminated By '\001'
collection items terminated By '\002'
Map keys terminated By '\003'
lines terminated By '\n' stored As textfile;
/*4.5 删除表*/
Drop Table If Exists employess ;
/*4.6 修改表*/
/* 4.6.1 表重命名*/
Alter Table log_messages Rename To logmsgs ;
/*4.6.2 增加,修改和删除表分区*/
Alter Table log_message Add If Not Exists
Partition (Year=2011, Month=1,Day=1 ) Location '/logs/2011/01/01'
Partition (Year=2011, Month=1,Day=1 ) Location '/logs/2011/01/02'
Partition (Year=2011, Month=1,Day=1 ) Location '/logs/2011/01/02';
Alter Table log_messages partition(Year=2011,Month=12,Day=2)
Set Location 's3n://outbuncket/logs/2011/01/02';
Alter Table log_messages Drop If Exists Partition (Year=2011,Month=12,Day=2);
/*4.6 3 修改列信息*/
Alter Table log_messages Change Column hms hours_minutes_seconds Int
Comment 'The hours , minutes ,and seconds part of the timestamp' After severity;
/*4.6 4 增加列*/
Alter Table log_messages Add Columns (app_name String Comment 'Application name ',
seesion_id Long Comment 'then current seeion id ');
/*4.6.5 删除后者替换列*/
Alter Table log_messages Replace Column (
hour_mins_sces Int Comment 'hour,minute,seconds from timestamp',
severity String Comment 'then message severity',
mssage String Comment 'then rest of the message '
/*4.6.6 修改表属性*/
Alter Table log_messags Set tblproperties (
'notes'= 'the process id is no longer captured ;this column is always null ');
/*4.6.6 修改存储属性*/
Alter Table log_messages Partition (Year=2012,Month=1,Day=1)
Set fileformat sequencefile;
Alter Table table_using_json_storage Set serde 'com.example.JOSNSerDe'
whit serdeproperties ( 'prop1'='value1', 'prop2'='value1');
Alter Table table_using_json_storage Set serdeproperties
whit serdeproperties ( 'prop3'='value3', 'prop4'='value4');
Alter Table stocks clusered By (Exchange ,symbol) sorted (symbol) Into 48 buckets ;
/*4.6.8 众多的修改表语句 */
Alter Table log_messages touch Partition (Year=2012,Month =1 ,Day =1);
hive -e 'Alter Table log_messages touch Partition (Year=2012,Month =1 ,Day =1)';
/*第五章 HiveQL:数据操作*/
/*5.1 向管理表中转载数据*/
load Data Local inpath '${env:home}/california-employess'
overwrite Into Table employess Partition (country='USA',state='CA');
/*5.2 通过查询语句想表中插入数据*/
Insert overwrite Table employess Partition (country='US',state='OR');
Select * From staged_employess se Where se.cnty='US' And se.st='OR';
From staged_employess se
Insert overwrite Table employess Partition (country='US',syate='OR')
Select * Where se.cnty='US' And se.st='OR'
Insert overwrite Table employess Partition (country='US',syate='CA')
Select * Where se.cnty='US' And se.st='CA'
Insert overwrite Table employess Partition (country='US',syate='IL')
Select * Where se.cnty='US' And se.st='IL'
/*动态分区插入*/
Insert overwrite Table employess Partition (country ,state)
Select ...,se.cnty,se.st From staged_employess se ;
/*动态静态混合插入*/
Insert overwrite Table employees Partition (country='US',state)
Select ... ,se.cnty ,se.st From staged_employees se Where se.cnty='US';
hive> Set hive.exec.dynamic.partition=True ;
hive> Set hive.exec.dynamic.partition.mode = nostrict ;
hive> Set hive.exec.dynamic.partition.pernode =1000 ;
/*5.3 当查询语句中创建表并加载数据*/
Create Table ca_employees As Select Name ,salary ,address From employees Where se.state='CA';
/* 5.4 导出数据*/
hadoop fs -cp source_path targe_parth
或
Insert overwrite Local Directory '/emp/ca-employees'
Select Name ,salary ,address From employees Where se.state ='CA';
hive > ! ls /tmp/ca_employees ;
hive > ! cat /tmp/ca_employees/ ??? ;
/* 第六章 HiveQL:查询*/
/*6.1 Select ...from */
Create Table employees (
Name String ,salary Float ,subordinates Array<String>,
deductions Map<String ,Float>,
address struct<street:String,city:String,state:String,zip:Int>)
partitioned By (country String ,state String );
Hive> Select Name ,salary From emplyees;
Hive> Select e.Name ,e.salary From emplyees e;
Hive>
Select upper(Name),salary ,deuctions["Federal Taxes"],
round(salary * (1-deductions["Federal Taxes"])) From employess;
/* 6.1.5 Limit 语句*/
Hive>
Select upper(Name),salary ,deuctions["Federal Taxes"],
round(salary * (1-deductions["Federal Taxes"])) From employess Limit 2;
/*6.1.6 列别名*/
Hive>
Select upper(Name),salary ,deuctions["Federal Taxes"],
round(salary * (1-deductions["Federal Taxes"])) As salary_minus From employess Limit 2;
/*6.1.7 嵌套select语句*/
from(Select upper(Name),salary ,deuctions["Federal Taxes"] As fed_taxes,
round(salary * (1-deductions["Federal Taxes"])) As salary_minus_fed_taxes From employess)
e Select e.name,e.salary_minus_fed_taxes Where e.salary_minus_fed_taxes>70000;
/*6.1.8 CASE...WHEN...THEN 句式*/
Select Name ,salary ,Case When salary <50000 Then 'low'
When salary>=7000 And salary <70000 Then 'niddle'
When salary >-70000 And salasr <100000 Then 'high'
Else 'Very high0'
End As brack From employess;
/*6.3 GROUPP BY 语句*/
select year(ymd) ,Avg(price_close) From stocks
Where exchang = 'NASDAQ' And symbol ='AAPL' Group By Year(ymd);
Having y语句;
select year(ymd) ,Avg(price_close) From stocks
Where exchang = 'NASDAQ' And symbol ='AAPL' Group By Year(ymd)
Having Avg(price_close) > 50;
/*6.4 Join 语句*/
/*6.4.1 Inner Join */
select a.ymd ,a.price, b.price_close
from stocks a
join stocks b on a.ymd = b.ymd
where a.symbol = 'AAPL'
and b.symbol = 'IBM';
select s.ymd ,s.symbol, s.price_close, d.divdend
from stocks s
join dividends d Join s.ymd = d.ymd and s.symbl = d.symbol
where a.symbol = 'AAPL';
select s.ymd ,s.symbol, s.price_close, d.divdend
from stocks s
join dividends d on s.ymd = d.ymd and s.symbl = d.symbol
where a.symbol = 'AAPL';
select s.ymd ,s.symbol, s.price_close, d.divdend
from stocks a join dividends b on a.ymd = b.ymd
Join stocks c On a.ymd= c.ymd
where a.symbol = 'AAPL';
/* +streamtable(s) */
select /* +streamtable(s) */ s.ymd ,s.symbol, s.price_close, d.divdend
from stocks s
join dividends d on s.ymd = d.ymd
and s.ymd = d.ymd
where s.symbol = 'AAPL';
0 0
- hive 脚本
- Hive 脚本
- Hive脚本
- hive脚本
- oracle脚本一键转hive脚本
- hive之脚本执行
- hive:条件判断脚本
- Hive MapReduce脚本
- hive 脚本接受参数
- Hive 脚本执行
- hive初始脚本
- hive 使用python脚本
- hive的脚本执行
- HIVE脚本优化
- Shell脚本执行Hive语句
- hive脚本+shell执行方式
- hive脚本深入解读感悟
- Hive中执行php脚本
- HDU 5701 (水题)
- SSH远程登录配置
- Java转义字符
- 蓝桥杯_打印图形
- static关键字及变量存储位置总结
- Hive脚本
- 【Leetcode】Sqrt(x)
- 在CentOS7命令行模式下安装虚拟机
- 质数统计
- 数据结构课后题源码
- 【Leetcode】 Search in Rotated Sorted Array
- 关机 重启
- 本地的mysql连接虚拟机中Linux中的MySQL需要注意的地方(权限问题)
- Android滑动分析(二)