Hive Scripts

http://www.powerxing.com/hadoop-build-project-using-eclipse/
/* Chapter 4: HiveQL: Data Definition */
  /* 4.1 Databases in Hive */
create database financials;
create database if not exists  financials;
show  databases;
create database human_resources;
show  databases;
show databases Like 'h.*';
Create Database financials Location '/my/preferred/directory';
Create Database financials Comment 'holds all financial tables';
Describe Database financials;
Create Database financials With dbproperties ('creator'='Mark Moneybags','Date'='2012-01-02');
describe Database financials ;
describe Database  extended  financials ;
Use financials;
Set hive.cli.print.current.db=true;
Drop Database If Exists   financials ;
Drop Database If Exists financials Cascade;
  /* 4.2 Altering a Database */
Alter Database  financials Set dbproperties ('edited-By'='Joe Dba');
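-- A quick check (sketch): re-describe the database to confirm the new property shows up.
describe Database extended financials;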
  /* 4.3 Creating Tables */
Create Table If Not Exists mydb.employees (
Name String Comment 'Employee name',
salary Float Comment 'Employee salary',
subordinates Array<String> Comment 'Names of subordinates',
deductions Map<String,Float> Comment 'Keys are deductions names, values are percentages',
address struct<street:String,city:String,state:String,zip:Int> Comment 'Home address')
Comment 'Description of the table'
tblproperties('creator'='me','created_at'='2012-01-02 10:00:00',...)
Location '/user/hive/warehouse/mydb.db/employees';
Create Table If Not Exists mydb.employees2 Like mydb.employees;
   /* 4.3.2 External Tables */
Create External Table If Not Exists stocks (
Exchange String ,
symbol String ,
ymd String,
price_open Float,
price_high Float,
price_low Float,
price_close Float,
volume Int ,
price_adj_close Float)
Row format delimited fields terminated By ','
Location '/data/stocks';
Create External Table If Not  Exists mydb.employees3 
Like mydb.employees 
Location '/path/to/data';
/* 4.4 Partitioned, Managed Tables */
Create Table employees (
Name String,
salary Float,
subordinates Array<String>,
deductions Map<String,Float>,
address struct<street:String,city:String,state:String,zip:Int>)
Partitioned By (country String, state String);
Set hive.mapred.mode=strict;
Select e.name ,e.salary From employees e Limit 100;
Set hive.mapred.mode=nonstrict;
Select e.name ,e.salary From employees e Limit 100;
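-- For contrast, a partition-filtered query (sketch) is accepted even in strict mode,
-- assuming the partitioned employees table defined above.
Set hive.mapred.mode=strict;
Select e.name, e.salary From employees e Where e.country='US' And e.state='CA' Limit 100;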
show Partitions employees ;
show Partitions employees Partition (country='US');
show Partitions  employees Partition (country='US',state='AK');
describe extended employees;
load Data Local inpath '${env:HOME}/california-employees'
Into Table employees
Partition (country='US', state='CA');
   /* 4.4.1 External Partitioned Tables */
Create External Table If Not Exists log_messages (
hms Int ,
severity String ,
server String ,
process_id Int ,
message String )
partitioned  By (Year Int ,Month Int ,Day Int )
Row format delimited fields terminated By '\t';
Alter Table log_messages Add Partition (Year=2012,Month=1,Day=2)
Location 'hdfs://master_server/data/log_messages/2012/01/02';
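-- Once the partition is registered, it can be queried by its partition keys (sketch):
Select hms, severity, message From log_messages Where Year=2012 And Month=1 And Day=2;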
     /* 4.4.2 Customizing Table Storage Formats */
Create Table employees (
Name String,
salary Float,
subordinates Array<String>,
deductions Map<String,Float>,
address struct<street:String,city:String,state:String,zip:Int>)
Row format delimited fields terminated By '\001'
collection items terminated By '\002'
Map keys terminated By '\003'
lines terminated By '\n'  stored As textfile;
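-- To check how Hive recorded the row format and storage clauses, the generated DDL can be
-- printed back (sketch; assumes a Hive version that supports SHOW CREATE TABLE):
Show Create Table employees;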
   /* 4.5 Dropping Tables */
  Drop Table If Exists employees;
  /* 4.6 Altering Tables */
   /* 4.6.1 Renaming a Table */
Alter Table log_messages Rename To logmsgs ;
   /* 4.6.2 Adding, Modifying, and Dropping Table Partitions */
Alter Table log_messages Add If Not Exists
Partition (Year=2011, Month=1, Day=1) Location '/logs/2011/01/01' 
Partition (Year=2011, Month=1, Day=2) Location '/logs/2011/01/02' 
Partition (Year=2011, Month=1, Day=3) Location '/logs/2011/01/03';
Alter Table log_messages partition(Year=2011,Month=12,Day=2)
Set Location 's3n://ourbucket/logs/2011/01/02';
Alter Table log_messages Drop If Exists Partition (Year=2011,Month=12,Day=2);
/* 4.6.3 Changing Columns */
Alter Table  log_messages Change Column hms hours_minutes_seconds Int 
Comment 'The hours , minutes ,and seconds part of the timestamp'  After severity;
/* 4.6.4 Adding Columns */
Alter Table log_messages Add Columns (app_name String Comment 'Application name',
session_id Long Comment 'The current session id');
  /* 4.6.5 Deleting or Replacing Columns */
Alter Table log_messages Replace Columns (
hours_mins_secs Int Comment 'hour, minute, seconds from timestamp',
severity String Comment 'The message severity',
message String Comment 'The rest of the message');
  /* 4.6.6 Altering Table Properties */
Alter Table log_messages Set tblproperties (
'notes'='The process id is no longer captured; this column is always NULL');
  /* 4.6.7 Altering Storage Properties */
Alter Table log_messages Partition (Year=2012,Month=1,Day=1)
Set fileformat sequencefile;

Alter Table table_using_json_storage Set serde 'com.example.JSONSerDe'
With serdeproperties ('prop1'='value1', 'prop2'='value2');

Alter Table table_using_json_storage Set serdeproperties
('prop3'='value3', 'prop4'='value4');
Alter Table stocks clustered By (Exchange, symbol) Sorted By (symbol) Into 48 buckets;
    /* 4.6.8 Miscellaneous Alter Table Statements */
Alter Table log_messages touch Partition (Year=2012,Month =1 ,Day =1);
hive -e 'Alter Table log_messages touch Partition (Year=2012,Month =1 ,Day =1)';
/* Chapter 5: HiveQL: Data Manipulation */
/* 5.1 Loading Data into Managed Tables */
   load Data Local inpath '${env:HOME}/california-employees'
overwrite Into Table employees Partition (country='US', state='CA');
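-- Without the Local keyword, Hive moves data that already sits in HDFS instead of copying
-- a local file; a sketch with a hypothetical HDFS path:
load Data inpath '/hypothetical/hdfs/path/california-employees'
overwrite Into Table employees Partition (country='US', state='CA');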
/* 5.2 Inserting Data into Tables from Queries */
Insert overwrite Table employees Partition (country='US', state='OR')
Select * From staged_employees se Where se.cnty='US' And se.st='OR';
From staged_employees se 
Insert overwrite Table employees Partition (country='US', state='OR')
Select *    Where se.cnty='US' And se.st='OR'
Insert overwrite Table employees Partition (country='US', state='CA')
Select *    Where se.cnty='US' And se.st='CA'
Insert overwrite Table employees Partition (country='US', state='IL')
Select *    Where se.cnty='US' And se.st='IL';
    /* Dynamic Partition Inserts */
Insert overwrite Table employees Partition (country, state)
Select ..., se.cnty, se.st From staged_employees se;
    /* Mixed Dynamic and Static Partition Inserts */
Insert overwrite Table employees Partition (country='US',state)
Select ... ,se.cnty ,se.st  From staged_employees se Where se.cnty='US';
hive> Set hive.exec.dynamic.partition=true;
hive> Set hive.exec.dynamic.partition.mode=nonstrict;
hive> Set hive.exec.max.dynamic.partitions.pernode=1000;
   /* 5.3 Creating and Loading Tables in One Query */
Create Table ca_employees As Select Name, salary, address From employees Where state='CA';
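-- A quick sanity check of the table created above (sketch):
Select * From ca_employees Limit 3;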
   /* 5.4 Exporting Data */
hadoop fs -cp source_path target_path
   or
Insert overwrite Local Directory '/tmp/ca_employees'
Select Name, salary, address From employees Where state='CA';
hive> ! ls /tmp/ca_employees;
hive> ! cat /tmp/ca_employees/???;
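-- Exports can also fan out to several directories in one pass, reusing the multi-insert
-- form from 5.2; a sketch against the same staged_employees columns:
From staged_employees se
Insert overwrite Directory '/tmp/or_employees'
  Select * Where se.cnty='US' And se.st='OR'
Insert overwrite Directory '/tmp/ca_employees'
  Select * Where se.cnty='US' And se.st='CA';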
/* Chapter 6: HiveQL: Queries */
/*6.1 Select ...from */
    Create Table employees ( 
     Name String ,salary Float ,subordinates Array<String>,
     deductions Map<String ,Float>, 
     address struct<street:String,city:String,state:String,zip:Int>)
     partitioned By (country String ,state String );
Hive> Select Name, salary From employees;
Hive> Select e.Name, e.salary From employees e;
Hive>
Select upper(Name), salary, deductions["Federal Taxes"],
round(salary * (1 - deductions["Federal Taxes"])) From employees;
/* 6.1.5 The LIMIT Clause */  
Hive>
Select upper(Name), salary, deductions["Federal Taxes"],
round(salary * (1 - deductions["Federal Taxes"])) From employees Limit 2;
  /* 6.1.6 Column Aliases */
Hive>
Select upper(Name), salary, deductions["Federal Taxes"],
round(salary * (1 - deductions["Federal Taxes"])) As salary_minus From employees Limit 2;
/* 6.1.7 Nested SELECT Statements */
From (Select upper(Name) As name, salary, deductions["Federal Taxes"] As fed_taxes,
round(salary * (1 - deductions["Federal Taxes"])) As salary_minus_fed_taxes From employees)
e   Select e.name, e.salary_minus_fed_taxes Where e.salary_minus_fed_taxes > 70000;
/* 6.1.8 CASE ... WHEN ... THEN Statements */
Select Name, salary, Case When salary < 50000 Then 'low'
                          When salary >= 50000 And salary < 70000 Then 'middle'
                          When salary >= 70000 And salary < 100000 Then 'high'
                     Else 'very high'
   End As bracket From employees;
  /* 6.3 GROUP BY Clauses */
select year(ymd) ,Avg(price_close)  From stocks 
Where exchange='NASDAQ' And symbol='AAPL' Group By Year(ymd);
    /* HAVING Clauses */
select year(ymd) ,Avg(price_close)  From stocks 
Where exchange='NASDAQ' And symbol='AAPL' Group By Year(ymd)
Having Avg(price_close) > 50;
     /* 6.4 JOIN Statements */
   /*6.4.1 Inner Join */
select a.ymd, a.price_close, b.price_close
  from stocks a
  join stocks b on a.ymd = b.ymd
where a.symbol = 'AAPL'
   and b.symbol = 'IBM';
select s.ymd, s.symbol, s.price_close, d.dividend
  from stocks s
  join dividends d on s.ymd = d.ymd and s.symbol = d.symbol
where s.symbol = 'AAPL';
select a.ymd, a.symbol, a.price_close, b.dividend, c.price_close
  from stocks a  join dividends b  on a.ymd = b.ymd
                  Join stocks c On a.ymd = c.ymd
where a.symbol = 'AAPL';  
   /* The STREAMTABLE(s) hint */  
select /*+ STREAMTABLE(s) */ s.ymd, s.symbol, s.price_close, d.dividend
  from stocks s
  join dividends d on s.ymd = d.ymd
                 and s.symbol = d.symbol
where s.symbol = 'AAPL';  
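-- A related hint is MAPJOIN, which asks Hive to cache the small table in memory and skip
-- the reduce step; a sketch on the same pair of tables:
select /*+ MAPJOIN(d) */ s.ymd, s.symbol, s.price_close, d.dividend
  from stocks s
  join dividends d on s.ymd = d.ymd and s.symbol = d.symbol
where s.symbol = 'AAPL';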
