hadoop hive 笔记
来源:互联网 发布:宜春学院网络教育平台 编辑:程序博客网 时间:2024/06/02 02:00
1. hive 安装:
Hive只在一个节点上安装即可1.上传tar包2.解压tar -zxvf hive-0.9.0.tar.gz -C /cloud/3.配置mysql metastore(切换到root用户)配置HIVE_HOME环境变量rpm -qa | grep mysqlrpm -e mysql-libs-5.1.66-2.el6_3.i686 --nodepsrpm -ivh MySQL-server-5.1.73-1.glibc23.i386.rpm rpm -ivh MySQL-client-5.1.73-1.glibc23.i386.rpm 修改mysql的密码/usr/bin/mysql_secure_installation(注意:删除匿名用户,允许用户远程连接)登陆mysqlmysql -u root -p4.配置hivecp hive-default.xml.template hive-site.xml 修改hive-site.xml(删除所有内容,只留一个<property></property>)添加如下内容:<property> <name>javax.jdo.option.ConnectionURL</name> <value>jdbc:mysql://hadoop00:3306/hive?createDatabaseIfNotExist=true</value> <description>JDBC connect string for a JDBC metastore</description></property><property> <name>javax.jdo.option.ConnectionDriverName</name> <value>com.mysql.jdbc.Driver</value> <description>Driver class name for a JDBC metastore</description></property><property> <name>javax.jdo.option.ConnectionUserName</name> <value>root</value> <description>username to use against metastore database</description></property><property> <name>javax.jdo.option.ConnectionPassword</name> <value>123</value> <description>password to use against metastore database</description></property>5.安装hive和mysq完成后,将mysql的连接jar包拷贝到$HIVE_HOME/lib目录下如果出现没有权限的问题,在mysql授权(在安装mysql的机器上执行)mysql -uroot -p#(执行下面的语句 *.*:所有库下的所有表 %:任何IP地址或主机都可以连接)GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123' WITH GRANT OPTION;FLUSH PRIVILEGES;6.建表(默认是内部表)create table trade_detail(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t';建分区表create table td_part(id bigint, account string, income double, expenses double, time string) partitioned by (logdate string) row format delimited fields terminated by '\t';建外部表create external table td_ext(id bigint, account string, income double, expenses double, time string) row format delimited fields terminated by '\t' location '/td_ext';7.创建分区表普通表和分区表区别:有大量数据增加的需要建分区表create table book (id bigint, name string) partitioned 
by (pubdate string) row format delimited fields terminated by '\t'; 分区表加载数据load data local inpath './book.txt' overwrite into table book partition (pubdate='2010-08-22');load data local inpath '/root/data.am' into table beauties partition (nation="USA");select nation, avg(size) from beauties group by nation order by avg(size);
2. hive HQL练习
HQL优化总结:http://blog.csdn.net/preterhuman_peak/article/details/40649213
Hive用户指南(Hive_user_guide)_中文版 : http://download.csdn.net/detail/xj626852095/9646955
set hive.cli.print.header=true;CREATE TABLE page_view(viewTime INT, userid BIGINT, page_url STRING, referrer_url STRING, ip STRING COMMENT 'IP Address of the User') COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001'STORED AS SEQUENCEFILE; TEXTFILE//sequencefilecreate table tab_ip_seq(id int,name string,ip string,country string) row format delimited fields terminated by ',' stored as sequencefile;insert overwrite table tab_ip_seq select * from tab_ext;//create & loadcreate table tab_ip(id int,name string,ip string,country string) row format delimited fields terminated by ',' stored as textfile;load data local inpath '/home/hadoop/ip.txt' into table tab_ext;//external外部表被drop时,只清除元数据,表数据并不会被删除CREATE EXTERNAL TABLE tab_ip_ext(id int, name string, ip STRING, country STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE LOCATION '/external/hive'; // CREATE AS 用于创建一些临时表存储中间结果CREATE TABLE tab_ip_ctas ASSELECT id new_id, name new_name, ip new_ip,country new_countryFROM tab_ip_extSORT BY new_id;//insert from select 用于向临时表中追加中间结果数据create table tab_ip_like like tab_ip;insert overwrite table tab_ip_like select * from tab_ip;//CLUSTER <--相对高级一点,你可以放在有精力的时候才去学习>create table tab_ip_cluster(id int,name string,ip string,country string)clustered by(id) into 3 buckets;load data local inpath '/home/hadoop/ip.txt' overwrite into table tab_ip_cluster;set hive.enforce.bucketing=true;insert into table tab_ip_cluster select * from tab_ip;select * from tab_ip_cluster tablesample(bucket 2 out of 3 on id); //PARTITIONcreate table tab_ip_part(id int,name string,ip string,country string) partitioned by (part_flag string) row format delimited fields terminated by ','; load data local inpath '/home/hadoop/ip.txt' overwrite into table tab_ip_part partition(part_flag='part1'); load data local inpath '/home/hadoop/ip_part2.txt' overwrite into table tab_ip_part partition(part_flag='part2');select * from 
tab_ip_part;select * from tab_ip_part where part_flag='part2';select count(*) from tab_ip_part where part_flag='part2';alter table tab_ip change id id_alter string;ALTER TABLE tab_cts ADD PARTITION (partCol = 'dt') location '/external/hive/dt';show partitions tab_ip_part; //write to hdfsinsert overwrite local directory '/home/hadoop/hivetemp/test.txt' select * from tab_ip_part where part_flag='part1'; insert overwrite directory '/hiveout.txt' select * from tab_ip_part where part_flag='part1';(不支持into,也就是不支持文件的追加)//array create table tab_array(a array<int>,b array<string>)row format delimitedfields terminated by '\t'collection items terminated by ',';示例数据tobenbrone,laihama,woshishui 13866987898,13287654321abc,iloveyou,itcast 13866987898,13287654321select a[0] from tab_array;select * from tab_array where array_contains(b,'word');insert into table tab_array select array(0),array(name,ip) from tab_ext t; //mapcreate table tab_map(name string,info map<string,string>)row format delimitedfields terminated by '\t'collection items terminated by ';'map keys terminated by ':';示例数据:fengjieage:18;size:36A;addr:usafurong age:28;size:39C;addr:beijing;weight:90KGload data local inpath '/home/hadoop/hivetemp/tab_map.txt' overwrite into table tab_map;insert into table tab_map select name,map('name',name,'ip',ip) from tab_ext; //structcreate table tab_struct(name string,info struct<age:int,tel:string,addr:string>)row format delimitedfields terminated by '\t'collection items terminated by ','load data local inpath '/home/hadoop/hivetemp/tab_st.txt' overwrite into table tab_struct;insert into table tab_struct select name,named_struct('age',id,'tel',name,'addr',country) from tab_ext;//cli shell[hadoop@yun12-01 ~]hive -S -e 'select country,count(*) from tab_ext' > /home/hadoop/hivetemp/e.txt 有了这种执行机制,就使得我们可以利用脚本语言(bash shell,python)进行hql语句的批量执行select * from tab_ext sort by id desc limit 5;select a.ip,b.book from tab_ext a join tab_ip_book b on(a.name=b.name);//UDFselect 
if(id=1,'first','no-first'),name from tab_ext;hive>add jar /home/hadoop/myudf.jar;hive>CREATE TEMPORARY FUNCTION my_lower AS 'org.dht.Lower';select my_lower(name) from tab_ext;
udf.java
package cn.itcas.hive.udf;

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF mapping a numeric area code to its Chinese area name.
 *
 * Registered in Hive with e.g.:
 *   ADD JAR /path/to/myudf.jar;
 *   CREATE TEMPORARY FUNCTION area AS 'cn.itcas.hive.udf.AreaUDF';
 */
public class AreaUDF extends UDF {

    // Static lookup table: area code -> area name. Populated once at class load.
    private static Map<Integer, String> areaMap = new HashMap<Integer, String>();
    static {
        areaMap.put(1, "北京");
        areaMap.put(2, "上海");
        areaMap.put(3, "广州");
    }

    /**
     * Resolves an area code to its name.
     *
     * @param in area code as text (e.g. "1"); may be null when the column value is SQL NULL
     * @return the mapped area name; "其他" for unknown or non-numeric codes;
     *         null when {@code in} is null (propagates SQL NULL instead of
     *         throwing a NullPointerException, which would fail the whole query)
     */
    public Text evaluate(Text in) {
        if (in == null) {
            return null; // SQL NULL in -> SQL NULL out
        }
        Integer code;
        try {
            // trim() tolerates stray whitespace around the code in the source data
            code = Integer.valueOf(in.toString().trim());
        } catch (NumberFormatException e) {
            // Non-numeric codes fall into the "other" bucket instead of
            // aborting the query with an unhandled exception.
            return new Text("其他");
        }
        String result = areaMap.get(code);
        return new Text(result == null ? "其他" : result);
    }
}
0 0
- hadoop学习笔记 HIVE
- hadoop hive python 笔记
- hadoop hive 笔记
- hadoop学习之--Hive笔记
- Hadoop学习笔记之Hive
- Hadoop权威指南--Hive笔记
- hadoop学习笔记-hive安装及操作
- Hadoop学习笔记之操作hive
- Hadoop 学习笔记之Hive安装
- Hadoop学习笔记(三):Hive简介
- Hadoop之——Hive笔记
- hadoop学习笔记 Hive执行生命周期
- Hadoop笔记之Hive安装过程
- Hadoop 学习笔记四 Hive基本知识
- Hadoop 学习笔记五 Hive JDBC(未完)
- Hadoop 学习笔记之Hive安装
- Hadoop学习笔记(7)-HIVE安装
- Hadoop + HBase + Hive 完全分布式部署笔记
- 进程间通信之有名管道
- NAT技术的主要实现方式及其对网络应用程序的使用影响
- TankWar 单机(JAVA版)版本1.7&&版本1.8 坦克死亡的爆炸和画出多个敌方坦克
- vim编辑器跳转、复制、剪切(2)
- C++顺序容器类中对象初始化、赋值、swap
- hadoop hive 笔记
- PHP操作MySQL数据库(3)
- linux常用命令3—文件处理
- java.lang.instrument 学习(一)
- 算法小结
- Android字符串动态加载
- Servlet,filter, struts,jsp之间的关系
- JAVA当中变量什么时候需要初始化
- 酷炫的交互动画和视觉效果