Hive Data Mining


Pick one HadoopMaster node as the installation server, and start the Hadoop cluster first.
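A minimal sketch of bringing the cluster up, assuming the standard Hadoop sbin scripts are on the PATH:

    start-dfs.sh     # start HDFS (NameNode, DataNodes, SecondaryNameNode)
    start-yarn.sh    # start YARN (ResourceManager, NodeManagers)
    jps              # verify that the daemons are running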

1. Download Hive

2. Upload it to the server

3. Extract it under /usr/local, rename the extracted directory to hive, set the HADOOP_HOME and HIVE_HOME environment variables, and add their bin directories to PATH
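A sketch of step 3, assuming Hadoop is installed under /usr/local/hadoop and the tarball name matches the pattern below (append the exports to /etc/profile or ~/.bashrc to make them permanent):

    tar -zxvf apache-hive-*-bin.tar.gz -C /usr/local    # archive name is illustrative
    mv /usr/local/apache-hive-*-bin /usr/local/hive
    export HADOOP_HOME=/usr/local/hadoop
    export HIVE_HOME=/usr/local/hive
    export PATH=$PATH:$HADOOP_HOME/bin:$HIVE_HOME/bin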

4. Edit Hive's configuration file (see the sketch after this list)

  1. cp hive-default.xml.template hive-site.xml (in $HIVE_HOME/conf)

  2. Set hive.metastore.schema.verification to false

  3. Create the directory /usr/local/hive/tmp and replace every occurrence of ${system:java.io.tmpdir} with it

  4. Replace ${system:user.name} with root
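A sketch of steps 4.1–4.4 as shell commands; the sed delimiter # is chosen so it does not clash with the / in the replacement path:

    cd $HIVE_HOME/conf
    cp hive-default.xml.template hive-site.xml
    mkdir -p /usr/local/hive/tmp
    sed -i 's#${system:java.io.tmpdir}#/usr/local/hive/tmp#g' hive-site.xml
    sed -i 's#${system:user.name}#root#g' hive-site.xml
    # step 4.2: edit hive-site.xml and change the <value> under
    # hive.metastore.schema.verification from true to false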

5. schematool -initSchema -dbType derby      // initialize the metastore database

6. This creates a metastore_db database in the current directory.
Note!!! The next time you run hive, do it from the same directory: by default Hive looks for the metastore in the current working directory.
If you run into problems, delete metastore_db and re-run the command (see the sketch below).
In real production environments, MySQL is commonly used as the metastore database instead of Derby.
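A sketch of the reset, with the standard hive-site.xml connection properties you would set to move the metastore to MySQL (host, database, user, and password are placeholders):

    rm -rf metastore_db                    # discard the broken Derby metastore
    schematool -initSchema -dbType derby   # re-initialize it from scratch
    # for a MySQL metastore, set these properties in hive-site.xml
    # and run schematool -initSchema -dbType mysql instead:
    #   javax.jdo.option.ConnectionURL        jdbc:mysql://<host>:3306/hive
    #   javax.jdo.option.ConnectionDriverName com.mysql.jdbc.Driver
    #   javax.jdo.option.ConnectionUserName   <user>
    #   javax.jdo.option.ConnectionPassword   <password>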

7. Start Hive by running the hive command   // Hadoop must already be running

8. Run hadoop fs -ls /tmp/hive and watch the directories Hive creates there

Inside the Hive CLI, try a few basic statements:

1. show databases;

2. use default;

3. create table doc(line string);

4. show tables;

5. desc doc;

6. select * from doc;

7. drop table doc;

9. Run hadoop fs -ls /user and observe the output

  • Start YARN
  • load data inpath '/wcinput' overwrite into table doc;   // re-create the doc table first if you dropped it in step 7 above
  • select * from doc;
  • select split(line, ' ') from doc;   // split each line on spaces into an array
  • select explode(split(line, ' ')) from doc;   // explode the array so each word becomes its own row
  • select word, count(1) as count from (select explode(split(line, ' ')) as word from doc) w group by word;   // the subquery yields a derived table w with a single column, word, holding the exploded values; the outer query groups by word and counts occurrences
  • select word, count(1) as count from (select explode(split(line, ' ')) as word from doc) w group by word order by word;
  • create table word_counts as select word, count(1) as count from (select explode(split(line, ' ')) as word from doc) w group by word order by word;
  • select * from word_counts;
  • dfs -ls /user/hive/...
  • Experiment with the Sogou search logs
  • Upload the log file to HDFS (see the sketch after this list) and start Hive
  • create table sougou (qtime string, qid string, qword string, url string) row format delimited fields terminated by ',';       // "row format delimited ... terminated by" specifies the character that separates the fields in each row
  • load data inpath '/sougou.dic' into table sougou;
  • select count(*) from sougou;
  • create table sougou_results as select keyword, count(1) as count from (select qword as keyword from sougou) t group by keyword order by count desc;
  • select * from sougou_results limit 10;
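A sketch of the upload step referenced above, assuming the Sogou log file sits in the current local directory under the name the load statement expects:

    hadoop fs -put sougou.dic /    # copy the log file to HDFS as /sougou.dic
    hive                           # then start the Hive CLI and run the statements above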