实验七参考命令

来源:互联网 发布:渠道 知乎 编辑:程序博客网 时间:2024/06/05 04:44

以下只是参考,采用下载到本地再载入数据库的方式。有什么问题请微信回复,祝好!

在root用户下执行

sudo service mysql start

($ mysql -V)

$ sudo service ssh start

($ ssh -V)

 

登录hadoop账户

sudo su - hadoop

start-all.sh

jps

 

下面建立文件夹和下载文件的步骤只是参考方法

mkdir -p /hadoop/hive/data/Crime_Data_2014-2015

cd /hadoop/hive/data/Crime_Data_2014-2015

下面下载时间有些长,请耐心等待

wget https://raw.githubusercontent.com/jihongfei/DataWarehouseCourse/master/resourse/csv/Crime_Data_2014-2015_without_comma.csv

检查下载对了没有

more Crime_Data_2014-2015_without_comma.csv

ls

回到用户主目录(不带参数的 cd 会回到当前用户的 home 目录,而不是根目录 /)

cd

启动Hive命令行界面(CLI)

hive

 

 

-- Declare the external table that receives the crime CSV.
-- Each row is one comma-delimited text line; columns are mapped by position.
CREATE EXTERNAL TABLE crime_data_2014_2015 (
    dates            STRING,
    happen_hour_min  INT,
    crime_type       STRING,
    victim_age       INT,
    victim_gender    STRING,
    weapon_code      INT,
    weapon_describe  STRING,
    taken_place      STRING,
    location         STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

 

-- Load the downloaded CSV from the local filesystem (LOCAL) into the table.
-- OVERWRITE replaces whatever data the table already holds.
LOAD DATA LOCAL INPATH '/hadoop/hive/data/Crime_Data_2014-2015/Crime_Data_2014-2015_without_comma.csv'
OVERWRITE INTO TABLE crime_data_2014_2015;


 

-- Sanity checks after the load: confirm the table exists and inspect its schema.
SHOW TABLES;

DESC crime_data_2014_2015;

-- Preview a few rows only. Without LIMIT, SELECT * prints every loaded row
-- to the terminal, which is slow and unreadable for a table of this size.
SELECT *
FROM crime_data_2014_2015
LIMIT 10;


-- Count incidents per time-of-day window.
-- NOTE(review): happen_hour_min appears to hold the time as an HHMM integer
-- (e.g. 1400 = 14:00) - confirm against the CSV.

-- Window 1400-2159. The two statements below are alternative forms:
-- COUNT(dates) counts rows whose dates value is non-NULL, COUNT(*) counts all rows.
-- Result noted by the author: 42674.
SELECT COUNT(dates)
FROM crime_data_2014_2015
WHERE happen_hour_min BETWEEN 1400 AND 2159;

SELECT COUNT(*)
FROM crime_data_2014_2015
WHERE happen_hour_min BETWEEN 1400 AND 2159;

-- Window 2200-2359 plus 0-659. It wraps past midnight, so it needs two ranges joined by OR.
-- The two statements below are again alternative forms of the same count.
-- Result noted by the author: 25378 (= 9391 + 15987).
SELECT COUNT(dates)
FROM crime_data_2014_2015
WHERE happen_hour_min BETWEEN 2200 AND 2359
   OR happen_hour_min BETWEEN 0 AND 659;

-- Parentheses make the AND/OR grouping explicit instead of relying on operator precedence.
SELECT COUNT(*)
FROM crime_data_2014_2015
WHERE (happen_hour_min >= 2200 AND happen_hour_min <= 2359)
   OR (happen_hour_min >= 0 AND happen_hour_min <= 659);