Hive日志分析案例二

来源:互联网 发布:苹果电脑安装软件 编辑:程序博客网 时间:2024/05/01 16:12

1、创建日志原始表

-- create tabledrop table if exists hive_ycong.track_log ;create table hive_ycong.track_log(id                string,url               string,referer           string,keyword           string,type              string,guid              string,pageId            string,moduleId          string,linkId            string,attachedInfo      string,sessionId         string,trackerU          string,trackerType       string,ip                string,trackerSrc        string,cookie            string,orderCode         string,trackTime         string,endUserId         string,firstLink         string,sessionViewNo     string,productId         string,curMerchantId     string,provinceId        string,cityId            string,fee               string,edmActivity       string,edmEmail          string,edmJobId          string,ieVersion         string,platform          string,internalKeyword   string,resultSum         string,currentPage       string,linkPosition      string,buttonPosition    string)partitioned by(date string, hour string)ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS PARQUET ;

2、导入数据

--load dataset parquet.compression=SNAPPY ;load data local inpath '/data/tracklogs/20160321/2016032119' overwrite into table hive_ycong.track_log partition(date = '20160321' ,hour = '19'); load data local inpath '/data/tracklogs/20160321/2016032120' overwrite into table hive_ycong.track_log partition(date = '20160321' ,hour = '20');

3、创建会话临时表session_info_temp

drop table if exists hive_ycong.session_info_temp ;create table hive_ycong.session_info_temp as select a.sessionid session_id ,max(a.guid) guid, max(a.enduserid) user_id ,count(a.url) pv ,(unix_timestamp(max(a.tracktime)) - unix_timestamp(min(a.tracktime))) stay_time,min(a.tracktime) min_trackTime ,max(a.ip) ip,max(a.provinceid) provinceId from hive_ycong.track_log a where date = '20160321' group by sessionid;**

4、创建会话临时表session_url_temp

drop table if exists hive_ycong.session_url_temp ;create table hive_ycong.session_url_temp as select sessionid,tracktime,trackeru,url,refererfrom hive_ycong.track_log where date = '20160321';

5、创建会话表session_info,并加载数据

create table hive_ycong.session_info(session_id string ,guid string ,trackerU string ,landing_url string ,landing_url_ref string ,user_id string ,pv string ,stay_time string ,min_trackTime string ,ip string ,provinceId string )partitioned by (date string)ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ;insert overwrite table hive_ycong.session_info partition(date = '20160321')select a.session_id session_id ,max(a.guid) guid ,max(b.trackeru) trackerU ,max(b.url) landing_url ,max(b.referer) landing_url_ref ,max(a.user_id) user_id ,max(a.pv) pv ,max(a.stay_time / 1000) stay_time ,max(a.min_trackTime) min_trackTime ,max(a.ip) ip  ,max(a.provinceId) provinceId from hive_ycong.session_info_temp ajoin hive_ycong.session_url_temp b on a.session_id = b.sessionid and a.min_trackTime = b.tracktimegroup by a.session_id;

6、结果分析:日期 UV PV 登陆人数 游客人数 平均访问时长 二跳率 独立IP数

drop table if exists hive_ycong.visit_daily_temp ;create table hive_ycong.visit_daily_temp as select date,count(distinct guid) uv,sum(pv) pv,count(distinct case when user_id != '' then user_id else null end) login_users,count(distinct case when user_id = '' then guid else null end) visit_users,avg(stay_time) avg_stay_time,count(case when pv >= 2 then session_id else null end) / count(session_id) second_rate,count(distinct ip) ip_numberfrom hive_ycong.session_info where date = '20160321'group by date;

结果

  • 没有权限的用户
    grant all on *.* to root@'bigdata.eclipse.com' identified by '123456'
0 0