Log Processing Solution for a User Behavior Analysis Business System

1. Target table structures for the incoming logs
1.1 Target table structure in the Hive database
CREATE TABLE `yemao_log`(
  `id` int,
  `time` int,
  `url_from` string,
  `url_current` string,
  `url_to` string,
  `options` string,
  `uid` int,
  `new_visitor` string,
  `province` string,
  `city` string,
  `site` string,
  `device` string,
  `phone` string,
  `token` string,
  `dorm` string,
  `order_phone` string,
  `order_dormitory` string,
  `order_amount` string,
  `order_id` int,
  `uname` string,
  `site_id` int,
  `address` string,
  `dorm_id` int,
  `dormentry_id` int,
  `rid` int,
  `cart_quantity` string)
PARTITIONED BY (
  `log_date` int)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  LINES TERMINATED BY '\n'
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://Master:9000/user/hive/warehouse/yemao_log'
TBLPROPERTIES (
  'transient_lastDdlTime'='1447308813');
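Because the table is partitioned by log_date, filtering on that column makes Hive scan only the matching partition directory. A minimal sanity check after a day's load might look like this (20151109 is just an example date):

# Count the rows loaded for one day; the WHERE clause on the partition
# column prunes the scan to a single partition directory.
/home/spark/opt/hive-1.2.1/bin/hive -e "SELECT count(*) FROM yemao_log WHERE log_date = 20151109;"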

1.2 Current table in the MySQL database (in effect a staging table)
CREATE TABLE `yemao_log` (
  `id` varchar(8000) DEFAULT NULL,
  `time` varchar(8000) DEFAULT NULL,
  `url_from` text,
  `url_current` text,
  `url_to` text,
  `options` text,
  `uid` text,
  `new_visitor` text,
  `province` text,
  `city` text,
  `site` text,
  `device` text,
  `phone` text,
  `token` text,
  `dorm` text,
  `order_phone` text,
  `order_dormitory` text,
  `order_amount` text,
  `order_id` text,
  `uname` text,
  `site_id` text,
  `address` text,
  `dorm_id` text,
  `dormentry_id` text,
  `rid` text,
  `cart_quantity` text
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
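Every column is varchar/text, presumably so that no row from the CSV export can be rejected on type conversion during the Sqoop load; typing is deferred to downstream consumers. After an export, a quick row count (a hypothetical sanity check, not part of the original flow) confirms the load:

# Hypothetical check: the staged row count should match the size of the
# Hive partition that was just exported (plus the CSV header row).
/usr/local/mysql/bin/mysql -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 \
  -e "SELECT count(*) FROM logdata.yemao_log;"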

1.3 History table in the MySQL database (the table actually in use)
CREATE TABLE `yemao_loghis` (
  `id` varchar(8000) DEFAULT NULL,
  `time` varchar(8000) DEFAULT NULL,
  `url_from` text,
  `url_current` text,
  `url_to` text,
  `options` text,
  `uid` text,
  `new_visitor` text,
  `province` text,
  `city` text,
  `site` text,
  `device` text,
  `phone` text,
  `token` text,
  `dorm` text,
  `order_phone` text,
  `order_dormitory` text,
  `order_amount` text,
  `order_id` text,
  `uname` text,
  `site_id` text,
  `address` text,
  `dorm_id` text,
  `dormentry_id` text,
  `rid` text,
  `cart_quantity` text,
  `log_date` int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
/*!50100 PARTITION BY LIST (log_date)
(PARTITION p0 VALUES IN (0) ENGINE = InnoDB,
 PARTITION p20151109 VALUES IN (20151109) ENGINE = InnoDB,
 PARTITION p20151110 VALUES IN (20151110) ENGINE = InnoDB,
 PARTITION p20151111 VALUES IN (20151111) ENGINE = InnoDB,
 PARTITION p20151112 VALUES IN (20151112) ENGINE = InnoDB,
 PARTITION p20151113 VALUES IN (20151113) ENGINE = InnoDB,
 PARTITION p20151114 VALUES IN (20151114) ENGINE = InnoDB,
 PARTITION p20151115 VALUES IN (20151115) ENGINE = InnoDB,
 PARTITION p20151116 VALUES IN (20151116) ENGINE = InnoDB,
 PARTITION p20151117 VALUES IN (20151117) ENGINE = InnoDB,
 PARTITION p20151118 VALUES IN (20151118) ENGINE = InnoDB,
 PARTITION p20151119 VALUES IN (20151119) ENGINE = InnoDB,
 PARTITION p20151120 VALUES IN (20151120) ENGINE = InnoDB,
 PARTITION p20151121 VALUES IN (20151121) ENGINE = InnoDB,
 PARTITION p20151122 VALUES IN (20151122) ENGINE = InnoDB,
 PARTITION p20151123 VALUES IN (20151123) ENGINE = InnoDB,
 PARTITION p20151124 VALUES IN (20151124) ENGINE = InnoDB,
 PARTITION p20151125 VALUES IN (20151125) ENGINE = InnoDB,
 PARTITION p20151126 VALUES IN (20151126) ENGINE = InnoDB,
 PARTITION p20151127 VALUES IN (20151127) ENGINE = InnoDB,
 PARTITION p20151128 VALUES IN (20151128) ENGINE = InnoDB,
 PARTITION p20151129 VALUES IN (20151129) ENGINE = InnoDB,
 PARTITION p20151130 VALUES IN (20151130) ENGINE = InnoDB,
 PARTITION p20151201 VALUES IN (20151201) ENGINE = InnoDB,
 PARTITION p20151202 VALUES IN (20151202) ENGINE = InnoDB,
 PARTITION p20151203 VALUES IN (20151203) ENGINE = InnoDB) */;
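With LIST partitioning on log_date, a query that filters on one day touches only that day's partition. On the MySQL 5.x series this table targets, EXPLAIN PARTITIONS can confirm the pruning (a hypothetical check; 20151110 is just an example date):

# Hypothetical check that a per-day query prunes to a single partition;
# the "partitions" column of the plan should show only p20151110.
/usr/local/mysql/bin/mysql -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 \
  -e "EXPLAIN PARTITIONS SELECT count(*) FROM logdata.yemao_loghis WHERE log_date = 20151110;"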

2. Stored procedure for data processing
CREATE DEFINER=`datahs`@`%` PROCEDURE `p_ymlog_maintain`(IN `v_log_date` int)
BEGIN
  DECLARE v_partition_exists INT;

  -- Check whether the day's partition already exists in the history table.
  SELECT count(1) INTO v_partition_exists
  FROM information_schema.`PARTITIONS`
  WHERE TABLE_SCHEMA = 'logdata'
    AND table_name = 'yemao_loghis'
    AND partition_name = concat('p', v_log_date);

  -- Drop and recreate the partition so the procedure can be rerun safely.
  IF v_partition_exists = 1 THEN
    SET @exec_sql = concat("ALTER TABLE logdata.yemao_loghis DROP PARTITION p", v_log_date);
    PREPARE stmt FROM @exec_sql;
    EXECUTE stmt;
  END IF;

  SET @exec_sql = concat("ALTER TABLE logdata.yemao_loghis ADD PARTITION (PARTITION p", v_log_date, " VALUES IN (", v_log_date, "));");
  PREPARE stmt FROM @exec_sql;
  EXECUTE stmt;

  -- Move the staged rows into the history table, stamping them with the
  -- day's log_date; id <> 'id' skips the CSV header row from mongoexport.
  SET @exec_sql = concat("INSERT INTO logdata.yemao_loghis (id, time, url_from, url_current, url_to, options, uid, new_visitor, province, city, site, device, phone, token, dorm, order_phone, order_dormitory, order_amount, order_id, uname, site_id, address, dorm_id, dormentry_id, rid, cart_quantity, log_date) SELECT a.id, a.time, a.url_from, a.url_current, a.url_to, a.options, a.uid, a.new_visitor, a.province, a.city, a.site, a.device, a.phone, a.token, a.dorm, a.order_phone, a.order_dormitory, a.order_amount, a.order_id, a.uname, a.site_id, a.address, a.dorm_id, a.dormentry_id, a.rid, a.cart_quantity, ", v_log_date, " log_date FROM logdata.yemao_log a WHERE id <> 'id';");
  PREPARE stmt FROM @exec_sql;
  EXECUTE stmt;
  DEALLOCATE PREPARE stmt;

  TRUNCATE TABLE logdata.yemao_log;
END
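The procedure takes the day's log_date, recreates that day's partition in yemao_loghis, moves the staged rows over, and truncates the staging table. It is called from the shell scripts below, or can be run by hand (20151109 is just an example date):

# Invoke the procedure for one day, then list the partitions of
# yemao_loghis to verify that the day's partition was (re)created.
/usr/local/mysql/bin/mysql -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 \
  -e "CALL logdata.p_ymlog_maintain(20151109);
      SELECT partition_name, table_rows FROM information_schema.PARTITIONS
      WHERE table_schema = 'logdata' AND table_name = 'yemao_loghis';"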

3. Shell scripts for processing and loading the data
Automatically scheduled script
ymlog_proc.sh
#!/bin/bash
# Process yesterday's tarred logs: extract, clean to line-delimited JSON,
# stage through MongoDB, then load into Hive and MySQL.
export yesterday=`date -d last-day +%Y%m%d`
cd /home/spark/opt/Log_Data/yemao

# Unpack every tarball for the day and keep only the lines that look
# like one-element JSON arrays: [{...}]
for tar in yemao*$yesterday.tar.gz; do
  tar xvf $tar;
  grep -h "\[{.*}\]" *.log >> yemaolog;
  rm -rf /home/spark/opt/Log_Data/yemao/*.log
done

# Strip the leading '[' and the last two characters (the closing ']'
# plus one trailing character), leaving one bare JSON object per line.
sed -i 's/^.//' yemaolog
sed -i 's/..$//' yemaolog

# Load the cleaned JSON into MongoDB, then export the needed fields as CSV.
/home/spark/opt/mongodb-2.7.0/bin/mongoimport -d yemao -c yemao_log_$yesterday --drop /home/spark/opt/Log_Data/yemao/yemaolog
/home/spark/opt/mongodb-2.7.0/bin/mongoexport -d yemao -c yemao_log_$yesterday --csv -f id,time,url_from,url_current,url_to,options,uid,new_visitor,province,city,site,device,phone,token,dorm,order_phone,order_dormitory,order_amount,order_id,uname,site_id,address,dorm_id,dormentry_id,rid,cart_quantity -o /home/spark/opt/Log_Data/yemao/yemao.dat

# Recreate the day's Hive partition and load the CSV into it.
/home/spark/opt/hive-1.2.1/bin/hive -e "alter table yemao_log drop if exists partition (log_date=$yesterday); alter table yemao_log add if not exists partition (log_date=$yesterday); load data local inpath '/home/spark/opt/Log_Data/yemao/yemao.dat' into table yemao_log partition (log_date=$yesterday);"

# Export the Hive partition into the MySQL staging table.
#/usr/local/mysql/bin/mysqlimport -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 --fields-terminated-by=, --lines-terminated-by=\n logdata /home/spark/opt/Log_Data/yemao/yemao.dat --local
/home/spark/opt/sqoop-1.4.6/bin/sqoop export --connect jdbc:mysql://120.55.189.188:3306/logdata --username datawarehouse --password datawarehouse2015 --table yemao_log --export-dir /user/hive/warehouse/yemao_log/log_date=$yesterday --input-fields-terminated-by ',';

# Move the staged rows into the partitioned history table.
/usr/local/mysql/bin/mysql -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 -e "call logdata.p_ymlog_maintain($yesterday);"

# Clean up intermediate files (yemao_log.java is generated by sqoop).
rm -rf /home/spark/opt/Log_Data/yemao/yemao.dat
rm -rf /home/spark/opt/Log_Data/yemao/yemaolog
rm -rf /home/spark/opt/Log_Data/yemao/yemao_log.java
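The two sed passes assume each matched line is a one-element JSON array followed by a single trailing character (such as a comma or carriage return), which is why two characters are stripped from the end but only one from the front. An illustration with a made-up record:

# Illustration only: the sed pair turns a raw wrapped record into the
# bare JSON object that mongoimport ingests line by line.
echo '[{"uid":1,"city":"hangzhou"}],' | sed 's/^.//' | sed 's/..$//'
# prints: {"uid":1,"city":"hangzhou"}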

Manually triggered script
ymlog_proc_manual.sh
#!/bin/bash
# Same pipeline as ymlog_proc.sh, except the date to process is read
# from stdin instead of being derived from the system clock.
#export yesterday=`date -d last-day +%Y%m%d`
echo -n "please enter a day for running :"
read yesterday
cd /home/spark/opt/Log_Data/yemao
for tar in yemao*$yesterday.tar.gz; do
  tar xvf $tar;
  grep -h "\[{.*}\]" *.log >> yemaolog;
  rm -rf /home/spark/opt/Log_Data/yemao/*.log
done
sed -i 's/^.//' yemaolog
sed -i 's/..$//' yemaolog
/home/spark/opt/mongodb-2.7.0/bin/mongoimport -d yemao -c yemao_log_$yesterday --drop /home/spark/opt/Log_Data/yemao/yemaolog
/home/spark/opt/mongodb-2.7.0/bin/mongoexport -d yemao -c yemao_log_$yesterday --csv -f id,time,url_from,url_current,url_to,options,uid,new_visitor,province,city,site,device,phone,token,dorm,order_phone,order_dormitory,order_amount,order_id,uname,site_id,address,dorm_id,dormentry_id,rid,cart_quantity -o /home/spark/opt/Log_Data/yemao/yemao.dat
/home/spark/opt/hive-1.2.1/bin/hive -e "alter table yemao_log drop if exists partition (log_date=$yesterday); alter table yemao_log add if not exists partition (log_date=$yesterday); load data local inpath '/home/spark/opt/Log_Data/yemao/yemao.dat' into table yemao_log partition (log_date=$yesterday);"
#/usr/local/mysql/bin/mysqlimport -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 --fields-terminated-by=, --lines-terminated-by=\n logdata /home/spark/opt/Log_Data/yemao/yemao.dat --local
/home/spark/opt/sqoop-1.4.6/bin/sqoop export --connect jdbc:mysql://120.55.189.188:3306/logdata --username datawarehouse --password datawarehouse2015 --table yemao_log --export-dir /user/hive/warehouse/yemao_log/log_date=$yesterday --input-fields-terminated-by ',';
/usr/local/mysql/bin/mysql -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 -e "call logdata.p_ymlog_maintain($yesterday);"
rm -rf /home/spark/opt/Log_Data/yemao/yemao.dat
rm -rf /home/spark/opt/Log_Data/yemao/yemaolog
rm -rf /home/spark/opt/Log_Data/yemao/yemao_log.java
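Because the manual script takes an arbitrary date from stdin, a small guard could be placed right after the read (a suggested addition, not part of the original script):

# Suggested guard (not in the original): abort unless the input looks
# like a YYYYMMDD date, so a typo cannot process or delete the wrong data.
if ! echo "$yesterday" | grep -qE '^[0-9]{8}$'; then
  echo "invalid date: $yesterday (expected YYYYMMDD)" >&2
  exit 1
fi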

4. Setting up scheduled runs with crontab
[spark@Master ~]$ crontab -l
0 6 * * * sh /home/spark/opt/Log_Data/ymlog_proc.sh
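As written, any output from the nightly run is mailed to the crontab owner or discarded. Redirecting it to a log file (path hypothetical) makes failures in the multi-stage pipeline easier to trace:

# Suggested variant: capture stdout and stderr of the nightly run
0 6 * * * sh /home/spark/opt/Log_Data/ymlog_proc.sh >> /home/spark/opt/Log_Data/ymlog_proc.log 2>&1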

5. Description of the processing flow
The user behavior data produced by the tracking points embedded in the business system is stored and shipped as JSON. First, the raw log data is cleaned into standard JSON; the resulting file is then loaded into MongoDB; finally, the required fields are loaded into Hive and MySQL as needed.
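To make the flow concrete, here is how a single hypothetical record (field values invented for illustration) moves through the stages. A raw line in an extracted .log file looks like

[{"id":1,"time":1447308813,"uid":42,"city":"hangzhou"}],

and after the grep/sed cleanup the same record sits in yemaolog as

{"id":1,"time":1447308813,"uid":42,"city":"hangzhou"}

mongoimport stores it as a document in yemao.yemao_log_$yesterday; mongoexport flattens the selected fields into a CSV row in yemao.dat; that row is loaded into the Hive partition log_date=$yesterday and, via sqoop export, into the MySQL staging table logdata.yemao_log, from which p_ymlog_maintain moves it into the matching LIST partition of logdata.yemao_loghis.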