用户行为分析业务系统日志处理方案
来源:互联网 发布:新加坡 经济形势 数据 编辑:程序博客网 时间:2024/05/25 18:12
1、日志要进入的目标表结构信息
1.1 Hive库上的目标表结构信息
-- Hive target table for the raw user-behavior log records.
-- One partition per day (log_date, yyyymmdd e.g. 20151109); plain
-- comma-delimited text storage so `load data local inpath` works directly
-- on the exported CSV file.
CREATE TABLE `yemao_log` (
  `id`              int,
  `time`            int,
  `url_from`        string,
  `url_current`     string,
  `url_to`          string,
  `options`         string,
  `uid`             int,
  `new_visitor`     string,
  `province`        string,
  `city`            string,
  `site`            string,
  `device`          string,
  `phone`           string,
  `token`           string,
  `dorm`            string,
  `order_phone`     string,
  `order_dormitory` string,
  `order_amount`    string,
  `order_id`        int,
  `uname`           string,
  `site_id`         int,
  `address`         string,
  `dorm_id`         int,
  `dormentry_id`    int,
  `rid`             int,
  `cart_quantity`   string
)
PARTITIONED BY (`log_date` int)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  LINES TERMINATED BY '\n'
STORED AS
  INPUTFORMAT  'org.apache.hadoop.mapred.TextInputFormat'
  OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://Master:9000/user/hive/warehouse/yemao_log'
TBLPROPERTIES ('transient_lastDdlTime'='1447308813');
1.2 Mysql库上当前表,其实就是一个临时表
-- MySQL staging table: every field arrives as text from the Sqoop export
-- of the Hive partition. It is truncated by p_ymlog_maintain after its
-- rows are moved into the partitioned history table yemao_loghis.
CREATE TABLE `yemao_log` (
  `id`              varchar(8000) DEFAULT NULL,
  `time`            varchar(8000) DEFAULT NULL,
  `url_from`        text,
  `url_current`     text,
  `url_to`          text,
  `options`         text,
  `uid`             text,
  `new_visitor`     text,
  `province`        text,
  `city`            text,
  `site`            text,
  `device`          text,
  `phone`           text,
  `token`           text,
  `dorm`            text,
  `order_phone`     text,
  `order_dormitory` text,
  `order_amount`    text,
  `order_id`        text,
  `uname`           text,
  `site_id`         text,
  `address`         text,
  `dorm_id`         text,
  `dormentry_id`    text,
  `rid`             text,
  `cart_quantity`   text
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
1.3 Mysql库上历史表,其实就是实际在用的表
-- MySQL history table (the one actually queried): same columns as the
-- staging table plus log_date, LIST-partitioned one partition per day so
-- a day can be dropped and reloaded atomically by p_ymlog_maintain.
-- p0 is a placeholder partition for log_date = 0.
CREATE TABLE `yemao_loghis` (
  `id`              varchar(8000) DEFAULT NULL,
  `time`            varchar(8000) DEFAULT NULL,
  `url_from`        text,
  `url_current`     text,
  `url_to`          text,
  `options`         text,
  `uid`             text,
  `new_visitor`     text,
  `province`        text,
  `city`            text,
  `site`            text,
  `device`          text,
  `phone`           text,
  `token`           text,
  `dorm`            text,
  `order_phone`     text,
  `order_dormitory` text,
  `order_amount`    text,
  `order_id`        text,
  `uname`           text,
  `site_id`         text,
  `address`         text,
  `dorm_id`         text,
  `dormentry_id`    text,
  `rid`             text,
  `cart_quantity`   text,
  `log_date`        int(11) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
/*!50100 PARTITION BY LIST (log_date) (
  PARTITION p0        VALUES IN (0)        ENGINE = InnoDB,
  PARTITION p20151109 VALUES IN (20151109) ENGINE = InnoDB,
  PARTITION p20151110 VALUES IN (20151110) ENGINE = InnoDB,
  PARTITION p20151111 VALUES IN (20151111) ENGINE = InnoDB,
  PARTITION p20151112 VALUES IN (20151112) ENGINE = InnoDB,
  PARTITION p20151113 VALUES IN (20151113) ENGINE = InnoDB,
  PARTITION p20151114 VALUES IN (20151114) ENGINE = InnoDB,
  PARTITION p20151115 VALUES IN (20151115) ENGINE = InnoDB,
  PARTITION p20151116 VALUES IN (20151116) ENGINE = InnoDB,
  PARTITION p20151117 VALUES IN (20151117) ENGINE = InnoDB,
  PARTITION p20151118 VALUES IN (20151118) ENGINE = InnoDB,
  PARTITION p20151119 VALUES IN (20151119) ENGINE = InnoDB,
  PARTITION p20151120 VALUES IN (20151120) ENGINE = InnoDB,
  PARTITION p20151121 VALUES IN (20151121) ENGINE = InnoDB,
  PARTITION p20151122 VALUES IN (20151122) ENGINE = InnoDB,
  PARTITION p20151123 VALUES IN (20151123) ENGINE = InnoDB,
  PARTITION p20151124 VALUES IN (20151124) ENGINE = InnoDB,
  PARTITION p20151125 VALUES IN (20151125) ENGINE = InnoDB,
  PARTITION p20151126 VALUES IN (20151126) ENGINE = InnoDB,
  PARTITION p20151127 VALUES IN (20151127) ENGINE = InnoDB,
  PARTITION p20151128 VALUES IN (20151128) ENGINE = InnoDB,
  PARTITION p20151129 VALUES IN (20151129) ENGINE = InnoDB,
  PARTITION p20151130 VALUES IN (20151130) ENGINE = InnoDB,
  PARTITION p20151201 VALUES IN (20151201) ENGINE = InnoDB,
  PARTITION p20151202 VALUES IN (20151202) ENGINE = InnoDB,
  PARTITION p20151203 VALUES IN (20151203) ENGINE = InnoDB
) */;
2、数据处理的存储过程
-- Maintains the daily partition of logdata.yemao_loghis and moves the
-- freshly loaded rows from the staging table logdata.yemao_log into it.
--
-- v_log_date : day to (re)load, as yyyymmdd (e.g. 20151109).
--
-- Rerun-safe: an existing partition for the day is dropped first, so
-- calling the procedure twice for the same day replaces rather than
-- duplicates the data. The staging table is truncated at the end.
CREATE DEFINER=`datahs`@`%` PROCEDURE `p_ymlog_maintain`(IN `v_log_date` int)
BEGIN
  DECLARE v_partition_exists INT;

  -- Does a partition for this day already exist in the history table?
  SELECT count(1) INTO v_partition_exists
  FROM information_schema.`PARTITIONS`
  WHERE TABLE_SCHEMA = 'logdata'
    AND table_name = 'yemao_loghis'
    AND partition_name = concat('p', v_log_date);

  -- Drop it first so the reload below starts from an empty partition.
  IF v_partition_exists = 1 THEN
    SET @exec_sql = concat('ALTER TABLE logdata.yemao_loghis DROP PARTITION p', v_log_date);
    PREPARE stmt FROM @exec_sql;
    EXECUTE stmt;
    DEALLOCATE PREPARE stmt;
  END IF;

  -- BUG FIX: the original concat had no space before VALUES, generating
  -- "PARTITION p20151109VALUES IN (...)" which is a syntax error.
  SET @exec_sql = concat('ALTER TABLE logdata.yemao_loghis ADD PARTITION (PARTITION p',
                         v_log_date, ' VALUES IN (', v_log_date, '));');
  PREPARE stmt FROM @exec_sql;
  EXECUTE stmt;
  DEALLOCATE PREPARE stmt;

  -- Copy the staged rows into the new partition. The a.id <> 'id'
  -- predicate skips the CSV header line exported by mongoexport.
  SET @exec_sql = concat(
    'INSERT INTO logdata.yemao_loghis ',
    '(id,time,url_from,url_current,url_to,options,uid,new_visitor,province,city,site,device,',
    'phone,token,dorm,order_phone,order_dormitory,order_amount,order_id,uname,site_id,address,',
    'dorm_id,dormentry_id,rid,cart_quantity,log_date) ',
    'SELECT a.id,a.time,a.url_from,a.url_current,a.url_to,a.options,a.uid,a.new_visitor,',
    'a.province,a.city,a.site,a.device,a.phone,a.token,a.dorm,a.order_phone,a.order_dormitory,',
    'a.order_amount,a.order_id,a.uname,a.site_id,a.address,a.dorm_id,a.dormentry_id,a.rid,',
    'a.cart_quantity,', v_log_date, ' AS log_date ',
    'FROM logdata.yemao_log a WHERE a.id <> ''id'';');
  PREPARE stmt FROM @exec_sql;
  EXECUTE stmt;
  -- Release the prepared-statement handle (the original leaked it).
  DEALLOCATE PREPARE stmt;

  -- Empty the staging table ready for the next day's Sqoop export.
  TRUNCATE TABLE logdata.yemao_log;
END
3、数据处理及装载的Shell脚本
定时自动调度脚本
定时自动调度脚本
ymlog_proc.sh
#!/bin/bash
# Daily (cron-driven) pipeline for yesterday's yemao user-behavior logs:
#   untar -> keep JSON-array lines -> MongoDB import -> CSV export ->
#   Hive partition load -> Sqoop export to MySQL staging ->
#   call p_ymlog_maintain() -> clean up temp files.
# BUG FIX: the shebang was "#/bin/bash" (missing '!'), so it was only a
# comment and the script ran under whatever shell invoked it.
# NOTE(review): DB credentials are embedded in plain text below — consider
# moving them to a protected option file (e.g. mysql --defaults-extra-file).

export yesterday=$(date -d last-day +%Y%m%d)

# Abort if the log directory is missing rather than running in the wrong cwd.
cd /home/spark/opt/Log_Data/yemao || exit 1

# Unpack each of yesterday's archives and keep only the lines that contain
# a JSON array payload ("[{...}]"); remove the unpacked .log files as we go.
for tar in yemao*"$yesterday".tar.gz; do
    tar xvf "$tar"
    grep -h "\[{.*}\]" *.log >> yemaolog
    rm -rf /home/spark/opt/Log_Data/yemao/*.log
done

# Strip the leading '[' and the trailing ']' plus one extra character so
# each line becomes a bare JSON document that mongoimport accepts.
sed -i 's/^.//' yemaolog
sed -i 's/..$//' yemaolog

# Reload the day's collection in MongoDB (--drop replaces any prior run).
/home/spark/opt/mongodb-2.7.0/bin/mongoimport -d yemao -c yemao_log_"$yesterday" --drop /home/spark/opt/Log_Data/yemao/yemaolog

# Export just the needed fields as CSV for the downstream loads.
/home/spark/opt/mongodb-2.7.0/bin/mongoexport -d yemao -c yemao_log_"$yesterday" --csv -f id,time,url_from,url_current,url_to,options,uid,new_visitor,province,city,site,device,phone,token,dorm,order_phone,order_dormitory,order_amount,order_id,uname,site_id,address,dorm_id,dormentry_id,rid,cart_quantity -o /home/spark/opt/Log_Data/yemao/yemao.dat

# Replace the day's Hive partition and load the CSV into it.
/home/spark/opt/hive-1.2.1/bin/hive -e "alter table yemao_log drop if exists partition (log_date=$yesterday);alter table yemao_log add if not exists partition (log_date=$yesterday);load data local inpath '/home/spark/opt/Log_Data/yemao/yemao.dat' into table yemao_log partition (log_date=$yesterday);"

# Superseded by the Sqoop export below; kept for reference.
#/usr/local/mysql/bin/mysqlimport -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 --fields-terminated-by=, --lines-terminated-by=\n logdata /home/spark/opt/Log_Data/yemao/yemao.dat --local

# Push the Hive partition into the MySQL staging table.
/home/spark/opt/sqoop-1.4.6/bin/sqoop export --connect jdbc:mysql://120.55.189.188:3306/logdata --username datawarehouse --password datawarehouse2015 --table yemao_log --export-dir /user/hive/warehouse/yemao_log/log_date="$yesterday" --input-fields-terminated-by ','

# Move staged rows into the partitioned history table and truncate staging.
/usr/local/mysql/bin/mysql -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 -e "call logdata.p_ymlog_maintain($yesterday);"

# Remove intermediate files (including the Sqoop-generated java stub).
rm -rf /home/spark/opt/Log_Data/yemao/yemao.dat
rm -rf /home/spark/opt/Log_Data/yemao/yemaolog
rm -rf /home/spark/opt/Log_Data/yemao/yemao_log.java
手动调度处理脚本
ymlog_proc_manual.sh
#!/bin/bash
# Manual variant of ymlog_proc.sh: instead of deriving yesterday's date,
# the operator is prompted for the day (yyyymmdd) to process/reprocess.
# The pipeline is otherwise identical to the cron script.
# BUG FIX: the shebang was "#/bin/bash" (missing '!'), so it was only a
# comment and the script ran under whatever shell invoked it.
# NOTE(review): DB credentials are embedded in plain text below — consider
# moving them to a protected option file (e.g. mysql --defaults-extra-file).

#export yesterday=$(date -d last-day +%Y%m%d)
echo -n "please enter a day for runing :"
read yesterday

# Abort if the log directory is missing rather than running in the wrong cwd.
cd /home/spark/opt/Log_Data/yemao || exit 1

# Unpack each of the chosen day's archives and keep only the lines that
# contain a JSON array payload ("[{...}]").
for tar in yemao*"$yesterday".tar.gz; do
    tar xvf "$tar"
    grep -h "\[{.*}\]" *.log >> yemaolog
    rm -rf /home/spark/opt/Log_Data/yemao/*.log
done

# Strip the leading '[' and the trailing ']' plus one extra character so
# each line becomes a bare JSON document that mongoimport accepts.
sed -i 's/^.//' yemaolog
sed -i 's/..$//' yemaolog

# Reload the day's collection in MongoDB (--drop replaces any prior run).
/home/spark/opt/mongodb-2.7.0/bin/mongoimport -d yemao -c yemao_log_"$yesterday" --drop /home/spark/opt/Log_Data/yemao/yemaolog

# Export just the needed fields as CSV for the downstream loads.
/home/spark/opt/mongodb-2.7.0/bin/mongoexport -d yemao -c yemao_log_"$yesterday" --csv -f id,time,url_from,url_current,url_to,options,uid,new_visitor,province,city,site,device,phone,token,dorm,order_phone,order_dormitory,order_amount,order_id,uname,site_id,address,dorm_id,dormentry_id,rid,cart_quantity -o /home/spark/opt/Log_Data/yemao/yemao.dat

# Replace the day's Hive partition and load the CSV into it.
/home/spark/opt/hive-1.2.1/bin/hive -e "alter table yemao_log drop if exists partition (log_date=$yesterday);alter table yemao_log add if not exists partition (log_date=$yesterday);load data local inpath '/home/spark/opt/Log_Data/yemao/yemao.dat' into table yemao_log partition (log_date=$yesterday);"

# Superseded by the Sqoop export below; kept for reference.
#/usr/local/mysql/bin/mysqlimport -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 --fields-terminated-by=, --lines-terminated-by=\n logdata /home/spark/opt/Log_Data/yemao/yemao.dat --local

# Push the Hive partition into the MySQL staging table.
/home/spark/opt/sqoop-1.4.6/bin/sqoop export --connect jdbc:mysql://120.55.189.188:3306/logdata --username datawarehouse --password datawarehouse2015 --table yemao_log --export-dir /user/hive/warehouse/yemao_log/log_date="$yesterday" --input-fields-terminated-by ','

# Move staged rows into the partitioned history table and truncate staging.
/usr/local/mysql/bin/mysql -h120.55.189.188 -udatawarehouse -pdatawarehouse2015 -e "call logdata.p_ymlog_maintain($yesterday);"

# Remove intermediate files (including the Sqoop-generated java stub).
rm -rf /home/spark/opt/Log_Data/yemao/yemao.dat
rm -rf /home/spark/opt/Log_Data/yemao/yemaolog
rm -rf /home/spark/opt/Log_Data/yemao/yemao_log.java
4、设置Crontab定时调度
[spark@Master ~]$ crontab -l
0 6 * * * sh /home/spark/opt/Log_Data/ymlog_proc.sh
[spark@Master ~]$ crontab -l
0 6 * * * sh /home/spark/opt/Log_Data/ymlog_proc.sh
5、处理流程说明
业务系统埋点产生的用户行为数据,是以Json方式进行保存和传送过来的。首先,将源日志数据进行一定程度的处理,使之成为标准的Json格式;然后将文件装载到MongoDB数据库;最后根据需要将必要字段分别装载到Hive及Mysql数据库中。
业务系统埋点产生的用户行为数据,是以Json方式进行保存和传送过来的。首先,将源日志数据进行一定程度的处理,使之成为标准的Json格式;然后将文件装载到MongoDB数据库;最后根据需要将必要字段分别装载到Hive及Mysql数据库中。
0 0
- 用户行为分析业务系统日志处理方案
- 用户行为分析系统架构
- 日志分析系统ELK(业务日志)
- 业务系统JSON日志通过python处理并导入Mysql方案
- 业务系统JSON日志通过python处理并导入Hive方案
- 系统API模块——某大型互联网企业用户上网行为日志分析系统
- 基于用户行为的数据分析与挖掘+分布式日志管理系统
- 某大型互联网企业用户上网行为日志分析系统——云计算项目实战
- 项目背景介绍——某大型互联网企业用户上网行为日志分析系统
- 002中小规模电子商务网站用户行为日志收集方案
- 如何构建用户行为分析系统?
- 推荐系统之用户行为分析
- 推荐系统之用户行为分析
- 推荐系统之用户行为分析
- 业务日志收集方案
- 用户行为分析之--apache日志分析(一)
- 用户行为分析之--apache日志分析(二)
- 云计算项目实战——某大型互联网企业用户上网行为日志分析系统 PPT下载
- ViewPage + fragement+WebView 滑动卡死
- project euler 21
- Concurrent包
- 线程使用经验
- 性能-客户端测试(二)
- 用户行为分析业务系统日志处理方案
- Python调用C/C++初步
- 一起学libcef--源码文件介绍(VS2015运行DEMO)
- 数据结构与算法(线性表_顺序表存储结构)
- project euler 22
- JAVA 栈数据结构及其应用
- asio1
- js回到顶部,关闭一个模块的方法
- IOS OC 字典