用Python调度数据在Mysql及Hive间进行计算2例
来源:互联网 发布:北京天际网络 编辑:程序博客网 时间:2024/06/14 03:27
1、经济商务数据的按周计算
可以借鉴之处:
当前当年第几周的获取;对应周的周一和周末的日期的获取;mysql文本数据的装载。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/ouyjxQuery.py
2、idfa与日志比对进行激活及充值数据计算
可以借鉴之处:
特定目录下文件名的获取;Hive自动建表及分区的自动添加;根据特定目录下的文件自动装载数据到Hive分区;装载文本数据到Mysql并设定装载时的字符集。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/idfaQuery.py
可以借鉴之处:
当前当年第几周的获取;对应周的周一和周末的日期的获取;mysql文本数据的装载。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/ouyjxQuery.py
# -*- coding=utf-8 -*-
"""Weekly batch: aggregate per-room subscribe/cancel statistics in Hive for
last ISO week, then bulk-load the joined result into MySQL.

Flow:
  1. dataQuery()       -- builds intermediate and result tables in Hive
                          (xxx_temp_tab_* / xx0809_*) for last week's window.
  2. resultLoadMysql() -- exports the joined result to a text file via
                          `hive -e ... > file` and loads it into
                          funnyai_data.static_ouy_byday_everweek with
                          LOAD DATA LOCAL INFILE.

All external work goes through os.system shelling out to hive / mysql, so
there is nothing to compute locally beyond the date window.
"""
import datetime
import os
import sys
import warnings

if sys.version_info[0] == 2:
    # Legacy Python-2-only encoding hack; `reload` is not a builtin on
    # Python 3 and would raise NameError, so it is guarded.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')
warnings.filterwarnings("ignore")


def getNowYearLastWeek():
    """Return last week's ISO year/week as 'YYYY#W' (e.g. '2017#33').

    "Last week" is computed as now minus 7 days, so during the first days of
    January this correctly rolls back into the previous ISO year.
    """
    timenow = datetime.datetime.now() - datetime.timedelta(days=7)
    now_year_week = timenow.isocalendar()  # (iso_year, iso_week, iso_weekday)
    return str(now_year_week[0]) + "#" + str(now_year_week[1])


def getWeekFristLastDay(weekflag):
    """Return (monday, sunday) as 'YYYY-MM-DD' strings for ISO week ``weekflag``.

    :param weekflag: string 'YYYY#W' as produced by getNowYearLastWeek().
    NOTE: the historical 'Frist' (sic, 'First') spelling is kept because the
    name is part of this script family's convention.
    """
    yearnum = weekflag[0:4]   # ISO year part
    weeknum = weekflag[5:7]   # ISO week part (one or two digits after '#')
    stryearstart = yearnum + '0101'  # Jan 1 of that year
    yearstart = datetime.datetime.strptime(stryearstart, '%Y%m%d')
    yearstartcalendarmsg = yearstart.isocalendar()  # Jan 1's ISO calendar info
    yearstartweekday = yearstartcalendarmsg[2]
    yearstartyear = yearstartcalendarmsg[0]
    if yearstartyear < int(yearnum):
        # Jan 1 still belongs to the previous year's final ISO week, so ISO
        # week 1 starts on the following Monday.
        daydelat = (8 - int(yearstartweekday)) + (int(weeknum) - 1) * 7
    else:
        # Jan 1 already sits inside ISO week 1 of this year.
        daydelat = (8 - int(yearstartweekday)) + (int(weeknum) - 2) * 7
    week1day = (yearstart + datetime.timedelta(days=daydelat)).date().strftime('%Y-%m-%d')
    week7day = (yearstart + datetime.timedelta(days=daydelat + 6)).date().strftime('%Y-%m-%d')
    return week1day, week7day


def dataQuery():
    """Build last week's subscribe/cancel aggregation tables in Hive.

    Writes xx0809_ouy_result_addsubscriber / xx0809_ouy_result_cancelsubscriber;
    resultLoadMysql() consumes them afterwards.
    """
    # Compute the week window once (the original called the helper twice,
    # once per tuple index).
    lastWeekMonday, lastWeekSunday = getWeekFristLastDay(weekflag=getNowYearLastWeek())
    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    # Run the whole Hive script in one session; partitions are filtered by
    # pt_day, and the room list is the fixed set under observation.
    os.system("""source /etc/profile; \
        /usr/lib/hive-current/bin/hive -e "drop table if exists xxx_temp_tab_user_info; \
        create table xxx_temp_tab_user_info as \
        select a2.nickname,a1.id room_id,a2.uid \
        from oss_room_v2 a1 \
        left join oss_chushou_user_profile a2 on a1.creator_uid=a2.uid \
        where a1.pt_day='{yesterday}' and a2.pt_day='{yesterday}' \
        and a1.id in(21785,39849,3755657,54321,66629,18920,1180100,54001,55003,55001,41621,20185,55888,66601,52065) \
        ; \
        drop table if exists xxx_temp_tab_addsubscriber; \
        create table xxx_temp_tab_addsubscriber as \
        select a.room_id,a.uid,a.state,substr(a.created_time,1,10) subscriber_day,case when substr(a.created_time,1,10)=substr(b.created_time,1,10) and a.state=0 then 1 else 0 end newuser_addsubscriber_flag \
        from oss_room_subscriber_roomid a \
        left join oss_chushou_user_profile b on a.uid=b.uid \
        where a.pt_day='{yesterday}' and b.pt_day='{yesterday}' and a.state=0 and substr(a.created_time,1,10) between '{lastWeekMonday}' and '{lastWeekSunday}'; \
        drop table if exists xxx_temp_tab_cancelsubscriber; \
        create table xxx_temp_tab_cancelsubscriber as \
        select a.room_id,a.uid,a.state,substr(a.updated_time,1,10) cancel_subscriber_day,case when substr(a.updated_time,1,10)=substr(b.created_time,1,10) and a.state=-1 then 1 else 0 end newuser_cancel_subscriber_flag \
        from oss_room_subscriber_roomid a \
        left join oss_chushou_user_profile b on a.uid=b.uid \
        where a.pt_day='{yesterday}' and b.pt_day='{yesterday}' and a.state=-1 and substr(a.updated_time,1,10) between '{lastWeekMonday}' and '{lastWeekSunday}'; \
        drop table if exists xxx_temp_tab_frist_subscriber; \
        create table xxx_temp_tab_frist_subscriber as \
        select room_id,uid,state,created_time \
        from (select room_id,uid,state,created_time,row_number()over(partition by uid order by created_time) rk \
        from (select room_id,uid,state,created_time from oss_room_subscriber_roomid where pt_day='{yesterday}') x) xx \
        where rk=1; \
        drop table if exists xx0809_newadd_user_addsubscriber; \
        create table xx0809_newadd_user_addsubscriber as \
        select a1.nickname,a1.room_id,a1.uid, \
        a2.subscriber_day,a2.newuser_addsubscriber_flag,case when a3.room_id is not null then 1 else 0 end newuser_addsubscriber_frist_flag, \
        a2.uid addsubscriber_uid \
        from xxx_temp_tab_user_info a1 \
        left join xxx_temp_tab_addsubscriber a2 on a1.room_id=a2.room_id \
        left join xxx_temp_tab_frist_subscriber a3 on a2.room_id=a3.room_id and a2.uid=a3.uid \
        ; \
        drop table if exists xx0809_newadd_user_cancelsubscriber; \
        create table xx0809_newadd_user_cancelsubscriber as \
        select a1.nickname,a1.room_id,a1.uid, \
        a3.cancel_subscriber_day,a3.newuser_cancel_subscriber_flag, \
        a3.uid cancel_subscriber_uid \
        from xxx_temp_tab_user_info a1 \
        left join xxx_temp_tab_cancelsubscriber a3 on a1.room_id=a3.room_id \
        ; \
        drop table if exists xx0809_ouy_result_addsubscriber; \
        create table xx0809_ouy_result_addsubscriber as \
        select a1.nickname,a1.room_id,a1.uid,a1.subscriber_day, \
        count(newuser_addsubscriber_flag) total_addsubscriber_flag, \
        sum(newuser_addsubscriber_flag) newuser_addsubscriber_cnt, \
        sum(newuser_addsubscriber_frist_flag) newuser_addsubscriber_frist_flag, \
        sum(case when a1.newuser_addsubscriber_flag=1 then a2.message_cnt else null end) message_send_in_thisRoom, \
        sum(case when a1.newuser_addsubscriber_flag=1 then amount else 0 end) pay_amount, \
        a4.rk \
        from xx0809_newadd_user_addsubscriber a1 \
        left join (select roomid,uid,pt_day,count(*) message_cnt from oss_chushou_message_send where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by roomid,uid,pt_day) a2 on a1.room_id=int(a2.roomid) and a1.addsubscriber_uid=int(a2.uid) and a1.subscriber_day=a2.pt_day \
        left join (select uid,pt_day,sum(amount) amount from data_chushou_pay_info where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by uid,pt_day) a3 on a1.addsubscriber_uid=int(a3.uid) and a1.subscriber_day=a3.pt_day \
        left join (select room_id,last_time pt_day,rank rk from data_chushou_room_category_rank where category_id=0) a4 on a1.room_id=a4.room_id and a1.subscriber_day=a4.pt_day \
        group by a1.nickname,a1.room_id,a1.uid,a1.subscriber_day,a4.rk; \
        drop table if exists xx0809_ouy_result_cancelsubscriber; \
        create table xx0809_ouy_result_cancelsubscriber as \
        select a1.nickname,a1.room_id,a1.uid,a1.cancel_subscriber_day, \
        count(newuser_cancel_subscriber_flag) total_cancelsubscriber_flag, \
        sum(newuser_cancel_subscriber_flag) newuser_cancelsubscriber_cnt, \
        sum(case when a1.newuser_cancel_subscriber_flag=1 then a2.message_cnt else null end) message_send_in_thisRoom, \
        sum(case when a1.newuser_cancel_subscriber_flag=1 then amount else 0 end) pay_amount, \
        a4.rk \
        from xx0809_newadd_user_cancelsubscriber a1 \
        left join (select roomid,uid,pt_day,count(*) message_cnt from oss_chushou_message_send where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by roomid,uid,pt_day) a2 on a1.room_id=int(a2.roomid) and a1.cancel_subscriber_uid=int(a2.uid) and a1.cancel_subscriber_day=a2.pt_day \
        left join (select uid,pt_day,sum(amount) amount from data_chushou_pay_info where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by uid,pt_day) a3 on a1.cancel_subscriber_uid=int(a3.uid) and a1.cancel_subscriber_day=a3.pt_day \
        left join (select room_id,last_time pt_day,rank rk from data_chushou_room_category_rank where category_id=0) a4 on a1.room_id=a4.room_id and a1.cancel_subscriber_day=a4.pt_day \
        group by a1.nickname,a1.room_id,a1.uid,a1.cancel_subscriber_day,a4.rk; \
        " """.format(lastWeekMonday=lastWeekMonday, lastWeekSunday=lastWeekSunday, yesterday=yesterday))


def resultLoadMysql():
    """Export the joined Hive result to a text file and load it into MySQL.

    First deletes last week's rows from static_ouy_byday_everweek so the job
    is re-runnable, then bulk-loads with an explicit utf8 character set.
    """
    lastWeekMonday, lastWeekSunday = getWeekFristLastDay(weekflag=getNowYearLastWeek())
    os.system("""source /etc/profile; \
        /usr/lib/hive-current/bin/hive -e " \
        select a1.nickname,a1.room_id,a1.uid,a1.subscriber_day data_day,a1.total_addsubscriber_flag total_addsubscriber_cnt,a1.newuser_addsubscriber_cnt,newuser_addsubscriber_frist_flag newuser_addsubscriber_frist_cnt,a1.message_send_in_thisRoom,a1.pay_amount,a2.total_cancelsubscriber_flag total_cancelsubscriber_cnt,newuser_cancelsubscriber_cnt,a1.rk \
        from xx0809_ouy_result_addsubscriber a1 \
        left join xx0809_ouy_result_cancelsubscriber a2 on a1.room_id=a2.room_id and a1.uid=a2.uid and a1.subscriber_day=a2.cancel_subscriber_day \
        order by a1.room_id,data_day;" > /home/mysqlUser/nisj/automationDemand/ouyjx/resultTmp/result.txt""")
    os.system("""source /etc/profile; \
        /usr/bin/mysql -h199.199.199.199 -P6603 -umysqlUser -pmysqlPass -e "use funnyai_data; \
        delete from funnyai_data.static_ouy_byday_everweek where data_day between '{lastWeekMonday}' and '{lastWeekSunday}'; \
        load data local infile '/home/mysqlUser/nisj/automationDemand/ouyjx/resultTmp/result.txt' ignore into table funnyai_data.static_ouy_byday_everweek character set utf8 (nickname,room_id,uid,data_day,total_addsubscriber_cnt,newuser_addsubscriber_cnt,newuser_addsubscriber_frist_cnt,message_send_in_thisRoom,pay_amount,total_cancelsubscriber_cnt,newuser_cancelsubscriber_cnt,rk); \
        " """.format(lastWeekMonday=lastWeekMonday, lastWeekSunday=lastWeekSunday))
    # Alternative LOAD DATA options kept from the original for reference:
    # fields terminated by '\\t' enclosed by '"' lines terminated by '\\n'


if __name__ == '__main__':
    # Batch entry point (previously ran unconditionally on import).
    dataQuery()
    resultLoadMysql()
2、idfa与日志比对进行激活及充值数据计算
可以借鉴之处:
特定目录下文件名的获取;Hive自动建表及分区的自动添加;根据特定目录下的文件自动装载数据到Hive分区;装载文本数据到Mysql并设定装载时的字符集。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/idfaQuery.py
# -*- coding=utf-8 -*-
"""IDFA batch: load vendor CSV files into a partitioned Hive table, compare
the IDFAs against access/identifier logs, and push pay/relation aggregates
into MySQL.

Flow:
  1. partitionAdd_DataLoad() -- rebuilds xxxauto_idfa and loads one partition
                                per CSV file found in the source directory.
  2. dataQuery()             -- joins the IDFA partitions against the logs
                                and writes two result text files.
  3. resultLoadMysql()       -- bulk-loads both text files into MySQL
                                (utf8 character set, idempotent per batchFlag).
"""
import datetime
import os
import sys
import warnings

if sys.version_info[0] == 2:
    # Legacy Python-2-only encoding hack; guarded so the script also runs
    # (and imports) cleanly under Python 3.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')
warnings.filterwarnings("ignore")


def file_path(file_dir):
    """Return the file names directly under ``file_dir`` (non-recursive).

    The first item yielded by os.walk is the top-level directory itself, so
    returning inside the loop gives just its file list. Returns [] when the
    directory is missing or os.walk yields nothing (the original returned
    None, which made the caller's `for` loop crash with TypeError).
    """
    for _root, _dirs, files in os.walk(file_dir):
        return files
    return []


def partitionAdd_DataLoad():
    """(Re)create the xxxauto_idfa Hive table and load every CSV in the
    source directory into its own partition (partition key = file stem)."""
    src_file_dir = '/home/mysqlUser/nisj/automationDemand/idfaQuery/srcCsvData'
    partitionAddDataLoadAllSql = " \
        drop table if exists xxxauto_idfa; \
        CREATE TABLE xxxauto_idfa (active_time string, \
        campaign_id string, \
        tdid string, \
        idfa string, \
        android_id string, \
        advertiser_id string, \
        active_ip string, \
        active_type string, \
        device_type string, \
        click_ip string, \
        click_time string, \
        creative_id string, \
        keyword_id string, \
        imei string) \
        PARTITIONED BY (class string) \
        ROW FORMAT DELIMITED \
        FIELDS TERMINATED BY ',' \
        LINES TERMINATED BY '\n' \
        STORED AS TEXTFILE;"
    # Append one "add partition + load" pair per CSV, then run everything in
    # a single Hive session.
    for srcFile in file_path(src_file_dir):
        srcFileKey = srcFile.replace('.csv', '')
        partitionAddDataLoadSql = "alter table xxxauto_idfa add partition (class='{srcFileKey}'); \
        load data local inpath '/home/mysqlUser/nisj/automationDemand/idfaQuery/srcCsvData/{srcFileKey}.csv' overwrite into table xxxauto_idfa partition(class='{srcFileKey}');".format(srcFileKey=srcFileKey)
        partitionAddDataLoadAllSql = partitionAddDataLoadAllSql + partitionAddDataLoadSql
    os.system("""source /etc/profile; \
        /usr/lib/hive-current/bin/hive -e " \
        {partitionAddDataLoadAllSql} " \
        """.format(partitionAddDataLoadAllSql=partitionAddDataLoadAllSql))


def dataQuery(dateStart, dateEnd, batchFlag):
    """Compute pay and relation aggregates for the IDFA batch into text files.

    :param dateStart/dateEnd: 'YYYY-MM-DD' window applied to the log tables.
    :param batchFlag: label stamped on every output row; today's date is
        appended (note: the '' separator is a no-op kept from the original —
        possibly a missing delimiter, TODO confirm with the data owner).
    """
    batchFlag = batchFlag + '' + datetime.datetime.today().strftime('%Y-%m-%d')
    # Recharge amounts per day/partition for users whose device IDFA matched.
    os.system("""source /etc/profile; \
        /usr/lib/hive-current/bin/hive -e " \
        add jar /home/mysqlUser/nisj/udf-jar/mysqlUser_udf_radixChange.jar; \
        create temporary function RadixChange as 'com.kascend.mysqlUser.RadixChange'; \
        with tab_idfa_uid as (select distinct a1.idfa,RadixChange(lower(a1.uid),16,10) uid,a2.class,a1.pt_day \
        from (select distinct x1.uid,x2.idfa,x1.pt_day \
        from (select distinct identifier,uid,pt_day from bi_all_access_log where pt_day between '{dateStart}' and '{dateEnd}') x1 \
        inner join (select distinct identifier,idfa,pt_day from bi_all_identifier_idfa_log where pt_day between '{dateStart}' and '{dateEnd}') x2 on x1.identifier=x2.identifier and x1.pt_day=x2.pt_day) a1 \
        inner join xxxauto_idfa a2 on a1.idfa=a2.idfa and a1.pt_day=substr(a2.active_time,1,10)) \
        select '{batchFlag}' batchFlag,a1.pt_day,a2.class,sum(a1.amount) pay_amount,count(distinct a1.uid) pay_cnt \
        from data_chushou_pay_info a1 \
        inner join tab_idfa_uid a2 on a1.uid=a2.uid \
        where a1.state=0 and a1.pt_day between '{dateStart}' and '{dateEnd}' and a2.idfa<>'00000000-0000-0000-0000-000000000000' \
        and a1.pt_day>=a2.pt_day \
        group by a1.pt_day,a2.class;" > /home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaPayInfo.txt """.format(dateStart=dateStart, dateEnd=dateEnd, batchFlag=batchFlag))
    # How many of the supplied IDFAs can be related back to log identifiers/uids.
    os.system("""source /etc/profile; \
        /usr/lib/hive-current/bin/hive -e " \
        with tab_idfa_uid as (select distinct x1.uid,x2.idfa,x1.identifier \
        from (select distinct identifier,uid from bi_all_access_log where pt_day between '{dateStart}' and '{dateEnd}') x1 \
        inner join (select distinct identifier,idfa from bi_all_identifier_idfa_log where pt_day between '{dateStart}' and '{dateEnd}') x2 on x1.identifier=x2.identifier \
        ) \
        select '{batchFlag}' batchFlag,a2.class,count(distinct a2.idfa) valid_idfa_inexcel_cnt,count(distinct a1.idfa) relation_idfa_inlog_cnt,count(distinct a1.uid) relation_uid_inlog_cnt,count(distinct a1.identifier) relation_identifier_inlog_cnt \
        from xxxauto_idfa a2 \
        left join tab_idfa_uid a1 on a1.idfa=a2.idfa \
        where a2.idfa<>'00000000-0000-0000-0000-000000000000' \
        group by a2.class;" > /home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaRelationInfo.txt """.format(dateStart=dateStart, dateEnd=dateEnd, batchFlag=batchFlag))


def resultLoadMysql(batchFlag):
    """Load both result text files into MySQL, replacing any prior rows with
    the same batchFlag so the job is safely re-runnable."""
    batchFlag = batchFlag + '' + datetime.datetime.today().strftime('%Y-%m-%d')
    os.system("""source /etc/profile; \
        /usr/bin/mysql -h199.199.199.199 -P6603 -umysqlUser -pmysqlPass --default-character-set=utf8 -e "use funnyai_data; \
        delete from funnyai_data.static_idfa_pay_info where batchFlag = '{batchFlag}'; \
        delete from funnyai_data.static_idfa_relation_info where batchFlag = '{batchFlag}'; \
        load data local infile '/home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaPayInfo.txt' ignore into table funnyai_data.static_idfa_pay_info character set utf8 (batchFlag,pt_day,class,pay_amount,pay_uid_cnt); \
        load data local infile '/home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaRelationInfo.txt' ignore into table funnyai_data.static_idfa_relation_info character set utf8 (batchFlag,class,valid_idfa_inexcel_cnt,relation_idfa_inlog_cnt,relation_uid_inlog_cnt,relation_identifier_inlog_cnt); \
        " """.format(batchFlag=batchFlag))


if __name__ == '__main__':
    # Batch parameters and entry point (previously ran unconditionally on import).
    dateStart = '2017-08-14'
    dateEnd = '2017-08-20'
    batchFlag = 'ChenM比对(2017-08-14~2017-08-20)'
    partitionAdd_DataLoad()
    dataQuery(dateStart, dateEnd, batchFlag)
    resultLoadMysql(batchFlag)
阅读全文
0 0
- 用Python调度数据在Mysql及Hive间进行计算2例
- 用Python调度数据在Mysql及Hive间进行计算2例(续)-idfa与日志比对进行留存充值数据计算
- 用Python在Mysql与Hive间数据计算一例
- 用Python在Mysql与Hive间数据计算一例(续)
- Hive元数据在MySQL的存储逻辑及关系
- 使用Python脚本从Hive中取数据计算后加载到Mysql示例
- 在Python中对MySQL中的数据进行可视化
- 在Python中对MySQL中的数据进行可视化
- OOzie调度sqoop1 Action 从mysql导入数据到hive
- python脚本 用sqoop把mysql数据导入hive
- hive存储元数据在mysql配置
- 《利用python进行数据计算》第六章
- hive 元数据 mysql-2
- mysql 在当前表中对某一列数据进行计算后替换
- Hadoop数据经Hive汇总计算之后导出到Mysql
- Java api 调用Sqoop2进行MySQL-->Hive的数据同步
- 在HortonWorks HDP 2.1 和2.2 集群间进行数据迁移 包括(Hive数据表)
- 在Impala 和Hive里进行数据分区(1)
- 过滤器、监听器、拦截器的区别
- 多条目加载
- Simple Strings CodeForces
- NSData NSString格式相互转换
- 安卓四大组件之activity笔记
- 用Python调度数据在Mysql及Hive间进行计算2例
- python数据分析
- three.js加载obj模型
- python with语句
- 一行代码实现Okhttp,Retrofit,Glide下载上传进度监听
- 中式装修木雕花格有哪些特点, 中式装修木雕花格寓意
- configure配置安装详解
- android开发,如何实现通过上拉由一个activity切换到另一个activity
- umeng(友盟)实现第三方登录和分享详解之分享