用Python调度数据在Mysql及Hive间进行计算2例

来源:互联网 发布:北京天际网络 编辑:程序博客网 时间:2024/06/14 03:27
1、经济商务数据的按周计算
可以借鉴之处:
当前当年第几周的获取;对应周的周一和周末的日期的获取;mysql文本数据的装载。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/ouyjxQuery.py
# -*- coding=utf-8 -*-import datetimeimport osimport warningsimport sysreload(sys)sys.setdefaultencoding('utf8')warnings.filterwarnings("ignore")def getNowYearLastWeek():    # 当前时间年第几周的计算    timenow = datetime.datetime.now() - datetime.timedelta(days=7)    NowYearWeek = timenow.isocalendar()    return str(NowYearWeek[0])+"#"+str(NowYearWeek[1])def getWeekFristLastDay(weekflag):    yearnum = weekflag[0:4]  # 取到年份    weeknum = weekflag[5:7]  # 取到周    stryearstart = yearnum + '0101'  # 当年第一天    yearstart = datetime.datetime.strptime(stryearstart, '%Y%m%d')  # 格式化为日期格式    yearstartcalendarmsg = yearstart.isocalendar()  # 当年第一天的周信息    yearstartweekday = yearstartcalendarmsg[2]    yearstartyear = yearstartcalendarmsg[0]    if yearstartyear < int(yearnum):        daydelat = (8 - int(yearstartweekday)) + (int(weeknum) - 1) * 7    else:        daydelat = (8 - int(yearstartweekday)) + (int(weeknum) - 2) * 7    week1day = (yearstart + datetime.timedelta(days=daydelat)).date().strftime('%Y-%m-%d')    week7day = (yearstart + datetime.timedelta(days=daydelat + 6)).date().strftime('%Y-%m-%d')    return week1day, week7daydef dataQuery():    lastWeekMonday = getWeekFristLastDay(weekflag=getNowYearLastWeek())[0]    lastWeekSunday = getWeekFristLastDay(weekflag=getNowYearLastWeek())[1]    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')    # 计算对应数据的充值情况 向目录路径写进结果数据    os.system("""source /etc/profile; \                /usr/lib/hive-current/bin/hive -e "drop table if exists xxx_temp_tab_user_info; \                create table xxx_temp_tab_user_info as \                select a2.nickname,a1.id room_id,a2.uid \                from oss_room_v2 a1 \                left join oss_chushou_user_profile a2 on a1.creator_uid=a2.uid \                where a1.pt_day='{yesterday}' and a2.pt_day='{yesterday}' \                and a1.id in(21785,39849,3755657,54321,66629,18920,1180100,54001,55003,55001,41621,20185,55888,66601,52065) \                ; \                drop table if exists xxx_temp_tab_addsubscriber; \                create table xxx_temp_tab_addsubscriber as \                select a.room_id,a.uid,a.state,substr(a.created_time,1,10) subscriber_day,case when substr(a.created_time,1,10)=substr(b.created_time,1,10) and a.state=0 then 1 else 0 end newuser_addsubscriber_flag \                from oss_room_subscriber_roomid a \                left join oss_chushou_user_profile b on a.uid=b.uid \                where a.pt_day='{yesterday}' and b.pt_day='{yesterday}' and a.state=0 and substr(a.created_time,1,10) between '{lastWeekMonday}' and '{lastWeekSunday}'; \                drop table if exists xxx_temp_tab_cancelsubscriber; \                create table xxx_temp_tab_cancelsubscriber as \                select a.room_id,a.uid,a.state,substr(a.updated_time,1,10) cancel_subscriber_day,case when substr(a.updated_time,1,10)=substr(b.created_time,1,10) and a.state=-1 then 1 else 0 end newuser_cancel_subscriber_flag \                from oss_room_subscriber_roomid a \                left join oss_chushou_user_profile b on a.uid=b.uid \                where a.pt_day='{yesterday}' and b.pt_day='{yesterday}' and a.state=-1 and substr(a.updated_time,1,10) between '{lastWeekMonday}' and '{lastWeekSunday}'; \                drop table if exists xxx_temp_tab_frist_subscriber; \                create table xxx_temp_tab_frist_subscriber as \                select room_id,uid,state,created_time \                from (select room_id,uid,state,created_time,row_number()over(partition by uid order by created_time) rk \                from (select room_id,uid,state,created_time from oss_room_subscriber_roomid where pt_day='{yesterday}') x) xx \                where rk=1; \                drop table if exists xx0809_newadd_user_addsubscriber; \                create table xx0809_newadd_user_addsubscriber as \                select a1.nickname,a1.room_id,a1.uid, \                a2.subscriber_day,a2.newuser_addsubscriber_flag,case when a3.room_id is not null then 1 else 0 end newuser_addsubscriber_frist_flag, \                a2.uid addsubscriber_uid \                from xxx_temp_tab_user_info a1 \                left join xxx_temp_tab_addsubscriber a2 on a1.room_id=a2.room_id \                left join xxx_temp_tab_frist_subscriber a3 on a2.room_id=a3.room_id and a2.uid=a3.uid \                ; \                drop table if exists xx0809_newadd_user_cancelsubscriber; \                create table xx0809_newadd_user_cancelsubscriber as \                select a1.nickname,a1.room_id,a1.uid, \                a3.cancel_subscriber_day,a3.newuser_cancel_subscriber_flag, \                a3.uid cancel_subscriber_uid \                from xxx_temp_tab_user_info a1 \                left join xxx_temp_tab_cancelsubscriber a3 on a1.room_id=a3.room_id \                ; \                drop table if exists xx0809_ouy_result_addsubscriber; \                create table xx0809_ouy_result_addsubscriber as \                select a1.nickname,a1.room_id,a1.uid,a1.subscriber_day, \                count(newuser_addsubscriber_flag) total_addsubscriber_flag,  \                sum(newuser_addsubscriber_flag) newuser_addsubscriber_cnt, \                sum(newuser_addsubscriber_frist_flag) newuser_addsubscriber_frist_flag, \                sum(case when a1.newuser_addsubscriber_flag=1 then a2.message_cnt else null end) message_send_in_thisRoom, \                sum(case when a1.newuser_addsubscriber_flag=1 then amount else 0 end) pay_amount, \                a4.rk \                from xx0809_newadd_user_addsubscriber a1 \                left join (select roomid,uid,pt_day,count(*) message_cnt from oss_chushou_message_send where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by roomid,uid,pt_day) a2 on a1.room_id=int(a2.roomid) and a1.addsubscriber_uid=int(a2.uid) and a1.subscriber_day=a2.pt_day \                left join (select uid,pt_day,sum(amount) amount from data_chushou_pay_info where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by uid,pt_day) a3 on a1.addsubscriber_uid=int(a3.uid) and a1.subscriber_day=a3.pt_day \                left join (select room_id,last_time pt_day,rank rk from data_chushou_room_category_rank where category_id=0) a4 on a1.room_id=a4.room_id and a1.subscriber_day=a4.pt_day \                group by a1.nickname,a1.room_id,a1.uid,a1.subscriber_day,a4.rk; \                drop table if exists xx0809_ouy_result_cancelsubscriber; \                create table xx0809_ouy_result_cancelsubscriber as \                select a1.nickname,a1.room_id,a1.uid,a1.cancel_subscriber_day, \                count(newuser_cancel_subscriber_flag) total_cancelsubscriber_flag,  \                sum(newuser_cancel_subscriber_flag) newuser_cancelsubscriber_cnt, \                sum(case when a1.newuser_cancel_subscriber_flag=1 then a2.message_cnt else null end) message_send_in_thisRoom, \                sum(case when a1.newuser_cancel_subscriber_flag=1 then amount else 0 end) pay_amount, \                a4.rk \                from xx0809_newadd_user_cancelsubscriber a1 \                left join (select roomid,uid,pt_day,count(*) message_cnt from oss_chushou_message_send where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by roomid,uid,pt_day) a2 on a1.room_id=int(a2.roomid) and a1.cancel_subscriber_uid=int(a2.uid) and a1.cancel_subscriber_day=a2.pt_day \                left join (select uid,pt_day,sum(amount) amount from data_chushou_pay_info where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by uid,pt_day) a3 on a1.cancel_subscriber_uid=int(a3.uid) and a1.cancel_subscriber_day=a3.pt_day \                left join (select room_id,last_time pt_day,rank rk from data_chushou_room_category_rank where category_id=0) a4 on a1.room_id=a4.room_id and a1.cancel_subscriber_day=a4.pt_day \                group by a1.nickname,a1.room_id,a1.uid,a1.cancel_subscriber_day,a4.rk; \                " """.format(lastWeekMonday=lastWeekMonday, lastWeekSunday=lastWeekSunday, yesterday=yesterday));def resultLoadMysql():    lastWeekMonday = getWeekFristLastDay(weekflag=getNowYearLastWeek())[0]    lastWeekSunday = getWeekFristLastDay(weekflag=getNowYearLastWeek())[1]    os.system("""source /etc/profile; \                /usr/lib/hive-current/bin/hive -e " \                select a1.nickname,a1.room_id,a1.uid,a1.subscriber_day data_day,a1.total_addsubscriber_flag total_addsubscriber_cnt,a1.newuser_addsubscriber_cnt,newuser_addsubscriber_frist_flag newuser_addsubscriber_frist_cnt,a1.message_send_in_thisRoom,a1.pay_amount,a2.total_cancelsubscriber_flag total_cancelsubscriber_cnt,newuser_cancelsubscriber_cnt,a1.rk \                from xx0809_ouy_result_addsubscriber a1 \                left join xx0809_ouy_result_cancelsubscriber a2 on a1.room_id=a2.room_id and a1.uid=a2.uid and a1.subscriber_day=a2.cancel_subscriber_day \                order by a1.room_id,data_day;" > /home/mysqlUser/nisj/automationDemand/ouyjx/resultTmp/result.txt""")    os.system("""source /etc/profile; \                 /usr/bin/mysql  -h199.199.199.199 -P6603 -umysqlUser -pmysqlPass -e "use funnyai_data; \                 delete from funnyai_data.static_ouy_byday_everweek where data_day between '{lastWeekMonday}' and '{lastWeekSunday}'; \                 load data local infile '/home/mysqlUser/nisj/automationDemand/ouyjx/resultTmp/result.txt' ignore into table funnyai_data.static_ouy_byday_everweek character set utf8  (nickname,room_id,uid,data_day,total_addsubscriber_cnt,newuser_addsubscriber_cnt,newuser_addsubscriber_frist_cnt,message_send_in_thisRoom,pay_amount,total_cancelsubscriber_cnt,newuser_cancelsubscriber_cnt,rk); \                 " """.format(lastWeekMonday=lastWeekMonday, lastWeekSunday=lastWeekSunday))# fields terminated by '\\t' enclosed by '"' lines terminated by '\\n'# Batch TestdataQuery()resultLoadMysql()

2、idfa与日志比对进行激活及充值数据计算
可以借鉴之处:
特定目录下文件名的获取;Hive自动建表及分区的自动添加;根据特定目录下的文件自动装载数据到Hive分区;装载文本数据到Mysql并设定装载时的字符集。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/idfaQuery.py
# -*- coding=utf-8 -*-import osimport warningsimport datetimeimport sysreload(sys)sys.setdefaultencoding('utf8')warnings.filterwarnings("ignore")def file_path(file_dir):    for root, dirs, files in os.walk(file_dir):        # print(root)        # print(dirs)        return (files)def partitionAdd_DataLoad():    src_file_dir = '/home/mysqlUser/nisj/automationDemand/idfaQuery/srcCsvData'    partitionAddDataLoadAllSql = " \    drop table if exists xxxauto_idfa; \    CREATE TABLE xxxauto_idfa (active_time string, \    campaign_id string, \    tdid string, \    idfa string, \    android_id string, \    advertiser_id string, \    active_ip string, \    active_type string, \    device_type string, \    click_ip string, \    click_time string, \    creative_id string, \    keyword_id string, \    imei string) \    PARTITIONED BY (class string) \    ROW FORMAT DELIMITED \    FIELDS TERMINATED BY ','  \    LINES TERMINATED BY '\n' \    STORED AS TEXTFILE;"    for srcFile in file_path(src_file_dir):        srcFileKey = srcFile.replace('.csv', '')        partitionAddDataLoadSql = "alter table xxxauto_idfa add partition (class='{srcFileKey}'); \        load data local inpath '/home/mysqlUser/nisj/automationDemand/idfaQuery/srcCsvData/{srcFileKey}.csv' overwrite into table xxxauto_idfa partition(class='{srcFileKey}');".format(srcFileKey=srcFileKey)        partitionAddDataLoadAllSql = partitionAddDataLoadAllSql + partitionAddDataLoadSql    # 装载数据    os.system("""source /etc/profile; \                /usr/lib/hive-current/bin/hive -e " \                {partitionAddDataLoadAllSql}                " \                """.format(partitionAddDataLoadAllSql=partitionAddDataLoadAllSql))def dataQuery(dateStart, dateEnd, batchFlag):    batchFlag = batchFlag + '' + datetime.datetime.today().strftime('%Y-%m-%d')    # 计算对应数据的充值情况 向目录路径写进结果数据    os.system("""source /etc/profile; \                /usr/lib/hive-current/bin/hive -e " \                add jar /home/mysqlUser/nisj/udf-jar/mysqlUser_udf_radixChange.jar; \                create temporary function RadixChange as 'com.kascend.mysqlUser.RadixChange'; \                with tab_idfa_uid as (select distinct a1.idfa,RadixChange(lower(a1.uid),16,10) uid,a2.class,a1.pt_day \                from (select distinct x1.uid,x2.idfa,x1.pt_day \                from (select distinct identifier,uid,pt_day from bi_all_access_log where pt_day between '{dateStart}' and '{dateEnd}') x1 \                inner join (select distinct identifier,idfa,pt_day from bi_all_identifier_idfa_log where pt_day between '{dateStart}' and '{dateEnd}') x2 on x1.identifier=x2.identifier and x1.pt_day=x2.pt_day) a1 \                inner join xxxauto_idfa a2 on a1.idfa=a2.idfa and a1.pt_day=substr(a2.active_time,1,10)) \                select '{batchFlag}' batchFlag,a1.pt_day,a2.class,sum(a1.amount) pay_amount,count(distinct a1.uid) pay_cnt \                from data_chushou_pay_info a1 \                inner join tab_idfa_uid a2 on a1.uid=a2.uid \                where a1.state=0 and a1.pt_day between '{dateStart}' and '{dateEnd}' and a2.idfa<>'00000000-0000-0000-0000-000000000000' \                  and a1.pt_day>=a2.pt_day \                group by a1.pt_day,a2.class;" > /home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaPayInfo.txt """.format(dateStart=dateStart, dateEnd=dateEnd, batchFlag=batchFlag));    # 与日志文件进行比对    os.system("""source /etc/profile; \                /usr/lib/hive-current/bin/hive -e " \                with tab_idfa_uid as (select distinct x1.uid,x2.idfa,x1.identifier \                from (select distinct identifier,uid from bi_all_access_log where pt_day between '{dateStart}' and '{dateEnd}') x1 \                inner join (select distinct identifier,idfa from bi_all_identifier_idfa_log where pt_day between '{dateStart}' and '{dateEnd}') x2 on x1.identifier=x2.identifier \                ) \                select '{batchFlag}' batchFlag,a2.class,count(distinct a2.idfa) valid_idfa_inexcel_cnt,count(distinct a1.idfa) relation_idfa_inlog_cnt,count(distinct a1.uid) relation_uid_inlog_cnt,count(distinct a1.identifier) relation_identifier_inlog_cnt \                from xxxauto_idfa a2 \                left join tab_idfa_uid a1 on a1.idfa=a2.idfa \                where a2.idfa<>'00000000-0000-0000-0000-000000000000' \                group by a2.class;" > /home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaRelationInfo.txt """.format(dateStart=dateStart, dateEnd=dateEnd, batchFlag=batchFlag));def resultLoadMysql(batchFlag):    batchFlag = batchFlag + '' + datetime.datetime.today().strftime('%Y-%m-%d')    os.system("""source /etc/profile; \                 /usr/bin/mysql  -h199.199.199.199 -P6603 -umysqlUser -pmysqlPass --default-character-set=utf8 -e "use funnyai_data; \                 delete from funnyai_data.static_idfa_pay_info where batchFlag = '{batchFlag}'; \                 delete from funnyai_data.static_idfa_relation_info where batchFlag = '{batchFlag}'; \                 load data local infile '/home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaPayInfo.txt' ignore into table funnyai_data.static_idfa_pay_info character set utf8  (batchFlag,pt_day,class,pay_amount,pay_uid_cnt); \                 load data local infile '/home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaRelationInfo.txt' ignore into table funnyai_data.static_idfa_relation_info character set utf8  (batchFlag,class,valid_idfa_inexcel_cnt,relation_idfa_inlog_cnt,relation_uid_inlog_cnt,relation_identifier_inlog_cnt); \                 " """.format(batchFlag=batchFlag))dateStart = '2017-08-14'dateEnd = '2017-08-20'batchFlag = 'ChenM比对(2017-08-14~2017-08-20)'partitionAdd_DataLoad()dataQuery(dateStart, dateEnd, batchFlag)resultLoadMysql(batchFlag)

阅读全文
0 0