用python调用hive和sqoop

来源:互联网 发布:知行理工app最新版 编辑:程序博客网 时间:2024/06/14 08:33

来新公司了,要做一大坨的脚本调数据,看看用python怎么调这些东西,

#!/usr/bin/pythonimport time,osUSER="wilson.zhou"PWD="YCt452uz"URL="jdbc:postgresql://10.1.1.230:5432/xmo_dw"day = time.strftime('%Y-%m-%d',time.localtime(time.time() -24*60*60))[2::]tanx = '''insert overwrite directory '/shortdata/media_planner/{0}_tanx/'select show.adx,ip2city(show.ip),show.d,show.url,show.size,count(show.bid),count(click.bid),sum(win.price)FROM(SELECT a.ds,b.bid,b.price FROM bs_rtbwinner a LATERAL VIEW json_tuple(a.str, 'bid','price') b AS bid,price WHERE a.ds='{0}' and split(b.bid,'_')[0]='tanx') winleft join( select concat_ws('','20',a.ds) d,b.bid,split(b.bid,'_')[0] as adx,b.ip,parse_url(b.url,'HOST') url,concat_ws('x',c.w,c.h) SIZE from bs_showup a lateral view json_tuple(a.str, 'bid','ip','url','slotInfo') b as bid,ip,url,slotInfo lateral view json_tuple(b.slotInfo,'h','w') c as h ,w where a.ds='{0}' and split(b.bid,'_')[0]='tanx' and parse_url(b.url,'HOST') rlike '{3}') showon win.bid = show.bidleft join(  SELECT a.ds,b.bid FROM bs_click a LATERAL VIEW json_tuple(a.str, 'bid') b AS bid WHERE a.ds='{0}' and split(b.bid,'_')[0]='tanx')as clickON SHOW.bid = click.bidgroup by adx,ip,d,url,size"'''.format(day,day,day,'^[a-zA-Z0-9\-\.\_]+\.[a-zA-Z]{2,3}(/\S*)?\.?$')tanx_cmd = "hive -e \"add jar /opt/pig_home/Pig_script/media/iclick-hive-udf-0.0.1-SNAPSHOT.jar; create temporary function ip2city as 'com.iclick.hive.udf.getCityByIp'; "+tanxprint(tanx_cmd)os.system(tanx_cmd)try:    tanxline = "sqoop export --connect {0} --username {1} --password {2} --export-dir /shortdata/media_planner/{3}_tanx  --table media_planner_tanx --fields-terminated-by '\001'  --input-null-non-string '\\\N' --input-null-string '\\\N'  --columns adx,city,ds,url,size,impression,click,expense".format(URL,USER,PWD,day)    print("sqoop cmd="+tanxline)    os.system(tanxline)except:    print("insert database failure")


0 0
原创粉丝点击