pandas库生成数据透视表

来源:互联网 发布:文本相似度算法 java 编辑:程序博客网 时间:2024/06/02 02:00

一直在写pandas,都没记录下来,下面的语言是工作中用pandas生成了一个透视表,pandas比较灵活,具体代码如下:


#ecoding:utf-8'''Created on 2015年9月7日@author: ZHOUMEIXU204'''import MySQLdbimport  pandas  as pdimport  numpy  as npimport datetimecon=MySQLdb.connect(host="10.10.109.62", port=1333, user="zhoumeixu204", \                     passwd="zhoumeixu204@123456!", db="tracker", use_unicode=True, charset="utf8")result=pd.read_sql("select * from( \SELECT left(insert_time,8) as insert_date, option_label as position_name , count(*) as cnt  FROM tracker.hbase_event \where site = 'pc' and category = 'hp' and action = 'click' and insert_time between 20150831000000000 and 20150907000000000 \group by 1,2)a  union(SELECT left(insert_time,8) as insert_date,  position_name ,count(*) as cnt FROM tracker.hbase_campaign \where site = 'pc' and position = 'hp' \and insert_time between 20150831000000000 and 20150907000000000 group by 1,2)",con)result_pivot=pd.pivot_table(result, index=['position_name'],columns=["insert_date"],values=['cnt'],  aggfunc=np.sum)print(type(result_pivot))result_pivot[u'总计']=result_pivot.sum(axis=1)result_sum=pd.DataFrame(result_pivot.sum()).Tresult_pivot_sum=result_pivot.append(result_sum)result_pivot_sum=result_pivot_sum.rename(index={0:u'总计'})time_day=(datetime.datetime.now()-datetime.timedelta(days=1)).strftime("%Y.%m.%d")time_day_week=(datetime.datetime.now()-datetime.timedelta(days=8)).strftime("%Y.%m.%d")result='result'+time_day+"--"+time_day_week# result_pivot=result_pivot.append(result_pivot.sum(axis=0),ignore_index=True)result_pivot_sum.to_excel("D:\\Users\\zhoumeixu204\\Desktop\\%s.xls"%(result),encoding="utf-8")con.commit()con.close()print("sucess")# result_pivot=pd.pivot_table(result, index=['position_name'],columns=["insert_date"],values=['cnt'],  aggfunc=np.sum,margins=True)  有margins自动会all


0 0