【Python 可视化】数据透视表的用法及画饼图

来源:互联网 发布:武汉汉阳美工 编辑:程序博客网 时间:2024/06/06 17:59

这里写图片描述

这里写图片描述

这里写图片描述

这里写图片描述

这里写图片描述

这里写图片描述

# encoding: utf-8
"""Pivot-table statistics and pie charts for doctor consultation records.

The script reads ``C:/result.csv`` and uses the columns ``doctor_name``,
``doctor_score``, ``doctor_department`` and ``doctor_job``.  It then draws:

1. the share of records falling into each band of the per-doctor mean score;
2. the ten departments with the most records;
3. the number of records per job title;
4. one hand-filled specialty chart for the top doctor of three departments.

Every chart is saved as a PNG under ``D:/`` and also shown on screen.
"""
import re
import sys

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str<->unicode conversions use UTF-8 so the
    # Chinese byte-string labels below can be handed to matplotlib.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Use a font that contains CJK glyphs so Chinese labels and titles render.
mpl.rcParams['font.sans-serif'] = ['SimHei']
font_size = 11
mpl.rcParams['font.size'] = font_size

PIE_COLORS = ['blue', 'yellowgreen', 'gold', 'lightskyblue', 'lightcoral',
              'orange', 'pink']
TOP10_COLORS = ['c', 'm', 'red'] + PIE_COLORS


def draw_pie(values, labels, colors, title, path):
    """Draw one pie chart, save it to *path* and display it.

    values: wedge sizes.  labels: one label per wedge.  colors: colour cycle.
    title: figure title.  path: file name handed to ``plt.savefig``.
    """
    plt.axis('equal')  # equal aspect ratio keeps the pie circular
    plt.pie(values, labels=labels, autopct='%1.1f%%', colors=colors)
    plt.title(title)
    plt.savefig(path)
    plt.show()


def count_by(frame, column):
    """Return the ``doctor_score`` count per value of *column*, largest first.

    The result is a one-column DataFrame indexed by the values of *column*.
    """
    counts = pd.DataFrame(frame.groupby([column])['doctor_score'].count())
    return counts.sort_values(by='doctor_score', ascending=False)


def print_specialty_counts(sorted_records, department):
    """Print the fifth-column text of the first three rows of *department*.

    For each of those rows the text and every run of digits found in it are
    printed.  The digit lists are also returned, one list per row.
    """
    in_department = sorted_records['doctor_department'] == department
    # NOTE(review): column 4 is addressed by position only; presumably it is
    # the doctor's specialty description - confirm against the CSV header.
    texts = sorted_records[in_department].head(3).iloc[:, 4]
    found = []
    for text in texts:
        print(text)
        numbers = re.findall(r'\d+', text, re.S)
        print(numbers)
        found.append(numbers)
    return found


# The source DataFrame keeps its own name for the whole script.  The original
# reused ``data`` for the pie values, so every later groupby/sort_values call
# ran on a list or a Series and failed.
records = pd.read_csv("C:/result.csv")

# ---- 1. Mean score and record count per doctor --------------------------
per_doctor = pd.pivot_table(data=records, index=['doctor_name'],
                            values=['doctor_score'], fill_value=0,
                            aggfunc=[np.mean, len])
doctor_stats = pd.DataFrame({"doctor_name": per_doctor.index,
                             "score_mean": per_doctor.iloc[:, 0],
                             "num": per_doctor.iloc[:, 1]})
doctor_stats = doctor_stats.sort_values(by='score_mean', ascending=False)
doctor_stats['percent'] = doctor_stats['num'] / doctor_stats['num'].sum()

# Total number of records whose doctor's mean score falls in each band.
score = doctor_stats['score_mean']
num = doctor_stats['num']
band_counts = [
    num[score >= 4.5].sum(),
    num[(score >= 4.0) & (score < 4.5)].sum(),
    num[(score >= 3.5) & (score < 4.0)].sum(),
    num[score < 3.5].sum(),
]
print(band_counts)
draw_pie(band_counts, ['4.5-5.0', '4.0-4.5', '3.5-4.0', '3.5以下'],
         PIE_COLORS, u'消费者总体认可度分布图', "D:/a1.png")

# ---- 2. Record count and share per department ---------------------------
by_department = count_by(records, 'doctor_department')
# Drop the hospital-name prefix so the wedge labels stay short.
department = [str(each).replace('华西医院 ', '') for each in by_department.index]
department_stats = pd.DataFrame({'department': department,
                                 "num": by_department.iloc[:, 0]})
department_stats['percent'] = (department_stats['num']
                               / department_stats['num'].sum())
print(department_stats)
top10 = department_stats.head(10)
draw_pie(top10['num'], top10['department'], TOP10_COLORS,
         u'top10各科室在电话咨询板块参与度分布图', "D:/a2.png")

# ---- 3. Record count and share per job title ----------------------------
by_job = count_by(records, 'doctor_job')
# Newline characters inside a job title are replaced by the text '未知'.
doctor = [str(each).replace("\n", '未知') for each in by_job.index]
job_stats = pd.DataFrame({'doctor': doctor, "num": by_job.iloc[:, 0]})
job_stats['percent'] = job_stats['num'] / job_stats['num'].sum()
print(job_stats)
draw_pie(job_stats['num'], job_stats['doctor'], PIE_COLORS,
         u'每种职称下的医生人数在电话咨询板块医生整体水平分布图', "D:/a3.png")

# ---- 4. Specialty charts for the top doctor of three departments --------
# NOTE(review): the values and labels are hard-coded rather than computed;
# presumably they were transcribed by hand from the text printed by
# print_specialty_counts - confirm before reusing with other data.
TOP_DOCTOR_CHARTS = [
    {
        'department': '华西医院 神经外科',
        'values': [22, 19, 3, 1],
        'labels': ['脑肿瘤', '椎管内肿瘤', '听神经瘤', '脑膜瘤'],
        'title': u'top1华西医院 神经外科兰志刚医生擅长的类型比例分布图',
        'path': "D:/a5.png",
    },
    {
        'department': '华西医院 肝胆外科',
        'values': [26, 10, 5, 2],
        'labels': ['肝癌', '胆结石', '肝血管瘤', '肝病'],
        'title': u'top2华西医院 肝胆外科熊先泽医生擅长的类型比例分布图',
        'path': "D:/a6.png",
    },
    {
        'department': '华西医院 心血管内科',
        'values': [99, 5, 4, 4],
        'labels': ['冠心病', '心脏介入', '高血压', '心脏病'],
        'title': u'top3华西医院 心血管内科贺勇医生擅长的类型比例分布图',
        'path': "D:/a7.png",
    },
]

data_sort = records.sort_values(by='doctor_score', ascending=False)
for chart in TOP_DOCTOR_CHARTS:
    print_specialty_counts(data_sort, chart['department'])
    draw_pie(chart['values'], chart['labels'], PIE_COLORS,
             chart['title'], chart['path'])
原创粉丝点击