Python可视化方案增加

来源:互联网 发布:php商城订单 编辑:程序博客网 时间:2024/05/29 07:38


# coding: utf-8


# In[1]:






import os
import pymysql
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns
from ggplot import *




# In[2]:


#配置MySQL


conn = pymysql.connect(host='127.0.0.1', port=3306, user='*****', passwd='*****',db='******',charset='utf8')


cur = conn.cursor()




# In[3]:


# sql语句
xybase = "SELECT  f4 as ajbh,f5 as kehu, f8 as shfzh18,f22 as ywy,f14 as zjqkje,              f15 as zjshje,f19 as jdsj from xybase where f22 is not null and f5 in ('兴业','广发')"


# 利用pandas 模块导入mysql数据
mysqlxybase = pd.read_sql(xybase, conn)






# In[4]:


# sql语句
xydzd = "SELECT f23 as ajbh,f2 as kehu, f1 as shfzh18,f18 as ywy,f7 as hkrq,f10 as hkmx,              f17 as hkbz from xydzd where f10 > 0  and f2 in ('兴业','广发')"


# 利用pandas 模块导入mysql数据
mysqlxydzd = pd.read_sql(xydzd, conn)




# In[5]:


# sql语句
mysqlprovince = "SELECT * from province"


# 利用pandas 模块导入mysql数据
province = pd.read_sql(mysqlprovince, conn)




# In[6]:


xydzd = mysqlxydzd




# In[7]:


xydzd['hkmx'] = xydzd['hkmx'].astype(float)
xydzd['hkbz'] = xydzd['hkbz'].astype(int)




# In[8]:


xydzd = xydzd[xydzd.hkbz==1]
xydzd = xydzd[xydzd.hkmx>0]




# In[9]:


xydzddata =xydzd[['ajbh','kehu','hkrq','hkmx','shfzh18']]




# In[10]:


xydzddata["shfzhnum"] = xydzddata["shfzh18"].str.len()
xydzddata = xydzddata[xydzddata.shfzhnum==18]
xydzddata["bornyear"]=xydzddata["shfzh18"].str.slice(6,10)
xydzddata["sex"]=xydzddata["shfzh18"].str.get(16)
xydzddata["address"]=xydzddata["shfzh18"].str.slice(0,2)
xydzddata["shfzhnum"] = xydzddata["shfzh18"].str.len()




# In[11]:


xydzddata['year'] = xydzddata['hkrq'].str.slice(0,4).astype(int)
xydzddata['month'] = xydzddata['hkrq'].str.slice(5,7).astype(int)




# In[12]:


xydzddata["nnn"] = 1
xydzddata["bornyear"] = xydzddata["bornyear"].astype(int)
xydzddata["age"] = 2017-xydzddata["bornyear"]
xydzddata["sex"] = xydzddata["sex"].astype(int)
xydzddata["sex"][xydzddata["sex"]%2==0]='女'
xydzddata["sex"][xydzddata["sex"]!='女']='男'




# In[13]:


xydzddata['address'] = xydzddata['address'].astype(int)




# In[14]:


xydzddata = pd.merge(xydzddata,province, left_on='address', right_on='shfnum', left_index=False, right_index=False ,how='left')
xydzddata = xydzddata[['ajbh','kehu','hkrq','hkmx','shfzh18','sex','age','province','year','month','nnn']]
xydzddata = xydzddata[xydzddata.year>2015]




# In[15]:


#每个客户每月回款情况
xydzdhkmx = xydzddata['hkmx'].groupby([xydzddata['kehu'], xydzddata['year'], xydzddata['month']]).sum().reset_index()




# In[16]:


#每个月还款次数
xydzddata1 = xydzddata[xydzddata.hkmx>0]
hkcsh = xydzddata1['ajbh'].groupby([xydzddata1['kehu'], xydzddata1['year'], xydzddata1['month']]).count().reset_index()




# In[17]:


#每个月还款案件
xydzddata2 = xydzddata[xydzddata.hkmx>0]
xydzddata2 = xydzddata2[['ajbh','kehu','year','month']]
xydzddata2 = xydzddata2.drop_duplicates()
hkaj = xydzddata2['ajbh'].groupby([xydzddata1['kehu'], xydzddata1['year'], xydzddata1['month']]).count().reset_index()




# In[18]:


#地区图
region = xydzddata1['nnn'].groupby([xydzddata1['kehu'], xydzddata1['year'], xydzddata1['province']]).count().reset_index()




# In[19]:


bins = [18,30,40,50,100]




# In[20]:


group_names = ['20-30','30-40','40-50','50以上']




# In[21]:


xydzddata1['age1']=pd.cut(xydzddata1['age'],bins,labels=group_names)




# In[22]:


agedata = xydzddata1['nnn'].groupby([xydzddata1['kehu'], xydzddata1['age1']]).count().reset_index()
agedata = agedata[(agedata.kehu == '兴业') | (agedata.kehu == '广发')]




# In[23]:


mysqlxybase['zjqkje'] = mysqlxybase['zjqkje'].astype(float)
mysqlxybase['zjshje'] = mysqlxybase['zjshje'].astype(float)




# In[24]:


xybase = mysqlxybase
xybasedata = xybase[['ajbh','kehu','shfzh18','zjqkje','zjshje','jdsj']]
xybasedata = xybasedata[xybasedata.zjqkje>0]




# In[25]:


xybasedata['year'] = xybasedata['jdsj'].str.slice(0,4).astype(int)
xybasedata['month'] = xybasedata['jdsj'].str.slice(5,7).astype(int)




# In[26]:


xybasedata = xybasedata[xybasedata.year>2015]




# In[27]:


#xybase去重
xybasedata = xybasedata.drop_duplicates()




# In[28]:


xybasezhanbi = xybasedata[['zjqkje','zjshje']].groupby([xybasedata['kehu'], xybasedata['year'], xybasedata['month']]).sum().reset_index()




# In[29]:


xybasezhanbi['zhanbi'] =round(xybasezhanbi['zjshje']/xybasezhanbi['zjqkje'],5)




# In[30]:


xingyehkmx = xydzdhkmx[(xydzdhkmx.kehu == '兴业')]
guangfahkmx = xydzdhkmx[(xydzdhkmx.kehu == '广发')]




# In[31]:


get_ipython().magic('matplotlib inline')
sns.set_style("whitegrid")
sns.set_context("talk")
mpl.rcParams['font.sans-serif'] = ['Microsoft YaHei'] #指定默认字体  
mpl.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
ax = sns.barplot(x="month", y="hkmx", hue="year", data=xingyehkmx)
ax.set_xlabel('月份',fontsize=15)
ax.set_ylabel('还款总额',fontsize=15)
ax.set_title('兴业客户',fontsize=15)
plt.show()




# In[32]:


ax1 = sns.barplot(x="month", y="hkmx", hue="year", data=guangfahkmx)
ax1.set_xlabel('月份',fontsize=15)
ax1.set_ylabel('还款总额',fontsize=15)
ax1.set_title('广发客户',fontsize=15)
plt.show()




# In[33]:


xingyehkzhb = xybasezhanbi[(xybasezhanbi.kehu == '兴业')]
guangfahkzhb = xybasezhanbi[(xybasezhanbi.kehu == '广发')]
xingyehkzhb['year'] = xingyehkzhb['year'].astype(str)
guangfahkzhb['year'] = guangfahkzhb['year'].astype(str)




# In[34]:


ggplot(aes(x='month', y='zhanbi', colour='year'), data=xingyehkzhb) +   geom_point()+    geom_line()+    xlab('月份')+    ylab('还款占比')+    ggtitle('兴业客户还款占比情况')+    scale_x_continuous(breaks=range(1,13))




# In[35]:


ggplot(aes(x='month', y='zhanbi', colour='year'), data=guangfahkzhb) +   geom_point()+    geom_line()+    xlab('月份')+    ylab('还款占比')+    ggtitle('广发客户还款占比情况')+    scale_x_continuous(breaks=range(1,13))




# In[36]:


plt.figure(1)
plt.figure(2) 
plt1=plt.subplot(221)
plt2=plt.subplot(222)
plt.figure(1) 
ax2 = sns.barplot(x="age1", y="nnn", hue="kehu", data=agedata)
ax2.set_xlabel('年龄段',fontsize=15)
ax2.set_ylabel('数量',fontsize=15)
ax2.set_title('年龄段回款分析',fontsize=15)
plt.sca(plt1)  
explode = [0, 0.1, 0, 0] 
xingyeagedata = agedata[agedata.kehu=='兴业']


plt.pie(x=xingyeagedata['nnn'], labels=xingyeagedata['age1'],  explode=explode, autopct='%3.1f %%',        shadow=True, labeldistance=1.1,   startangle = 90,pctdistance = 0.6)


plt.title('兴业客户年龄段回款情况')


plt.sca(plt2)  
explode = [0, 0.1, 0, 0] 
guangfaagedata = agedata[agedata.kehu=='广发']


plt.title('广发客户年龄段回款情况')
plt.pie(x=guangfaagedata['nnn'], labels=guangfaagedata['age1'], explode=explode,  autopct='%3.1f %%',        shadow=True,  labeldistance=1.1,  startangle = 90,pctdistance = 0.6)


plt.show()




# In[37]:


#地区图
region1 = xydzddata1['hkmx'].groupby([xydzddata1['kehu'], xydzddata1['year'], xydzddata1['province']]).sum().reset_index()




# In[38]:


regiondata = pd.merge(region1,region,how='left',on=['kehu','year','province'])




# In[39]:


regiondata['newprovince'] = '地区'




# In[40]:


regiondata['province'] = regiondata['province'].astype(str)
regiondata['newprovince'] = regiondata['newprovince'].astype(str)




# In[41]:


regiondata['newprovince'][(regiondata.province == '北京') | (regiondata.province == '天津') | (regiondata.province == '河北') | (regiondata.province == '山西')                   | (regiondata.province == '内蒙古' )] = '华北'




# In[42]:


regiondata['newprovince'][(regiondata.province == '辽宁' )|( regiondata.province == '吉林') | (regiondata.province == '黑龙江') ] = '东北'




# In[43]:


regiondata['newprovince'][(regiondata.province == '上海' )| (regiondata.province == '江苏') |( regiondata.province == '浙江') | (regiondata.province == '安徽')                   | (regiondata.province == '福建' )| (regiondata.province == '江西' )| (regiondata.province == '山东' )] = '华东'




# In[44]:


regiondata['newprovince'][(regiondata.province == '河南') |( regiondata.province == '湖北') | (regiondata.province == '湖南') ] = '华中'




# In[45]:


regiondata['newprovince'][(regiondata.province == '重庆') | (regiondata.province == '四川') | (regiondata.province == '贵州' )|( regiondata.province == '云南')                   | (regiondata.province == '西藏') ] = '西南'




# In[46]:


regiondata['newprovince'][(regiondata.province == '陕西') | (regiondata.province == '甘肃') | (regiondata.province == '青海' )|( regiondata.province == '宁夏')                   | (regiondata.province == '新疆') ] = '西北'




# In[47]:


regiondata['newprovince'][(regiondata.province == '广东') | (regiondata.province == '广西' )| (regiondata.province == '海南' )] = '华南'




# In[48]:


regionplot = regiondata['hkmx'].groupby([regiondata['kehu'], regiondata['year'], regiondata['newprovince']]).sum().reset_index()




# In[49]:


regionplot['year'] = regionplot['year'].astype(str)




# In[50]:


plt.figure(1)
plt.figure(2) 
plt.figure(1) 
ax3 = sns.barplot(x="newprovince", y="hkmx", hue="kehu", data=regionplot[regionplot.year=='2016'])
ax3.set_xlabel('地区',fontsize=15)
ax3.set_ylabel('还款金额',fontsize=15)
ax3.set_title('2016地区回款分析',fontsize=15)
plt.figure(2)
ax4 = sns.barplot(x="newprovince", y="hkmx", hue="kehu", data=regionplot[regionplot.year=='2017'])
ax4.set_xlabel('地区',fontsize=15)
ax4.set_ylabel('还款金额',fontsize=15)
ax4.set_title('2017地区回款分析',fontsize=15)
plt.show()




# In[ ]:








# In[133]:


import plotly.plotly as py
import plotly.graph_objs as go


fig = {
  "data": [
    {
      "values": [16, 25, 12, 6],
      "labels": [
        "20-30",
        "30-40",
        "40-50",
        "50以上"
      
      ],
      "domain": {"x": [0, .48]},
      "name": "回款年龄段",
      "hoverinfo":"label+percent+name",
      "hole": .4,
      "type": "pie"
    },     
    {
      "values": [27, 11, 25, 8],
      "labels": [
        "20-30",
        "30-40",
        "40-50",
        "50以上"
      ],
      "text":"委案年龄段",
      "textposition":"inside",
      "domain": {"x": [.52, 1]},
      "name": "委案年龄段",
      "hoverinfo":"label+percent+name",
      "hole": .4,
      "type": "pie"
    }],
  "layout": {
        "title":"兴业客户回款与委案年龄段分析",
        "annotations": [
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "回款年龄段占比",
                "x": 0.20,
                "y": 0.5
            },
            {
                "font": {
                    "size": 20
                },
                "showarrow": False,
                "text": "委案年龄段占比",
                "x": 0.8,
                "y": 0.5
            }
        ]
    }
}


fig['data'][0]['values'] = agedata['nnn'][agedata.kehu=='兴业']






pltoff.plot(fig)




# In[141]:


'''
绘制饼图
'''
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
import plotly.offline as pltoff


dataset = {'labels':['男', '女'],
           'values':[280,  100]} 


sexdata = xydzddata.groupby('sex').count().reset_index()
sexdata = sexdata[['sex','nnn']]


dataset['values'] = sexdata['nnn']


data_g = []
tr_p = go.Pie(
    labels = dataset['labels'],
    values = dataset['values']
)
data_g.append(tr_p)
layout = go.Layout(title="兴业回款案件性别占比")
fig = go.Figure(data=data_g, layout=layout)
pltoff.plot(fig)




# In[125]:


import plotly.plotly as py
import plotly.graph_objs as go


trace0 = go.Bar(
    x=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    y=[20, 14, 25, 16, 18, 22, 19, 15, 12, 16, 14, 17],
    name='兴业',
    marker=dict(
        color='rgb(49,130,189)'
    )
)


trace0['y'] = xingyehkmx['hkmx'][xingyehkmx.year==2016]


trace1 = go.Bar(
    x=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    y=[50, 30,60, 30, 30, 40, 43, 32, 27, 32, 28, 35],
    name='广发',
    marker=dict(
        color='rgb(204,204,204)',
    )
)


trace1['y'] = guangfahkmx['hkmx'][guangfahkmx.year==2016]


data = [trace0, trace1]
layout = go.Layout(
    xaxis=dict(tickangle=-45),
    title='客户回款',
    barmode='group',
)


fig = go.Figure(data=data, layout=layout)
pltoff.plot(fig)




# In[110]:


import plotly.plotly as py
import plotly.graph_objs as go


trace0 = go.Bar(
    x=['东北', '华东','华中', '华北', '华南',  '西北',
       '西南'],
    y=[20, 14, 25, 16, 18, 22, 19],
    name='兴业',
    marker=dict(
        color='rgb(0,0,0)'
    )
)


trace0['y'] = regionplot['hkmx'][(regionplot.year=='2017')&(regionplot.kehu=='兴业')]


trace1 = go.Bar(
    x=['东北', '华东','华中', '华北', '华南',  '西北',
       '西南'],
    y=[50, 30,60, 30, 30, 40, 43],
    name='广发',
    marker=dict(
        color='rgb(255,0,0)',
    )
)


trace1['y'] = regionplot['hkmx'][(regionplot.year=='2017')&(regionplot.kehu=='广发')]


data = [trace0, trace1]
layout = go.Layout(
    xaxis=dict(tickangle=-45),
    title='地区回款分布',
    barmode='group',
)


fig = go.Figure(data=data, layout=layout)
pltoff.plot(fig)




# In[116]:


import plotly.offline as pltoff
'''
绘制普通线图
'''
#数据,x为横坐标,y,z为纵坐标的两项指标,三个array长度相同
dataset = {'x':[1,2,3,4,5,6,7,8,9,10,11,12],
           'y':[0.05,0.04,0.01,0.03,0.011,0.02,0.06,0.07,0.019,0.020,0.025,0.08],
           'z':[0.012,0.09,0.02,0.06,0.03,0.025,0.08,0.017,0.022,0.05,0.06,0.07]}


dataset['y'] = xingyehkzhb['zhanbi'][xingyehkzhb.year=='2016']
dataset['z'] = guangfahkzhb['zhanbi'][guangfahkzhb.year=='2016']


data_g = []
#分别插入 y, z
tr_x = go.Scatter(
    x = dataset['x'],
    y = dataset['y'],
    name = '兴业回收率' 
)
data_g.append(tr_x)
tr_z = go.Scatter(
    x = dataset['x'],
    y = dataset['z'],
    name = '广发回收率' 
)
data_g.append(tr_z)
#设置layout,指定图表title,x轴和y轴名称
layout = go.Layout(title="客户月回收率统计", xaxis={'title':'x'}, yaxis={'title':'value'})
#将layout设置到图表
fig = go.Figure(data=data_g, layout=layout)
#绘图,输出路径为name参数指定
pltoff.plot(fig)