处理银行的数据保留的程序

来源:互联网 发布:unity3d导入3dmax模型 编辑:程序博客网 时间:2024/06/08 17:35

处理银行的数据保留的程序

# -*- coding: utf-8 -*-
"""Exploratory analysis of the loan data set stored in ``loan.csv``.

Created on Mon Jul 03 22:21:37 2017

@author: Administrator

When run, the script reads ``loan.csv`` from the current working directory,
pulls a handful of columns into module-level variables and draws a bar chart
of the number of rows per ``grade``.

Several earlier experiments are kept below as bare triple-quoted strings.
They are never executed.  They were written for Python 2 (``print i``) and
for an old pandas API (``drop(label, 1)`` with a positional axis, and
``value_counts().reset_index()`` yielding an ``index`` column), so they
need porting before being re-enabled.
"""
import pandas as pd
import numpy as np
# NOTE(review): the star import shadows builtins such as ``sum``, ``any`` and
# ``all``.  It is kept because the disabled experiments call ``zeros``
# unqualified; the live code below does not depend on it.
from numpy import *
import datetime
import matplotlib.pyplot as plt

data = pd.read_csv('loan.csv', header=0)
# Optional subsample of the data for quicker experiments:
#sampledata=data.loc[0:700000]
sampledata = data

# Columns used as the initial inputs of the analyses.
interest_rate = sampledata['int_rate']
debt_ability = sampledata['dti']
user_grade = sampledata['grade']
user_subgrade = sampledata['sub_grade']
loan_term = sampledata['term']
funded_amount = sampledata['funded_amnt']

# ---------------------------------------------------------------------------
# Disabled experiment 1: mean `annual_inc` per sub-grade (A1..G5), drawn as a
# line plot.  Rows whose `annual_inc` value is a str are dropped first.
# ---------------------------------------------------------------------------
'''
## 看看用户对自报月收入的评估
user_grade_annual_inc=sampledata[['sub_grade','annual_inc']]
##对自报月收入的异常数据的处理
str_annual_true=[]
k=0
for i in range(user_grade_annual_inc.shape[0]):
    print i
    str_annual=user_grade_annual_inc.iloc[i,1]
    if isinstance(str_annual, str)==1:
        str_annual_true.append(i)
#删除掉那些异样的数据
user_grade_annual_inc=user_grade_annual_inc.drop(str_annual_true)
alphabet=['A','B','C','D','E','F','G']
user_grade_average_annual_inc=zeros([len(alphabet)*5,1])
## 循环grade级别的用户
z=0
for i in range(len(alphabet)):
    print i
#i=1
# 循环subgrade级别的用户
    for j in range(5):
        #j=1
        alphabet_grade_subgrade=alphabet[i]+str(j+1)
        type_index=user_grade_annual_inc['sub_grade'] == alphabet_grade_subgrade
        user_grade_annual_inc_type=user_grade_annual_inc[type_index]
        user_grade_annual_inc_type=user_grade_annual_inc_type.drop('sub_grade',1)
        #用矩阵的形式展现
        user_grade_average_annual_inc[z,0]=user_grade_annual_inc_type['annual_inc'].mean()
        z=z+1
# 用户等级跟它自己的评估能力关系
plt.xlabel('subgrade')
plt.ylabel('annualinc')
plt.plot(user_grade_average_annual_inc,color="blue", linewidth=2.5, linestyle="-", label="ability")
'''

# ---------------------------------------------------------------------------
# Disabled experiment 2: mean `dti` and mean `delinq_2yrs` per sub-grade.
# Plots 1 / mean(dti) in blue and the mean delinquency count in red.
# ---------------------------------------------------------------------------
'''
## 看看用户对于自己的借款能力的评估(只针对A,B,C....等级)
user_grade_ability=sampledata[['grade','sub_grade','dti']]
user_delay_payment=sampledata[['sub_grade','delinq_2yrs']]
##对延迟还款次数的异常数据的处理
str_delay_true=[]
str_delay_index=zeros([user_delay_payment.shape[0],1])
k=0
for i in range(user_delay_payment.shape[0]):
    str_delay=user_delay_payment.iloc[i,1]
    if isinstance(str_delay, str)==1:
        str_delay_true.append(i)
    str_delay_index[i,0]=isinstance(str_delay, str)
#删除掉那些异样的数据
user_delay_payment=user_delay_payment.drop(str_delay_true)
alphabet=['A','B','C','D','E','F','G']
user_grade_average_value={}
user_grade_average_value1=zeros([len(alphabet)*5,1])
user_delay_payment_average_value=zeros([len(alphabet)*5,1])
#user_grade_average_value2=[len(alphabet)*4]
## 循环grade级别的用户
z=0
for i in range(len(alphabet)):
    print i
#i=1
# 循环subgrade级别的用户
    for j in range(5):
        #j=1
        alphabet_grade_subgrade=alphabet[i]+str(j+1)
        type_index= user_grade_ability['sub_grade'] == alphabet_grade_subgrade
        user_grade_ability_type=user_grade_ability[type_index]
        user_delay_payment_type=user_delay_payment[type_index]
        user_delay_payment_type=user_delay_payment_type.drop('sub_grade',1)
        user_grade_ability_type=user_grade_ability_type.drop('grade',1)
        user_grade_ability_type=user_grade_ability_type.drop('sub_grade',1)
        #这是用词典的形式展示的
        user_grade_average_value[alphabet_grade_subgrade]=user_grade_ability_type['dti'].mean() #均值
        #用矩阵的形式展现
        user_grade_average_value1[z,0]=user_grade_ability_type['dti'].mean()
        #对于用户的延迟还款的次数
        user_delay_payment_average_value[z,0]=user_delay_payment_type['delinq_2yrs'].mean()
        z=z+1
# 用户等级跟它自己的评估能力关系
plt.xlabel('subgrade')
plt.ylabel('ability')
plt.plot(1/user_grade_average_value1,color="blue", linewidth=2.5, linestyle="-", label="ability")
#不良记录
plt.xlabel('subgrade')
plt.ylabel('delay payment')
plt.plot(user_delay_payment_average_value,color="red",  linewidth=2.5, linestyle="-")
'''

# ---------------------------------------------------------------------------
# Disabled experiment 3: bar chart of `dti` using one row per sub-grade (the
# first row left after sorting by `sub_grade` and dropping duplicates).
# ---------------------------------------------------------------------------
'''
## 看看用户对于自己的借款能力的评估
user_subgrade_ability=sampledata[['sub_grade','dti']]
user_subgrade_ability=user_subgrade_ability.sort_values(by=['sub_grade'])
user_subgrade_ability=user_subgrade_ability.drop_duplicates(['sub_grade'])
user_subgrade_ability=user_subgrade_ability.reset_index()
user_subgrade_ability=user_subgrade_ability.drop('index',1)
# 重新构建index
user_subgrade_ability=user_subgrade_ability.set_index('sub_grade')
user_subgrade_ability.plot(kind='bar')
'''

## Check whether a single user's grade changes.
#one_user_data=sampledata[sampledata.id==1060578]

# ---------------------------------------------------------------------------
# Disabled: row counts per `member_id`, plus the four-column working frame
# that disabled experiment 6 below reads.
# ---------------------------------------------------------------------------
'''
user_number=sampledata['member_id'].value_counts()#统计每个等级的个数
user_subgrade_amount_term_rate=sampledata[['sub_grade','loan_amnt','term','int_rate']]
'''

## Relationship between the interest rate and the amount of money.

# ---------------------------------------------------------------------------
# Disabled experiment 5: `int_rate` of one row per sub-grade, plotted against
# the sub-grade.  The author's recorded conclusion: the lower the grade, the
# higher the rate.
# ---------------------------------------------------------------------------
'''
## 等级越来越低的时候,借贷的利率如何变化
##结论就是等级越低利率就越高
user_subgrade_rate=sampledata[['sub_grade','int_rate']]
user_subgrade_rate=user_subgrade_rate.drop_duplicates(['sub_grade'])
user_subgrade_rate=user_subgrade_rate.sort_values(by=['sub_grade'])
user_subgrade_rate=user_subgrade_rate.reset_index()
user_subgrade_rate=user_subgrade_rate.drop('index',1)
#user_subgrade_rate=user_subgrade_rate.drop('sub_grade',1)
user_subgrade_rate=user_subgrade_rate.set_index('sub_grade')
user_subgrade_rate.plot()
'''

# ---------------------------------------------------------------------------
# Disabled experiment 6: value counts of `sub_grade` and of `int_rate`,
# concatenated side by side and sorted by sub-grade.
# ---------------------------------------------------------------------------
'''
#user_subgrade_amount_term_rate=user_subgrade_amount_term_rate.sort_values(by=['sub_grade'])
user_subgrade_number=user_subgrade_amount_term_rate['sub_grade'].value_counts()#统计每个子等级的个数
user_subgrade_number=user_subgrade_number.reset_index()
user_subgrade_number.rename(columns={'index':'subgrade'}, inplace = True)
user_subgrade_number.rename(columns={'sub_grade':'subgrade_number'}, inplace = True)
user_rate_number=user_subgrade_amount_term_rate['int_rate'].value_counts()#统计每个利率的个数
user_rate_number=user_rate_number.reset_index()
user_rate_number.rename(columns={'index':'rate'}, inplace = True)
user_rate_number.rename(columns={'int_rate':'int_rate_number'}, inplace = True)
user_subgrade_rate = pd.concat([user_subgrade_number,user_rate_number], axis=1)
user_subgrade_rate=user_subgrade_rate.sort_values(by=['subgrade'])
'''

# ---------------------------------------------------------------------------
# Disabled experiment 7: bar chart of the number of rows per sub-grade (the
# sub-grade counterpart of the live chart below).
# ---------------------------------------------------------------------------
'''
#统计下子等级的个数
## 得到的结论就是服从某个分布
subgrade_number=user_subgrade.value_counts()#统计每个等级的个数
sbugrade_number=subgrade_number.reset_index()
# 修改列的名字
sbugrade_number.rename(columns={'sub_grade':'number'}, inplace = True)
sbugrade_number.rename(columns={'index':'subgrade'}, inplace = True)
#把等级字母按顺序排序
sbugrade_number=sbugrade_number.sort_values(by=['subgrade'])
sbugrade_number=sbugrade_number.reset_index()
sbugrade_number=sbugrade_number.drop('index',1)
sbugrade_number=sbugrade_number.set_index('subgrade')
#sbugrade_number2=sbugrade_number1[['subgrade','number']]
sbugrade_number.plot(kind='bar')
'''

# ---------------------------------------------------------------------------
# Live analysis: number of rows per grade, as a bar chart.
# ---------------------------------------------------------------------------
# The result is a one-column frame: index named 'grade' (sorted
# alphabetically), column 'number' holding the count.
#
# Why not value_counts().reset_index() followed by column renames: the column
# names produced by that pair changed in pandas 2.0 ('index'/'grade' became
# 'grade'/'count'), so renaming by the old names mislabels the frame and the
# following sort by 'grade' raises KeyError.  Naming the axis and the column
# explicitly gives the same frame on every pandas version.
grade_number = (
    user_grade.value_counts()
    .sort_index()
    .rename_axis('grade')
    .to_frame('number')
)
grade_number.plot(kind='bar')
原创粉丝点击