爬取人人贷

来源:互联网 发布:人事软件下载 编辑:程序博客网 时间:2024/05/05 21:18
# -*- coding: utf-8 -*-
"""Scrape loan listings from we.com (Renrendai) and chart them by loan title.

For every listing page the script pulls the ``title``, ``amount``,
``interest`` and ``months`` fields out of the JSON embedded in the HTML,
loads them into a pandas DataFrame, aggregates the number of loans and the
total amount per title, and draws a two-sided horizontal bar chart.
"""
import re
import sys
import urllib
from contextlib import closing

import numpy as np
import pandas as pd

try:
    import urllib2
except ImportError:
    # Python 3: urllib2 was folded into urllib.request.
    import urllib.request as urllib2

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8 so
    # the Chinese plot labels below do not raise UnicodeDecodeError.
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding("utf-8")

# NOTE(review): everything after '#' is a URL fragment, which HTTP clients
# normally do not send to the server, so each request may return the same
# first page -- confirm against the site's real paging API.
BASE_URL = 'https://www.we.com/loan#page-'
FIRST_PAGE = 1
LAST_PAGE = 51

FIELDS = ('title', 'amount', 'interest', 'months')
NUMERIC_FIELDS = ('amount', 'interest', 'months')

# One pattern per field, compiled once: captures everything between
# '"<field>":' and the next comma (string values keep their quotes).
FIELD_PATTERNS = dict(
    (name, re.compile(r'"%s":(.*?),' % name)) for name in FIELDS
)


def page_url(page, base_url=BASE_URL):
    """Return the listing URL for ``page``.

    The URL is always derived from the unchanged ``base_url``; the original
    script appended to the same variable on every iteration, producing
    ``...page-1``, ``...page-12``, ``...page-123`` and so on.
    """
    return base_url + str(page)


def fetch_page(url):
    """Download ``url`` and return its body decoded as UTF-8 text."""
    # closing() guarantees the connection is released even if read() fails.
    with closing(urllib2.urlopen(url)) as response:
        return response.read().decode('utf-8')


def extract_fields(html):
    """Return ``{field: [raw captured values]}`` for every name in FIELDS."""
    return dict(
        (name, FIELD_PATTERNS[name].findall(html)) for name in FIELDS
    )


def fetch_loans(pages=None, fetch=fetch_page):
    """Collect the raw field values from every page in ``pages``.

    pages: iterable of page numbers; defaults to FIRST_PAGE..LAST_PAGE.
    fetch: callable mapping a URL to the page text (injectable for testing).
    Returns a dict mapping each field name to the list of captured strings.
    """
    if pages is None:
        pages = range(FIRST_PAGE, LAST_PAGE + 1)
    columns = dict((name, []) for name in FIELDS)
    for page in pages:
        found = extract_fields(fetch(page_url(page)))
        for name in FIELDS:
            columns[name].extend(found[name])
    return columns


def build_frame(columns):
    """Build the loans DataFrame and convert the numeric columns to float64."""
    frame = pd.DataFrame(columns)
    return frame.astype(dict((name, np.float64) for name in NUMERIC_FIELDS))


def summarize_by_title(frame):
    """Return ``(loan count per title, total amount per title)`` as Series."""
    amounts = frame.groupby('title')['amount']
    return amounts.agg('count'), amounts.agg('sum')


def plot_summary(title_count, title_sum):
    """Draw loan counts (right, red) and total amounts (left, blue) per title.

    Bar positions and y-axis labels are derived from the grouped data, so the
    chart stays correct for any number of titles instead of assuming exactly
    five fixed categories.
    """
    # Imported here so the scraping/parsing helpers stay usable on machines
    # without matplotlib or a display backend.
    import matplotlib.pyplot as plt

    plt.rc('font', family='STXihei', size=10)
    positions = np.arange(1, len(title_count) + 1)
    # The regex capture keeps the JSON quotes around each title; drop them
    # for display only.
    labels = [name.strip('"') for name in title_count.index]

    plt.figure()
    # TODO(review): counts and summed amounts share one x axis although their
    # scales differ widely, so this is not a proper two-sided bar chart;
    # consider two subplots that share the y axis.
    plt.barh(positions, title_count, color='r', align='center', alpha=.5)
    plt.barh(positions, -title_sum, color='b', align='center', alpha=.5)
    plt.ylabel('贷款用途分类')
    plt.title('贷款用户金额及笔数')
    # Empty label list: keep the tick marks but hide the (mixed-scale) values.
    plt.xticks(positions, [])
    plt.yticks(positions, labels)
    plt.show()


def main():
    """Scrape every page, aggregate by title and show the chart."""
    rrd = build_frame(fetch_loans())
    if rrd.empty:
        sys.exit('No loan records were found in the downloaded pages.')
    title_count, title_sum = summarize_by_title(rrd)
    plot_summary(title_count, title_sum)


if __name__ == '__main__':
    main()
0 0
原创粉丝点击