python84
来源:互联网 发布:java工作流视频教程 编辑:程序博客网 时间:2024/06/06 15:54
anzhi84.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Crawl www.anzhi.com app listings and store app metadata and icons.

What main() does:

1. Walk the "hot" listing pages and collect every app detail-page link.
2. Append the number of distinct links to ``anzhi.txt``.
3. For each detail page, pull the metadata fields out with regular
   expressions, insert one row per introduction block into the MySQL table
   ``anzhi``, and save the app icon as ``anzhi/<running number>``.

The third-party modules (``requests``, ``MySQLdb``) are imported inside the
functions that use them, so the pure parsing helpers below can be imported
and unit-tested on a machine that has neither package installed.
"""
import os
import re
import sys
import time

# Module-level state kept from the original script.
num = 0          # icons saved so far; also the file name of the next icon
dataresult = []  # distinct detail-page paths collected from the listings

SITE_ROOT = "http://www.anzhi.com/"
LIST_URL = "http://www.anzhi.com/list_1_%d_hot.html"
LAST_PAGE = 7772        # listing pages 1..7772 are crawled
REQUEST_TIMEOUT = 30    # seconds before a request is abandoned
RETRY_DELAY = 30        # seconds to back off after a failed request
COUNT_FILE = "anzhi.txt"
ICON_DIR = "anzhi"

# NOTE(review): the defaults are the credentials hard-coded in the original
# script; override them through the environment instead of editing the file.
DB_SETTINGS = {
    "host": os.environ.get("ANZHI_DB_HOST", "localhost"),
    "user": os.environ.get("ANZHI_DB_USER", "root"),
    "passwd": os.environ.get("ANZHI_DB_PASSWORD", "123456"),
    "db": os.environ.get("ANZHI_DB_NAME", "addressbookdb"),
    "charset": "utf8",
}

# The original statement named seven columns ("classifyintroduction" was two
# names fused together) but supplied eight values, so every insert failed.
# NOTE(review): "classify" / "introduction" are inferred from that fused
# name -- confirm against the real table definition.
COLUMNS = (
    "name",
    "version",
    "developer",
    "pubtime",
    "filesize",
    "support",
    "classify",
    "introduction",
)
# Built from COLUMNS so the column and placeholder counts cannot drift apart.
INSERT_SQL = "insert into anzhi(%s) values(%s)" % (
    ",".join(COLUMNS),
    ",".join(["%s"] * len(COLUMNS)),
)

APP_LINK_RE = re.compile(r'<span class="app_name"><a href="(.+?)">')
# One pattern per metadata column, in the same order as COLUMNS[:-1].
FIELD_PATTERNS = (
    re.compile(r'<div class="detail_line">[\s\S]*?<h3>(.+?)</h3>'),    # name
    re.compile(r'<span class="app_detail_version">(.+?)</span>'),      # version
    re.compile(u'开发者:(.+?)</span>'),                                 # developer
    re.compile(u'发布时间:(.+?)</li>'),                                 # release date
    re.compile(u'文件大小:(.+?)</span></li>'),                          # file size
    re.compile(u'系统支持:(.+?)</li>'),                                 # supported OS
    re.compile(u'所属类别:(.+?)</li>'),                                 # category
)
INTRO_RE = re.compile(r'<div class="app_detail_infor">([\s\S]*?)</div>')
# The original pattern ended in a lazy group and therefore captured a single
# character; capture the whole (optionally quoted) attribute value instead.
ICON_RE = re.compile(
    r'<div class="detail_icon">[\s\S]*?<img src=["\']?([^"\'\s>]+)'
)


def extract_app_links(page):
    """Return the detail-page hrefs found in a listing page."""
    return APP_LINK_RE.findall(page)


def parse_detail(page):
    """Return the seven metadata values of a detail page, in column order.

    Returns None when any field is absent, so the caller can skip the page
    instead of failing on an empty match list.
    """
    values = []
    for pattern in FIELD_PATTERNS:
        match = pattern.search(page)
        if match is None:
            return None
        values.append(match.group(1))
    return values


def extract_introductions(page):
    """Return the raw HTML of every introduction block on a detail page."""
    return INTRO_RE.findall(page)


def extract_icon_urls(page):
    """Return the icon image URLs found on a detail page."""
    return ICON_RE.findall(page)


def build_rows(fields, introductions):
    """Return one insert tuple per introduction block.

    ``fields`` is the list from parse_detail(); each introduction has the
    literal sequence '</p> <br />' replaced by a space, as the original
    script did before storing it.
    """
    return [
        tuple(fields) + (intro.replace('</p> <br />', ' '),)
        for intro in introductions
    ]


def _download(url):
    """Return the raw body of *url*, or None when the request fails.

    A failure is reported, followed by a RETRY_DELAY pause (the original
    script's back-off), and then the caller is expected to skip the item.
    """
    import requests

    try:
        return requests.get(url, timeout=REQUEST_TIMEOUT).content
    except requests.RequestException as err:
        print("request failed for %s: %s" % (url, err))
        time.sleep(RETRY_DELAY)
        return None


def _download_text(url):
    """Return the body of *url* decoded as UTF-8 text, or None on failure."""
    body = _download(url)
    if body is None:
        return None
    # The field patterns contain UTF-8 Chinese labels, so decode as UTF-8.
    return body.decode("utf-8", "replace")


def _collect_app_links():
    """Crawl the listing pages and return the distinct detail-page paths."""
    found = set(dataresult)
    for page_no in range(1, LAST_PAGE + 1):
        url = LIST_URL % page_no
        print(url)
        page = _download_text(url)
        if page is None:
            continue
        found.update(extract_app_links(page))
        print(len(found))
    return sorted(found)


def _save_icons(page):
    """Download every icon on a detail page to ICON_DIR/<running number>."""
    global num
    for icon_url in extract_icon_urls(page):
        print(icon_url)
        data = _download(icon_url)
        if data is None:
            continue
        # Binary mode: the payload is image bytes, not text.
        with open(os.path.join(ICON_DIR, str(num)), "wb") as out:
            out.write(data)
        num += 1
        print(num)


def main():
    """Run the full crawl: collect links, store metadata, save icons.

    Exits with status 1 when the database connection cannot be opened.
    """
    global dataresult
    import MySQLdb

    try:
        conn = MySQLdb.connect(**DB_SETTINGS)
        conn.query("set names utf8")
        cursor = conn.cursor()
    except MySQLdb.Error as err:
        print(err)
        sys.exit(1)

    try:
        if not os.path.isdir(ICON_DIR):
            os.makedirs(ICON_DIR)

        dataresult = _collect_app_links()
        with open(COUNT_FILE, "a") as count_file:
            count_file.write(str(len(dataresult)))
        print(len(dataresult))

        for path in dataresult:
            print(path)
            page = _download_text(SITE_ROOT + path)
            if page is None:
                continue

            fields = parse_detail(page)
            if fields is None:
                # Not a regular detail page (or the layout changed): skip it
                # rather than abort the whole crawl.
                print("skipping %s: metadata fields not found" % path)
                continue
            for value in fields:
                print(value)

            introductions = extract_introductions(page)
            for intro in introductions:
                print(intro.replace('<br />', ' '))

            for row in build_rows(fields, introductions):
                try:
                    cursor.execute(INSERT_SQL, row)
                    conn.commit()
                except MySQLdb.Error as err:
                    print("insert failed for %s: %s" % (path, err))
                    conn.rollback()

            _save_icons(page)
    finally:
        cursor.close()
        conn.close()


if __name__ == "__main__":
    main()
- python84
- [leetcode刷题系列]Gray Code
- 排序算法Java实现
- iOS: UIScrollView的方法touchesShouldBegin:withEvent:inContentView:
- HDU1251 统计难题 解题报告--字典树
- 继承 实现 依赖 关联 聚合 组合之间的关系和联系
- python84
- 查找算法Java实现
- iOS: UIScrollView的方法touchesShouldCancelInContentView:
- LXC 轻量级虚拟化
- 处理异常
- iOS: UIScrollView的方法zoomToRect:animated:
- 通过 WM_CTLCOLOR 设置VC 组合框显示属性(字体、颜色、背景)
- Page Redirect Speed Test
- hdu Questions and answers