多线程下载sis001的网友自拍贴图版面的图片
来源:互联网 发布:苹果游戏辅助软件 编辑:程序博客网 时间:2024/04/27 23:44
多线程下载sis001的网友自拍贴图版面的图片。这个程序并不下载整个版面,而是只下载第二页的内容,因为第二页是最新的内容。你需要一个sis001的5级以上账号的用户名和密码,否则无法访问此版面。
#!/usr/bin/python# -*- coding: cp936 -*-#coding utf-8import urllibimport urllib2import reimport cookielibimport Queueimport threadingdef downPic(tiezi_url): req = urllib2.Request(q.get(),None,headers) tiezi_html = opener.open(req).read() #print tiezi_html re_img = re.compile(r'\<img src\=\"(http\:\/\/.*?\.jpg|attachments\/.*?.jpg)\"') img_list = re_img.findall(tiezi_html) #print img_list for i in img_list: if re.match("http",i): print "%s downloading..."%i filename = re.split(r'/',i) try: req = urllib2.Request(i,None,headers) res = opener.open(req).read() open(filename[-1],'wb').write(res) except: pass else: img_url = "http://38.103.161.185/forum/%s"%i filename = re.split(r'/',img_url) print "inner link %s"%img_url try: req = urllib2.Request(img_url,None,headers) res = opener.open(req).read() open(filename[-1],'wb').write(res) except: passcj = cookielib.CookieJar()opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))#urllib2.install_opener(opener)headers ={"User-agent":"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"}data = {"formhash":"3fec4925","referer":"index.php","loginfield":"username","240aa46b3893fb57c436c0a3785b61e7":"xxxx","ea32b1cadbde4b66ca614e0bb593d1c9":"xxxx","questionid":"0","answer":"","cookietime":"2592000","loginmode":"","styleid":"","loginsubmit":"true"}post_data = urllib.urlencode(data)req = urllib2.Request("http://38.103.161.185/forum/logging.php?action=login&",post_data,headers)content=opener.open(req)#print content.read()req2 = urllib2.Request("http://38.103.161.185/forum/forum-62-2.html",None,headers)board_html = opener.open(req2).read()#print board_htmlre_link = re.compile(r'\<a href\=\"(thread-\d{7}-1-2.html)')title_list = re_link.findall(board_html)#http://38.103.161.185/forum/thread-(4917300-1-1.html)#http://38.103.161.185/forum/forum-62-(2).html#[\u4e00-\u9fa5]#print title_listq = Queue.Queue(100)for i in title_list: tiezi_url = "http://38.103.161.185/forum/%s"%i #print tiezi_url q.put(tiezi_url)print "total 
title:%s"%q.qsize()while True: if q.qsize()>0: th = threading.Thread(target=downPic,args=(tiezi_url,) ) th.start() else: break
=====
高效版本(下面是改进后的脚本,每张图片用一个线程下载):
#!/usr/bin/python# -*- coding: cp936 -*-#coding utf-8import urllibimport urllib2import reimport cookielibimport Queueimport threadingimport socketimport timeimport sysimport randomdef log(message): log = open("log.txt","a") log.write(time.ctime()+" "+message+"\n") log.close()def getPic():i = q.get()if re.match("http",i): #print "%s downloading..."%ifilename = re.split(r'/',i)try:req = urllib2.Request(i,None,headers)res = opener.open(req).read()savefile = '.\\img\\'+filename[-1]+ str(int(random.random()*100000000))+'.jpg'open(savefile,'wb').write(res)except:etype, value, tb = sys.exc_info()errormsg = i + "||"+str(etype) +"||"+ str(value)log(errormsg)passelse:img_url = "http://38.103.161.185/forum/%s"%ifilename = re.split(r'/',img_url)#print "%s"%img_urltry:req = urllib2.Request(img_url,None,headers)res = opener.open(req).read()savefile = '.\\img\\'+filename[-1]+ str(int(random.random()*100000000))+'.jpg'open(savefile,'wb').write(res)except:etype, value, tb = sys.exc_info()errormsg = i + "||"+str(etype) +"||"+ str(value)log(errormsg)passdef downPic(tiezi_url,q): req = urllib2.Request(tiezi_url,None,headers) tiezi_html = opener.open(req).read() #print tiezi_html re_img = re.compile(r'\<img src\=\"(http\:\/\/.*?\.jpg|attachments\/.*?.jpg)\"') img_list = re_img.findall(tiezi_html) img_list = list(set(img_list)) #print img_list for i in img_list: q.put(i) while True: if q.qsize()>0: th = threading.Thread(target=getPic) th.start() #print "Queue %s"%q.qsize() else: breakheaders ={"User-agent":"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1"}socket.setdefaulttimeout(30)cj = cookielib.CookieJar()#proxy = urllib2.ProxyHandler({'http': '127.0.0.1:8087'})opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))#urllib2.install_opener(opener)data = { "formhash":"3fec4925", "referer":"index.php", "loginfield":"username", "240aa46b3893fb57c436c0a3785b61e7":"xxx", "ea32b1cadbde4b66ca614e0bb593d1c9":"xxx", "questionid":"0", "answer":"", "cookietime":"2592000", 
"loginmode":"", "styleid":"", "loginsubmit":"true"}post_data = urllib.urlencode(data)req = urllib2.Request("http://38.103.161.185/forum/logging.php?action=login&",post_data,headers)content=opener.open(req)#print content.read()req2 = urllib2.Request("http://38.103.161.185/forum/forum-62-1.html",None,headers)board_html = opener.open(req2).read()#print board_htmlre_link = re.compile(r'\<a href\=\"(thread-\d{7}-1-\d{1}.html)')title_list = re_link.findall(board_html)title_list = list(set(title_list)) #去除list中的重复项#http://38.103.161.185/forum/thread-(4917300)-1-(1).html#http://38.103.161.185/forum/forum-62-(2).html#[\u4e00-\u9fa5]#print title_listfor i in title_list: tiezi_url = "http://38.103.161.185/forum/%s"%i print tiezi_url q = Queue.Queue(0) downPic(tiezi_url,q)print 'All threads terminate!'
- 多线程下载sis001的网友自拍贴图版面的图片
- 我的自拍123
- 网页版面的布局
- Swing的版面管理器
- Blogger的版面配置
- 简单的版面分析
- 版面的附加元素
- swing的版面结构
- 多线程下载图片的简单例子
- 修改网友写的下载软件
- 收藏网友的 源程序下载网
- 刚刚看到网友的签名 飞秋官网下载
- 影响版面大小的因素
- java swing的版面结构
- 版面中“线”的意义
- Android自拍相机应用——图片的镜像翻转
- PHP多线程批量采集下载美女图片的实现代码
- iOS-UIImageView加载网络下载的图片(异步+多线程)
- Linux中断处理体系结构分析(二)
- Red5的丢包处理
- oracle 时间处理
- 详解Linux中断处理中的hardirq与softirq机制
- 一个erlang的blog
- 多线程下载sis001的网友自拍贴图版面的图片
- jsp链接数据库代码大全
- [每日一题] 11gOCP 1z0-052 :2013-09-28 ORA-01555: snapshot too old......................C52
- serialVersionUID
- Win7(64位)下不必装虚拟机使用masm和debug方法(图文教程)
- 用gRaphaël画饼图和柱状图,修正官方柱状图label方法的bug
- transient
- UVA 12299 RMQ with Shifts
- 关于最大流的的各种算法