为爱而码

来源:互联网 发布:杭州linux运维工资 编辑:程序博客网 时间:2024/04/30 00:02

下载微信文章中图片
downloadIMage.py

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Download the images embedded in WeChat articles.

The script fetches an index article, follows the first few links found in it
and stores every image referenced by a ``data-src`` attribute in one directory
per linked article.  It runs on Python 2 (its original target) and Python 3.
"""
import os
import re
import sys
import time

try:  # Python 3
    from urllib.request import Request, urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve
    from urllib2 import Request, urlopen

if sys.version_info[0] == 2:
    # Python 2 only: keep the original default-encoding switch so implicit
    # byte/unicode conversions (paths, console output) behave as before.
    reload(sys)  # noqa: F821 -- ``reload`` is a builtin on Python 2
    sys.setdefaultencoding('utf8')

# Sent with every page request so the server does not treat us as a robot.
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

# Directory under which one sub-directory per article is created.
SAVE_ROOT = r'C:\Users\pradmin\Desktop\downloadImage\images'

# Image addresses live in ``data-src`` attributes, for example:
# http://mmbiz.qpic.cn/mmbiz_jpg/wlJkphkR2NMibwTo1cqHwdhLTMYmbV0IOw5vCaJuTsbvTdukCQwUicPClXRibcnY8RCsszAfBYlrJnfz8icUIBWWGw/640?wx_fmt=jpeg
IMG_SRC_RE = re.compile(r'data-src="(.*?)"')

# Captures (href, link text) of the article links on the index page.
BOOK_LINK_RE = re.compile(r'<a href="(.*?)" target="_blank">(.*?)</a>')

# Characters that are not allowed in a Windows directory name.
_UNSAFE_CHARS_RE = re.compile(r'[\\/:*?"<>|\r\n\t]')

originUrl = "http://mp.weixin.qq.com/s?__biz=MzA4NjQzNzY4Mw==&mid=2454531002&idx=4&sn=67826657f4486bfa0cb8f195262a86f9&chksm=887131e6bf06b8f09b2ec821f49c71c64536cf585d9f17664709fcfc533d39c976c30da91a8d&mpshare=1&scene=1&srcid=1215qBhkFwNhLrfnZlSMmZSj#rd"


def _to_text(html):
    """Return *html* as text, decoding UTF-8 bytes when necessary."""
    if isinstance(html, bytes):
        return html.decode('utf-8', 'replace')
    return html


def _safe_dirname(name):
    """Turn a link title into a name that is usable as a directory."""
    cleaned = _UNSAFE_CHARS_RE.sub('_', name).strip(' .')
    return cleaned or 'untitled'


def getHtml(url):
    """Fetch *url* and return the page body as text.

    The body is decoded as UTF-8; undecodable bytes are replaced rather than
    raising.  Errors raised by ``urlopen`` propagate to the caller.
    """
    request = Request(url, headers={'User-Agent': USER_AGENT})
    page = urlopen(request)
    try:
        raw = page.read()
    finally:
        # Release the connection even when reading fails.
        page.close()
    return _to_text(raw)


def getImg(html, savePath, delay=1):
    """Download every ``data-src`` image found in *html* into *savePath*.

    Images are stored as ``0.jpg``, ``1.jpg``, ... in document order; the
    index advances for every non-empty address, whether or not its download
    succeeds.  *savePath* is created when it does not exist yet.  A failed
    download is reported and skipped so one bad image does not abort the rest.

    *delay* is the pause in seconds after each attempt.
    Returns the number of images that were saved.
    """
    urls = [url for url in IMG_SRC_RE.findall(_to_text(html)) if url]
    if not urls:
        return 0
    if not os.path.isdir(savePath):
        os.makedirs(savePath)

    saved = 0
    for index, imgurl in enumerate(urls):
        target = os.path.join(savePath, '%s.jpg' % index)
        try:
            urlretrieve(imgurl, target)
        except Exception:
            # Best effort per image; unlike a bare ``except`` this still lets
            # KeyboardInterrupt / SystemExit stop the run.
            print("[-] imgurl =%s" % imgurl)
        else:
            saved += 1
            print("[+] imgurl =%s" % imgurl)
        if delay:
            time.sleep(delay)
    return saved


def bookUrl(html, saveRoot=SAVE_ROOT, limit=6):
    """Follow the article links in *html* and download each article's images.

    Only the first *limit* links are processed.  Each article's images go to
    a sub-directory of *saveRoot* named after the link text.
    """
    for url, bookName in BOOK_LINK_RE.findall(_to_text(html))[:limit]:
        savePath = os.path.join(saveRoot, _safe_dirname(bookName))
        print("[+] url =%s" % url)
        getImg(getHtml(url), savePath)


def main():
    """Process the index article referenced by ``originUrl``."""
    bookUrl(getHtml(originUrl))


if __name__ == "__main__":
    main()
原创粉丝点击