【Python】批量下载新浪微博某用户的头像相册

来源:互联网 发布:java高级工程师要求 编辑:程序博客网 时间:2024/05/04 01:22


"""Bulk-download the photo album of a Sina Weibo user.

The script reads the Weibo cookies stored by the local Chrome browser (via the
``ChromeCookies`` helper module), queries the album listing endpoint page by
page, and saves every picture it finds under ``targetDir``.
"""
import os
import re
import time

import requests

import ChromeCookies

USER_NAMBER = '1800591743'      # Weibo user ID, e.g. "1955032717"
targetDir = 'result\\18-WeiboAnalbum.py\\'+USER_NAMBER    # directory the images are saved to

# Number of pictures the listing endpoint is assumed to return per page
# (the original script hard-coded 20) -- TODO confirm against the API.
PAGE_SIZE = 20

# Compiled once because it is applied to every listing page.
PIC_NAME_RE = re.compile(r'"pic_name":"(.*?)"', re.S)


def destFile(path, name=''):
    """Return the local path an image should be written to.

    Args:
        path: URL of the image; the text after the last '/' is used as the
            file name when ``name`` is empty.
        name: Optional explicit file name.

    Returns:
        ``targetDir`` joined with the chosen file name.

    Raises:
        ValueError: if ``name`` is empty and ``path`` contains no '/'.
    """
    # targetDir is several levels deep, so intermediate directories must be
    # created as well; os.mkdir would fail on a fresh checkout.
    os.makedirs(targetDir, exist_ok=True)
    if name == '':
        name = path[path.rindex('/') + 1:]
    return os.path.join(targetDir, name)


def saveImage(imgUrl, name=''):
    """Download ``imgUrl`` and write it into ``targetDir``.

    Failures (network errors, HTTP error statuses, file-system errors) are
    reported on stdout and swallowed so that one bad picture does not abort
    the whole batch.

    Args:
        imgUrl: URL of the picture to download.
        name: Optional file name; defaults to the last URL path segment.
    """
    imgPath = destFile(imgUrl, name)
    try:
        response = requests.get(imgUrl, timeout=30)
        # Do not store an HTML error page under an image file name.
        response.raise_for_status()
        with open(imgPath, "wb") as jpg:
            jpg.write(response.content)
    except (requests.RequestException, IOError) as err:
        print('保存图片失败!%s (%s)' % (imgUrl, err))
        return
    print('保存图片成功!%s' % imgPath)


def main():
    """Fetch the album listing for ``USER_NAMBER`` and download every picture."""
    DOMAIN_NAME = '.weibo.com'
    cookies = ChromeCookies.get_chrome_cookies(DOMAIN_NAME)
    album_url = 'http://photo.weibo.com/photos/get_latest?uid='+USER_NAMBER

    response = requests.get(album_url, cookies=cookies)
    # The GBK round-trip drops every character GBK cannot represent, as the
    # original script did (presumably to keep Windows console output safe).
    html_doc = response.text.encode('gbk', 'ignore').decode('gbk')

    total_match = re.search(r'"total":(.*?),', html_doc)
    if total_match is None:
        # Typically means the cookies are missing/expired or the response
        # format changed; fail with a readable message instead of AttributeError.
        raise SystemExit('Could not find "total" in the album listing response')
    imgnum = total_match.group(1)
    print(imgnum)

    # Ceiling division: no extra empty page when the total is an exact
    # multiple of PAGE_SIZE.
    page_count = (int(imgnum) + PAGE_SIZE - 1) // PAGE_SIZE
    for page in range(1, page_count + 1):
        get_url = album_url + '&page=' + str(page)
        response = requests.get(get_url, cookies=cookies)
        html_doc = response.text.encode('gbk', 'ignore').decode('gbk')
        for match in PIC_NAME_RE.finditer(html_doc):
            picture = match.group(1)
            pictureurl = 'http://ww3.sinaimg.cn/mw690/'+picture
            saveImage(pictureurl)


if __name__ == '__main__':
    main()


其中,ChromeCookies 是一个辅助模块(ChromeCookies.py),其中的 get_chrome_cookies 函数用于读取 Chrome 浏览器为指定域名保存的 Cookie。

"""Read cookies stored by the local Chrome browser (Windows only)."""
import os
import re
import shutil
import sqlite3
import subprocess

import requests
import win32crypt


def get_chrome_cookies(url):
    """Return the Chrome cookies stored for one host as a ``{name: value}`` dict.

    The Chrome cookie database is copied to a temporary file in the current
    directory, queried, and the copy is removed again.

    Args:
        url: Exact ``host_key`` to match in the cookie database,
            e.g. ``'.weibo.com'``.

    Returns:
        dict mapping cookie names to their decrypted string values.

    Raises:
        Exception: if the Chrome cookie database does not exist at the
            expected location.
        OSError: if the database cannot be copied.
    """
    # Raw string: '\p' is not a valid escape sequence in a normal literal.
    DIST_COOKIE_FILENAME = r'.\python-chrome-cookies'
    # NOTE(review): newer Chrome releases reportedly keep this file elsewhere
    # and encrypt cookie values differently -- confirm for the target version.
    SOUR_COOKIE_FILENAME = os.path.join(os.environ['LOCALAPPDATA'], r'Google\Chrome\User Data\Default\Cookies')
    if not os.path.exists(SOUR_COOKIE_FILENAME):
        raise Exception('Cookies 文件不存在...')

    # Work on a copy of the database rather than the browser's own file.
    # shutil raises on failure, whereas the shell 'copy' command failed
    # silently and left sqlite to open an empty database.
    shutil.copyfile(SOUR_COOKIE_FILENAME, DIST_COOKIE_FILENAME)

    ret_dict = {}
    try:
        conn = sqlite3.connect(DIST_COOKIE_FILENAME)
        try:
            rows = conn.execute(
                "SELECT name, value, encrypted_value FROM cookies WHERE host_key = ?",
                (url,),
            )
            for name, value, encrypted_value in rows:
                if encrypted_value:
                    decrypted = win32crypt.CryptUnprotectData(encrypted_value, None, None, None, 0)
                    value = decrypted[1].decode()
                # Rows without an encrypted blob fall back to the plain value column.
                ret_dict[name] = value
        finally:
            conn.close()
    finally:
        # Always remove the temporary copy, even when the query or decryption fails.
        os.remove(DIST_COOKIE_FILENAME)
    return ret_dict


效果图:




GitHub地址:https://github.com/Jueee/05-WebCrawlers/blob/master/18-WeiboAnalbum.py

0 0
原创粉丝点击