Python爬取贴吧图片

来源:互联网 发布:js提示框美化 编辑:程序博客网 时间:2024/06/07 02:03
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/7/19 16:21
# @Author  : wqj
# @Contact : wqjhky@gmail.com
# @Site    :
# @File    : img.py
# @Software: PyCharm Community Edition
"""Download the ``.jpg`` images found on the thread-list pages of a Tieba forum.

The script walks ``PAGE_COUNT`` list pages of the forum encoded in
``BASE_URL``, extracts every ``src="http://img....jpg"`` attribute and saves
the images as ``1.jpg``, ``2.jpg``, ... inside ``IMG_DIR``.

Runs on both Python 2 and Python 3.
"""
import os
import re
import sys
from contextlib import closing

if sys.version_info[0] >= 3:
    from urllib.request import urlopen, urlretrieve
else:
    from urllib import urlopen, urlretrieve

# Matches the URL inside src="..." for .jpg files served from hosts whose
# name starts with "img". Compiled once because it is applied to every page.
IMG_RE = re.compile(r'src="(http://img.*?\.jpg)"')

# Forum list URL without the page offset; the offset is appended as the value
# of the trailing ``pn`` query parameter.
BASE_URL = ("http://tieba.baidu.com/f?ie=utf-8"
            "&kw=%E6%B2%B3%E5%8D%97%E7%A7%91%E6%8A%80%E5%AD%A6%E9%99%A2"
            "&fr=search&pn=")

# Number of list pages to crawl.
PAGE_COUNT = 27

# NOTE(review): Tieba appears to paginate thread lists by item offset in
# steps of 50 (pn=0, 50, 100, ...) rather than by page number -- confirm.
PAGE_STEP = 50

# Directory the images are written to (Windows path, as in the original).
IMG_DIR = r'D:\img'


def getHtml(url):
    """Fetch *url* and return the raw, undecoded response body.

    The connection is closed before returning. The result is a byte string
    (``str`` on Python 2, ``bytes`` on Python 3).
    """
    with closing(urlopen(url)) as page:
        return page.read()


def getImg(html, x, dirpath='', retrieve=None):
    """Download every image referenced in *html* and return the next index.

    Args:
        html: Page source, either text or a UTF-8 encoded byte string.
        x: Number used to name the first image (``'<x>.jpg'``); it is
            incremented once per downloaded image.
        dirpath: Existing directory the images are saved into. The default
            ``''`` saves into the current working directory.
        retrieve: Callable ``retrieve(url, filename)`` that performs the
            download. Defaults to ``urlretrieve``; mainly useful for testing
            or for plugging in another download backend.

    Returns:
        The index to use for the next image, i.e. *x* plus the number of
        images found in *html*.
    """
    if retrieve is None:
        retrieve = urlretrieve

    # On Python 3 the page arrives as bytes, which a text pattern cannot
    # search. (On Python 2 ``bytes is str``, so byte strings pass through
    # unchanged.)
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    imList = IMG_RE.findall(html)
    print(imList)
    for i in imList:
        print(i)
        print(x)
        retrieve(i, os.path.join(dirpath, '%s.jpg' % x))
        x += 1
    return x


def main():
    """Crawl ``PAGE_COUNT`` list pages and save their images to ``IMG_DIR``."""
    if not os.path.isdir(IMG_DIR):
        os.makedirs(IMG_DIR)

    x = 1  # running image number, shared across pages so names never clash
    for k in range(PAGE_COUNT):
        ul = BASE_URL + str(k * PAGE_STEP)
        print(ul)
        html = getHtml(ul)
        x = getImg(html, x, IMG_DIR)


if __name__ == '__main__':
    main()

原创粉丝点击