Python3 抓取网页中的图片

来源:互联网 发布:淘宝宝贝主图素材 编辑:程序博客网 时间:2024/04/30 20:37
# Download every JPG/PNG/GIF image linked from a web page (Python 3).

import urllib.request
import socket
import re
import sys
import os

# Directory the downloaded images are saved into.
targetDir = r"C:\Users\elqstux\Desktop\pic"

# Seconds to wait on any network operation before giving up.
TIMEOUT_SECONDS = 30

# Absolute http(s) URL ending in an image extension.
#   group 1: the whole URL, group 2: the bare extension.
# The extension must follow a literal dot and end at a word boundary so that
# URLs such as ".../gift" or "gifts.com" are not cut off at "gif"; quotes and
# angle brackets are excluded so a match cannot run from one HTML attribute
# into the next.
IMAGE_URL_RE = re.compile(r'(https?:[^\s"\'<>]*?\.(jpg|png|gif))\b')


def destFile(path):
    """Return the local file path under ``targetDir`` for the URL ``path``.

    The file name is whatever follows the last '/' in ``path`` (the whole
    string when it contains no '/').  ``targetDir`` is created first,
    including any missing parent directories, if it does not exist yet.
    """
    os.makedirs(targetDir, exist_ok=True)
    fileName = path.rsplit('/', 1)[-1]
    return os.path.join(targetDir, fileName)


def decodeHtml(contentBytes, charset):
    """Decode the raw page bytes into text.

    ``charset`` is the encoding announced by the server, or ``None``.  UTF-8
    is used when it is missing or names an unknown codec; undecodable bytes
    are replaced rather than raising.
    """
    try:
        return contentBytes.decode(charset or "utf-8", errors="replace")
    except LookupError:
        return contentBytes.decode("utf-8", errors="replace")


def main():
    """Fetch the page and download each distinct image URL found in it."""
    hostname = "http://www.douban.com"

    # urlretrieve() has no timeout argument; the socket-level default covers
    # both the page fetch and every image download.
    socket.setdefaulttimeout(TIMEOUT_SECONDS)

    req = urllib.request.Request(hostname)
    with urllib.request.urlopen(req) as webpage:
        contentBytes = webpage.read()
        charset = webpage.headers.get_content_charset()

    html = decodeHtml(contentBytes, charset)

    # De-duplicate, then sort so that runs are reproducible.
    links = sorted({link for link, _ in IMAGE_URL_RE.findall(html)})
    for link in links:
        print(link)
        try:
            urllib.request.urlretrieve(link, destFile(link))
        except OSError as err:
            # URLError, HTTPError and socket timeouts all derive from
            # OSError; one dead link should not abort the remaining ones.
            print("failed to download %s: %s" % (link, err), file=sys.stderr)


if __name__ == "__main__":
    main()

# List every JPG/PNG/GIF image URL found in a web page, together with its
# extension, to show how re.findall() reports capture groups (Python 3).

import urllib.request
import socket
import re
import sys
import os

# Directory downloaded images would be saved into.
targetDir = r"H:\pic"

# Seconds to wait for the page before giving up.
TIMEOUT_SECONDS = 30

# Absolute http(s) URL ending in an image extension.
# The pattern contains two nested pairs of capturing parentheses, hence two
# groups: group 1 is the whole URL, group 2 the bare extension.
# The extension must follow a literal dot and end at a word boundary so that
# URLs such as ".../gift" or "gifts.com" are not cut off at "gif"; quotes and
# angle brackets are excluded so a match cannot run from one HTML attribute
# into the next.
IMAGE_URL_RE = re.compile(r'(https?:[^\s"\'<>]*?\.(jpg|png|gif))\b')


def destFile(path):
    """Return the local file path under ``targetDir`` for the URL ``path``.

    The URL is split on its last '/' and the trailing part is used as the
    file name (the whole string when it contains no '/').  ``targetDir`` is
    created first, including any missing parent directories, if it does not
    exist yet.
    """
    os.makedirs(targetDir, exist_ok=True)
    fileName = path.rsplit('/', 1)[-1]
    return os.path.join(targetDir, fileName)


def decodeHtml(contentBytes, charset):
    """Decode the raw page bytes into text.

    ``charset`` is the encoding announced by the server, or ``None``.  UTF-8
    is used when it is missing or names an unknown codec; undecodable bytes
    are replaced rather than raising.
    """
    try:
        return contentBytes.decode(charset or "utf-8", errors="replace")
    except LookupError:
        return contentBytes.decode("utf-8", errors="replace")


def main():
    """Fetch the page and print each image URL followed by its extension."""
    hostname = "http://www.douban.com/"

    req = urllib.request.Request(hostname)
    with urllib.request.urlopen(req, timeout=TIMEOUT_SECONDS) as webpage:
        contentBytes = webpage.read()
        charset = webpage.headers.get_content_charset()

    # findall() returns a list; because the pattern has groups, each entry is
    # a tuple holding only the text matched by the groups.
    match = IMAGE_URL_RE.findall(decodeHtml(contentBytes, charset))
    for picname, picType in match:
        print(picname)
        print(picType)

    # Sample output recorded in the original article:
    #   http://img3.douban.com/pics/blank.gif
    #   gif
    #   http://img3.douban.com/icon/g111328-1.jpg
    #   jpg
    #   http://img3.douban.com/pics/blank.gif
    #   gif
    #   http://img3.douban.com/icon/g197523-19.jpg
    #   jpg
    #   http://img3.douban.com/pics/blank.gif
    #   gif
    #   ...


if __name__ == "__main__":
    main()

转载来源:http://blog.csdn.net/wangyangkobe/article/details/8712121
0 0