python 爬虫入门1 网页图片保存

来源:互联网 发布:综合办公软件下载 编辑:程序博客网 时间:2024/05/16 15:50

# coding=utf-8

import urllib
import re

def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address of the page to download.

    Returns:
        Whatever ``read()`` yields for the response: ``bytes`` on
        Python 3, ``str`` on Python 2. No decoding is performed here.
    """
    # ``urlopen`` moved to ``urllib.request`` in Python 3; resolve it locally
    # so this function works on either major version.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the underlying connection even if read() raises.
        page.close()

def getImg(html):
    """Download every ``.jpg`` referenced by a ``src="..."`` attribute.

    Each image is saved in the current working directory as ``0.jpg``,
    ``1.jpg``, ... following the order in which the URLs appear in ``html``.

    Args:
        html: Page markup, either text or the raw bytes returned by
            ``read()`` on a Python 3 response.

    Returns:
        The list of matched image URLs, in document order. The list is
        empty (and nothing is downloaded) when no URL matches.
    """
    # ``urlretrieve`` moved to ``urllib.request`` in Python 3; resolve it
    # locally so this function works on either major version.
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # A text pattern cannot search bytes on Python 3, so decode first.
    # On Python 2 ``bytes`` is ``str`` and the input is left untouched.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode('utf-8', 'replace')

    # ``[^"]`` keeps a match inside one attribute value, and the escaped dot
    # requires a literal ".jpg" suffix instead of "any character + jpg".
    imgre = re.compile(r'src="([^"]+?\.jpg)"')
    imglist = imgre.findall(html)

    for x, imgurl in enumerate(imglist):
        urlretrieve(imgurl, '%s.jpg' % x)
    return imglist

if __name__ == "__main__":
    # Fetch the forum thread, save every .jpg it references into the current
    # directory, then print the list of image URLs that were found.
    html = getHtml("http://www.cocoachina.com/bbs/read.php?tid=182334&page=1")

    print(getImg(html))

0 0