Python 爬虫小记之一

来源:互联网 发布:素手浣花 知乎 编辑:程序博客网 时间:2024/05/21 19:34

学习爬虫，先下载美女图片练练手。

# coding=utf-8
"""Download every .jpg referenced by a Baidu image-search result page.

The script fetches the search page, extracts image URLs with a regular
expression and stores each successfully fetched image as ``<n>.jpg`` in the
current working directory.  It runs on both Python 2.7 and Python 3.
"""
import re
import shutil
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

# Kept from the original script; nothing in this file reads it.
FORBIDDEN = "403 Forbidden"

# Image URLs appear in the page source as  URL":"http...jpg",
_IMG_URL_RE = re.compile(r'URL":"(http.+?\.jpg)",')

SEARCH_URL = "http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1491787331416_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E5%9B%BE%E7%89%87"


def getHtml(url):
    """Return the raw body of *url* exactly as read from the response.

    The response object is always closed, even if reading fails.
    """
    with closing(urlopen(url)) as page:
        return page.read()


def _saveImage(imgurl, filename):
    """Fetch *imgurl* once and write it to *filename*.

    Returns True when the server answered 200 and the file was written,
    False when the URL could not be opened or answered with another status.
    """
    try:
        response = urlopen(imgurl)
    except (IOError, ValueError):
        # IOError covers connection failures (and HTTP error statuses on
        # Python 3, where urlopen raises for them); ValueError covers
        # strings that are not a usable URL.  Either way: skip this image.
        return False
    with closing(response):
        if response.getcode() != 200:
            return False
        # Stream the already-open response instead of requesting the
        # image a second time.
        with open(filename, 'wb') as out:
            shutil.copyfileobj(response, out)
    return True


def getImg(html):
    """Download all images referenced in *html* and return their URLs.

    *html* is the page source, either text or the bytes returned by
    ``getHtml``.  Images are saved as ``0.jpg``, ``1.jpg``, ... in the
    current working directory; the counter only advances for images that
    were actually saved.  The returned list contains every URL matched in
    the page, including those that could not be downloaded.
    """
    if not isinstance(html, str):
        # On Python 3 the page arrives as bytes, which a text pattern cannot
        # search.  UTF-8 is assumed here (the search URL requests ie=utf-8).
        html = html.decode('utf-8', 'replace')
    imglist = _IMG_URL_RE.findall(html)
    x = 0
    for imgurl in imglist:
        if _saveImage(imgurl, '%s.jpg' % x):
            x += 1
    return imglist


def main():
    """Fetch the search page and print the list of image URLs found."""
    html = getHtml(SEARCH_URL)
    print(getImg(html))


if __name__ == "__main__":
    main()
0 0
原创粉丝点击