Python 爬虫

来源:互联网 发布:java项目开发实例 编辑:程序博客网 时间:2024/06/04 17:41

图片获取

获取百度图片,例子:

"""获取网页图片"""class Demo:#获取网页信息    def getHtml(self,url):        page = urllib.urlopen(url)        html = page.read()        return html#匹配网页中的图片    def getImg(self,html):        #reg = r'src="(.*?\.jpg)" alt'        reg = r'"thumbURL":"(.*?\.jpg)"'        imgre = re.compile(reg)        imglist = re.findall(imgre,html)        x = 0        for imgurl in imglist:            urllib.urlretrieve(imgurl,'%s.jpg' % x)#保存到本地            x += 1spider = Demo()html = spider.getHtml("https://image.baidu.com/search/index?ct=201326592&z=&tn=baiduimage&ipn=r&word=%E5%A3%81%E7%BA%B8%20%E4%B8%8D%E5%90%8C%E9%A3%8E%E6%A0%BC%20%E7%BE%8E%E5%A5%B3&pn=0&istype=2&ie=utf-8&oe=utf-8&cl=2&lm=-1&st=-1&fr=&fmq=&ic=0&se=&sme=&width=&height=&face=0")print htmlspider.getImg(html)
原创粉丝点击