基础的 Python 抓取网站图片的例子

来源:互联网 发布:庄子諵哗 知乎 编辑:程序博客网 时间:2024/06/10 22:39
# -*- coding: utf-8 -*-
"""Minimal example of downloading the ``.jpg`` images referenced by a web page.

The page is fetched, image URLs are pulled out with a regular expression,
and each image is saved as ``<index>.jpg`` inside a target directory.
Runs on both Python 2 and Python 3.
"""
import os
import re
from contextlib import closing

try:  # Python 3
    from urllib.error import URLError
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib2 import URLError, urlopen


class Spider(object):
    """Download every matching ``.jpg`` image found on a single page."""

    # Matches src="....jpg" immediately followed by a ``pic_ext`` attribute.
    # ``[^"]`` keeps the capture inside one attribute value so a match can
    # never run across an earlier, unrelated src="..." on the same line.
    # Compiled once here instead of on every call.
    _IMAGE_URL_RGX = re.compile(r'src="([^"]+?\.jpg)" pic_ext')

    def downLoadImage(self, url, imageDirName):
        """Fetch ``url`` and save all matching images into ``imageDirName``.

        Args:
            url: Address of the page to scan for images.
            imageDirName: Directory that receives the images; created if it
                does not exist yet.
        """
        html = self.__getHtml(url)
        imageList = self.__getImageUrls(html)
        self.__saveImage(imageDirName, imageList)

    def __mkdir(self, dirName):
        """Create ``dirName`` (including parents) unless it already exists."""
        if os.path.exists(dirName):
            print("%s already exists" % dirName)
        else:
            os.makedirs(dirName)

    def __getHtml(self, url):
        """Return the body of ``url`` as text.

        The response is closed as soon as it has been read. Bytes are decoded
        as UTF-8 with undecodable sequences replaced: only the (ASCII) image
        URLs are extracted from the text, so a lossy decode is acceptable.
        """
        with closing(urlopen(url)) as response:
            raw = response.read()
        return raw.decode("utf-8", "replace")

    def __getImageUrls(self, html):
        """Return the list of ``.jpg`` URLs found in the text ``html``."""
        return self._IMAGE_URL_RGX.findall(html)

    def __saveImage(self, dirName, imageList):
        """Download each URL in ``imageList`` to ``dirName/<index>.jpg``.

        The index starts at 0 and only advances after a successful download,
        so the saved files are numbered without gaps. A URL that cannot be
        fetched is reported on stdout and skipped.
        """
        self.__mkdir(dirName)
        imageIndex = 0
        for imageUrl in imageList:
            try:
                # Read the whole image before opening the output file so a
                # failed download does not leave an empty file behind.
                with closing(urlopen(imageUrl)) as response:
                    data = response.read()
            except URLError as e:
                print(e.reason)
                continue
            except ValueError as e:
                # urlopen raises ValueError for malformed or relative URLs;
                # skip that entry rather than abort the remaining downloads.
                print(e)
                continue
            fileName = os.path.join(dirName, "%d.jpg" % imageIndex)
            with open(fileName, "wb") as imageFile:
                imageFile.write(data)
            imageIndex += 1


if __name__ == "__main__":
    spider = Spider()
    spider.downLoadImage('http://tieba.baidu.com/p/2460150866', "picture")

0 0
原创粉丝点击