斗图图片爬取
来源:互联网 发布:剑灵捏脸数据如何保存 编辑:程序博客网 时间:2024/05/17 06:48
# -*- coding:utf8 -*-
# Crawler that walks the doutula.com photo list pages, follows every detail
# link found on each list page and saves the picture of each detail page to
# a local directory.
import os
import re
import time

import requests
from lxml import etree

# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')

# Inclusive range of list pages crawled when the file is run as a script.
FIRST_PAGE = 1
LAST_PAGE = 50


class DouTu:
    """Download pictures from the doutula.com photo list.

    Call order: ``get_url`` fetches one list page, ``parse_url`` extracts
    the detail-page links from it, ``parse`` extracts the picture title and
    picture URL from one detail page, and ``write`` stores the picture.
    """

    def __init__(self, save_dir='图片', delay=3, timeout=10):
        """Create a crawler.

        Args:
            save_dir: Directory the pictures are written to. It is created
                on demand.
            delay: Seconds to sleep after each detail page is processed.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        # Browser-like User-Agent sent with every request.
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0"
        }
        self.save_dir = save_dir
        self.delay = delay
        self.timeout = timeout

    def _get(self, url):
        """GET ``url`` with the crawler's headers and timeout.

        Returns:
            The ``requests.Response`` on success, or ``None`` when the
            request fails or the server answers with an HTTP error status.
        """
        try:
            response = requests.get(
                url=url, headers=self.header, timeout=self.timeout
            )
            response.raise_for_status()
        except requests.RequestException:
            return None
        return response

    @staticmethod
    def _safe_name(name):
        """Turn a scraped title into a string usable as a file name."""
        cleaned = _UNSAFE_FILENAME_CHARS.sub('_', name).strip()
        return cleaned or 'untitled'

    def get_url(self, page):
        """Fetch one list page and process every detail link on it.

        Args:
            page: Query-string suffix appended to the list URL,
                e.g. ``'?page=3'``.
        """
        url = "https://www.doutula.com/photo/list/" + page
        response = self._get(url)
        if response is None:
            # One unreachable list page should not abort the whole crawl.
            print('页面请求失败,跳过!')
            return
        self.parse_url(response.text)

    def parse_url(self, response):
        """Extract the detail-page links from a list page and visit each.

        Args:
            response: HTML text of a list page.
        """
        if not response or not response.strip():
            return
        html = etree.HTML(response)
        if html is None:
            return
        # Links to the detail pages.
        link = html.xpath('//div[@class="page-content text-center"]/div/a/@href')
        for i in link:
            self.parse(i)
            # Pause between detail pages to throttle the request rate.
            time.sleep(self.delay)

    def parse(self, i):
        """Fetch one detail page and save the picture it shows.

        Args:
            i: URL of the detail page.
        """
        response = self._get(i)
        if response is None:
            print('页面请求失败,跳过!')
            return
        html = etree.HTML(response.text)
        if html is None:
            print('图片有问题,跳过!')
            return
        # Picture title.
        names = html.xpath('//div[@class="pic-title"]/h1/a/text()')
        # Picture URL.
        # NOTE(review): this path contains ``tbody``; confirm the served
        # markup really has that element (browsers insert it on their own).
        imgs = html.xpath(".//*[@id='detail']/div/div[1]/li/div[3]/div/div/div/div[1]/table/tbody/tr[1]/td/img/@src")
        if not names or not imgs:
            # The page layout did not match; skip instead of raising
            # IndexError and killing the crawl.
            print('图片有问题,跳过!')
            return
        self.write(names[0], imgs[0])

    def write(self, name, img):
        """Download the picture at ``img`` and store it as ``<name>.jpg``.

        Failures (network errors, HTTP error status, file-system errors)
        are reported on stdout and the picture is skipped.

        Args:
            name: Picture title, used as the file name after sanitizing.
            img: URL of the picture.
        """
        try:
            response = requests.get(
                url=img, headers=self.header, timeout=self.timeout
            )
            # Do not save an HTTP error page as if it were a picture.
            response.raise_for_status()
            if not os.path.isdir(self.save_dir):
                os.makedirs(self.save_dir)
            # The '.jpg' suffix is kept regardless of the real image type.
            path = os.path.join(self.save_dir, self._safe_name(name) + '.jpg')
            with open(path, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, IOError, OSError):
            print('图片有问题,跳过!')
        else:
            print('成功')


def main():
    """Crawl list pages FIRST_PAGE..LAST_PAGE, reporting progress per page."""
    dt = DouTu()
    for i in range(FIRST_PAGE, LAST_PAGE + 1):
        print("第%s页开始" % i)
        page = '?page=%s' % i
        dt.get_url(page)
        print("第%s页结束" % i)


if __name__ == '__main__':
    main()
阅读全文