Python爬虫获取图片并下载保存至本地

来源:互联网 发布:拳皇2002um出招优化 编辑:程序博客网 时间:2024/04/27 23:04

1、抓取煎蛋网上的图片。

2、代码如下:

import os
import urllib.request

# Browser-like User-Agent attached to every request made by url_open(), so
# requests do not go out with urllib's default User-Agent.
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.3; WOW64; rv:51.0) '
              'Gecko/20100101 Firefox/51.0')

# Characters that cannot occur inside a quoted ``src`` value.  If a candidate
# address contains one of them, the '.jpg' that was found belongs to a later
# attribute or tag, not to the ``img src`` currently being examined.
_ADDR_STOP_CHARS = '"\'<> \t\r\n'


def url_open(url):
    """Fetch *url* and return the raw response body as bytes.

    The request carries the ``USER_AGENT`` header.  The response is closed
    before returning.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', USER_AGENT)
    # Open the Request object (not the bare URL); otherwise the header that
    # was just added is silently dropped.
    with urllib.request.urlopen(req) as response:
        return response.read()


def get_page(url):
    """Return the current page number shown on *url*, as a string.

    The page is expected to contain markup of the form
    ``current-comment-page">[2356]``; the text between the brackets is
    returned (``'2356'`` in that example).

    Raises:
        ValueError: if the marker or the closing bracket cannot be found.
    """
    html = url_open(url).decode('utf-8')
    marker = html.find('current-comment-page')
    if marker == -1:
        raise ValueError('current-comment-page marker not found in ' + url)
    # 23 = len('current-comment-page') + len('">['): skip to the first digit.
    start = marker + 23
    end = html.find(']', start)
    if end == -1:
        raise ValueError('unterminated page number in ' + url)
    return html[start:end]


def find_imgs(url):
    """Return the list of ``.jpg`` image addresses found on the page *url*.

    The page is scanned for ``img src=`` followed by a quoted address ending
    in ``.jpg`` within 255 characters of the tag.  Addresses that do not
    already start with ``http://`` or ``https://`` (for example
    protocol-relative ``//host/a.jpg``) are prefixed with ``http:``.
    """
    html = url_open(url).decode('utf-8')
    img_addrs = []
    tag = 'img src='
    start = html.find(tag)
    while start != -1:
        # The address begins after the tag text and its opening quote.
        addr_start = start + len(tag) + 1
        end = html.find('.jpg', addr_start, start + 255)
        addr = html[addr_start:end + 4] if end != -1 else ''
        if addr and not any(ch in _ADDR_STOP_CHARS for ch in addr):
            if not addr.startswith(('http://', 'https://')):
                addr = 'http:' + addr
            img_addrs.append(addr)
            resume = end
        else:
            # No usable '.jpg' for this tag: continue right after it so an
            # image tag that follows closely is still examined on its own.
            resume = addr_start
        start = html.find(tag, resume)
    return img_addrs


def save_imgs(folder, img_addrs):
    """Download every address in *img_addrs* into the current directory.

    Each file is named after the last path component of its address.
    *folder* is accepted for interface compatibility but is not used here:
    ``download_mm`` changes into the target folder before calling this.
    """
    for each in img_addrs:
        filename = each.split('/')[-1]
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        img = url_open(each)
        with open(filename, 'wb') as f:
            f.write(img)


def download_mm(folder='mm', pages=10):
    """Download the images from the newest *pages* pages into *folder*.

    *folder* is created if it does not exist, and the process changes its
    working directory into it.  Pages are visited from the newest page
    number downwards, stopping early once page 1 has been processed.
    """
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = 'http://jandan.net/ooxx/'
    latest_page = int(get_page(url))

    for i in range(pages):
        # Offset from the newest page; subtracting i cumulatively from a
        # running counter would skip pages (N, N-1, N-3, N-6, ...).
        page_num = latest_page - i
        if page_num < 1:
            break
        page_url = url + 'page-' + str(page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)


if __name__ == '__main__':
    download_mm()


0 0
原创粉丝点击