python获取图片

来源:互联网 发布:分类信息系统源码 编辑:程序博客网 时间:2024/05/22 03:03
#!/usr/bin/env python
# coding: utf-8
"""Download every image referenced by a web page.

The page is fetched, parsed with BeautifulSoup, and the URL found in the
chosen attribute (normally ``src``) of each chosen tag (normally ``img``)
is downloaded into a local directory under a sequential numeric name.

PIL was not available when this was written, so images are stored exactly
as downloaded; nothing is cropped, resized or converted.

PS: handy for grabbing a whole photo gallery in one go.
"""
import os
import posixpath
import re
import time
import urllib.request as request
from functools import wraps
from http.client import HTTPException
from os import listdir
from urllib.parse import urljoin, urlsplit

try:
    from bs4 import BeautifulSoup
except ImportError:
    # Only analyse() needs bs4; keep the stdlib-only helpers importable
    # without it and report the missing package when analyse() is called.
    BeautifulSoup = None

# Matches file names whose stem is purely numeric, e.g. "12" or "12.jpg".
_NUMBERED_NAME = re.compile(r'(\d+)(?:\.|$)')


def time_caculate(func):
    """Decorator that reports how long each call of *func* takes.

    After every call that returns normally, one line of the form
    ``the N th call func expends S s`` is printed, where N counts the calls
    made through this wrapper (starting at 1) and S is the elapsed time in
    seconds.

    :param func: the callable to wrap
    :return: a wrapper that forwards all arguments and the return value
    """
    call_number = 1

    @wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal call_number
        # perf_counter() is monotonic, so a system clock adjustment during
        # the call cannot produce a negative or inflated duration.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print('the {} th call func expends {} s'.format(call_number, elapsed))
        call_number += 1
        return result

    return wrapper


@time_caculate
def read_img_from_url(url_):
    """Return the raw bytes served at *url_*.

    :param url_: the url of the resource to download
    :return: the response body as ``bytes``
    """
    # The context manager closes the connection even if read() fails.
    with request.urlopen(url_) as response:
        return response.read()


def get_avaliabel_index(filepath):
    """Return a numeric index that no file in *filepath* is using yet.

    The result is one more than the larger of (a) the number of entries in
    the directory and (b) the highest purely numeric file-name stem found
    there, so an empty directory yields 1 and an existing ``7.png`` is never
    overwritten by a later download.

    :param filepath: the specific path you want to read
    :return: the unused index of the file
    """
    names = listdir(filepath)
    highest = len(names)
    for name in names:
        match = _NUMBERED_NAME.match(name)
        if match:
            highest = max(highest, int(match.group(1)))
    return highest + 1


def get_html(url_):
    """Fetch *url_* and return its body decoded to text.

    The charset declared in the response's Content-Type header is used when
    present; otherwise (or when that charset name is unknown to Python) the
    body is decoded as UTF-8.

    :param url_: the url you want to open with
    :return: the converted string content
    """
    with request.urlopen(url_) as response:
        html = response.read()
        charset = response.headers.get_content_charset() or 'utf-8'
    try:
        return str(html, encoding=charset)
    except LookupError:
        # The server announced a codec Python does not know.
        return str(html, encoding='utf-8')


def _image_extension(url):
    """Return the file extension (with leading dot) of the path part of *url*."""
    # Only the path is inspected so query strings, fragments and the dots in
    # the host name never end up in the local file name.
    return posixpath.splitext(urlsplit(url).path)[1]


def analyse(data, key, tag, path='f:/i/', base_url=None):
    """Download the resource referenced by every matching tag in *data*.

    Each download is saved in *path* as ``<index><extension>``, where the
    index continues from :func:`get_avaliabel_index` and the extension is
    taken from the resource url. A resource that cannot be downloaded or
    written is reported on stdout and skipped; the remaining ones are still
    processed.

    :param data: the converted string content which is the style of html
    :param key:  the key of tag in the content like 'img', 'p'
    :param tag:  the tag in the segment of key   like 'src', 'alt'
    :param path: the directory the files are written to; created if missing
    :param base_url: the url of the page, used to resolve relative
                     references; when omitted references are used as-is
    :return: nothing
    :raises ImportError: if the ``bs4`` package is not installed
    """
    if BeautifulSoup is None:
        raise ImportError("analyse() requires the 'bs4' package")

    soup = BeautifulSoup(data, "lxml")
    os.makedirs(path, exist_ok=True)
    count = get_avaliabel_index(path)

    for element in soup.find_all(key):
        # Split out the attributes of the tag.
        source = element.attrs.get(tag)
        # Multi-valued attributes come back as lists; only a non-empty
        # string can be a url.
        if not isinstance(source, str) or not source:
            continue

        image_url = urljoin(base_url, source) if base_url else source
        filepath = os.path.join(path, str(count) + _image_extension(image_url))
        try:
            # Download before opening the target so a failed request does
            # not leave an empty file behind.
            image_data = read_img_from_url(image_url)
            with open(filepath, 'wb') as file:
                file.write(image_data)
        except (OSError, ValueError, HTTPException) as error:
            # URLError/HTTPError are OSError subclasses; ValueError covers
            # malformed or scheme-less urls.
            print('skip {}: {}'.format(image_url, error))
            continue
        count += 1


if __name__ == '__main__':
    url = 'http://www.moko.cc/post/1161657.html'
    content = get_html(url)
    analyse(content, key='img', tag='src', base_url=url)
1 0
原创粉丝点击