python第一天

来源:互联网 发布:广东广电网络收费标准 编辑:程序博客网 时间:2024/06/14 22:46
# coding=utf-8
"""Download every image linked from a paginated gallery site.

Pipeline (translated from the original notes):
  1. fetch a list page and find the detail-page links,
  2. fetch each detail page and find its ``<img src="...">`` URLs,
  3. download each image into ``PIC_DIR``.

The module runs on both Python 2 and Python 3.
"""
import contextlib
import os
import re
import threading
import time
import uuid

try:  # Python 2 module names, as used by the original script.
    import Queue
    import urllib2
    from urlparse import urljoin
except ImportError:  # Python 3 equivalents, bound to the same names.
    import queue as Queue
    import urllib.request as urllib2
    from urllib.parse import urljoin

# Directory the images are written to unless a caller overrides it.
PIC_DIR = "/Users/jumpbox/Desktop/Pic/"
# A list page URL is LIST_URL_PREFIX + <page number> + LIST_URL_SUFFIX.
LIST_URL_PREFIX = "http://www.2cto.com/meinv/sexmv/list_1_"
LIST_URL_SUFFIX = ".html"
# Browser-like header sent by openUrl() when fetching HTML pages.
HTML_HEADERS = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'}
# Seconds to wait on a request before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30
# Upper bound on the number of detail pages processed concurrently.
MAX_WORKERS = 8

IMG_SRC_RE = re.compile('<img src="(.*?)"', re.S)
DETAIL_LINK_RE = re.compile(
    '<div class="name"><a target="_blank" href="(.*?)"', re.S)


def _fetch(url, headers=None):
    """Return the raw body of *url*, or None (after printing the error)."""
    try:
        request = urllib2.Request(url, None, headers or {})
        response = urllib2.urlopen(request, timeout=REQUEST_TIMEOUT)
        # closing() releases the connection even if read() raises.
        with contextlib.closing(response):
            return response.read()
    except Exception as e:
        # Exception, not BaseException: Ctrl-C must still stop the crawl.
        print(e)
        return None


def _to_text(data):
    """Decode a bytes body to text on Python 3; leave Python 2 str as is."""
    if isinstance(data, bytes) and not isinstance(data, str):
        # Only ASCII URLs are extracted, so undecodable bytes are replaced.
        return data.decode("utf-8", "replace")
    return data


def get_file(url):
    """Download *url* with default headers.

    Returns the response body, or None when the request fails (the error is
    printed, not raised).
    """
    return _fetch(url)


def savePic(data, directory=PIC_DIR):
    """Write *data* to a new, uniquely named ``.png`` file in *directory*.

    Does nothing when *data* is None, so the result of a failed get_file()
    can be passed straight in. The directory is created if it is missing.
    """
    if data is None:
        return
    if not os.path.isdir(directory):
        try:
            os.makedirs(directory)
        except OSError:
            # Another thread may have created it in the meantime.
            if not os.path.isdir(directory):
                raise
    # The timestamp alone can repeat for images saved in quick succession;
    # the random suffix keeps concurrent saves from overwriting each other.
    name = str(time.time()) + "_" + uuid.uuid4().hex[:8] + ".png"
    with open(os.path.join(directory, name), "wb") as out:
        out.write(data)


def readySavePic(nextHtmlPic, directory=PIC_DIR):
    """Download every image referenced by the detail page *nextHtmlPic*.

    Image URLs are resolved against the page URL, so relative ``src`` values
    work. Pages or images that fail to download are skipped.
    """
    print("开始下载图片:" + nextHtmlPic)
    content = openUrl(nextHtmlPic)
    if content is None:
        return
    for src in IMG_SRC_RE.findall(_to_text(content)):
        image_url = urljoin(nextHtmlPic, src)
        print(image_url)
        savePic(get_file(image_url), directory)


def _drain_detail_pages(tasks, directory):
    """Worker loop: process detail-page URLs from *tasks* until it is empty."""
    while True:
        try:
            detail_url = tasks.get_nowait()
        except Queue.Empty:
            return
        try:
            readySavePic(detail_url, directory)
        except Exception as e:
            # One bad page must not take down the remaining downloads.
            print(e)


def _download_list_page(page, directory=PIC_DIR):
    """Process list page *page*; return how many detail links it contained."""
    list_url = LIST_URL_PREFIX + str(page) + LIST_URL_SUFFIX
    content = openUrl(list_url)
    if content is None:
        return 0
    detail_urls = [urljoin(list_url, href)
                   for href in DETAIL_LINK_RE.findall(_to_text(content))]
    tasks = Queue.Queue()
    for detail_url in detail_urls:
        tasks.put(detail_url)
    # Pass the callable and its arguments; calling the function inside
    # Thread(target=...) would run it synchronously in this thread.
    workers = [
        threading.Thread(target=_drain_detail_pages, args=(tasks, directory))
        for _ in range(min(MAX_WORKERS, len(detail_urls)))
    ]
    for worker in workers:
        worker.daemon = True
        worker.start()
    # Wait so the caller's "page finished" message is truthful.
    for worker in workers:
        worker.join()
    return len(detail_urls)


def getNextHtmlPic(page):
    """Download all images reachable from list page number *page*.

    *page* may be a string or an integer. Returns None.
    """
    _download_list_page(page)


def openUrl(url):
    """Fetch *url* with a browser-like User-Agent.

    Returns the response body, or None when the request fails (the error is
    printed, not raised).
    """
    return _fetch(url, HTML_HEADERS)


def main(start_page=4):
    """Crawl list pages from *start_page* upwards.

    Stops at the first page that cannot be fetched or lists no detail pages.
    """
    page = start_page
    while _download_list_page(page):
        print("**************第" + str(page) + "页下载完毕*****************")
        page += 1


if __name__ == "__main__":
    main()
0 0
原创粉丝点击