多线程实际应用
来源:互联网 发布:access数据库下载 绿色 编辑:程序博客网 时间:2024/06/14 19:25
# -*- coding:utf-8 -*-
"""Multi-threaded crawler example.

Downloader threads fetch listing pages and push the raw HTML onto a shared
queue; parser threads then pull that HTML, extract every ``<img src>`` value
and append it to ``result.txt`` together with progress messages.
"""
import sys

if sys.version_info[0] < 3:
    # Python 2 only: byte-string literals are implicitly decoded as ASCII when
    # written through the codecs writer, so make UTF-8 the default instead.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

import codecs
import threading
from time import sleep  # noqa: F401 - kept from the original import list

try:
    from queue import Empty, Queue  # Python 3
except ImportError:
    from Queue import Empty, Queue  # Python 2

import requests
from lxml import etree

PAGE_URL_PREFIX = "http://blog.jobbole.com/all-posts/page/"
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0",
}
# Seconds to wait for the server before a download is treated as failed.
REQUEST_TIMEOUT = 10
# How often an idle parser wakes up to re-check ``parse_switch``.
PARSE_POLL_SECONDS = 0.5

file_handle = codecs.open("result.txt", "w", encoding="utf-8")
# Serialises writes so lines from different threads never interleave.
_file_lock = threading.Lock()

# Shared state lives at module level so the thread classes also work when this
# module is imported rather than run as a script.
parse_content_queue = Queue()
parse_switch = True


def write_lines(lines):
    """Append each item of *lines* to the result file as one line, atomically."""
    with _file_lock:
        for line in lines:
            file_handle.write(line + "\n")


class DownloadThread(threading.Thread):
    """Worker that downloads the pages whose numbers it takes from *queue*."""

    def __init__(self, thread_name, queue):
        """Store the display name used in log lines and the page-number queue."""
        super(DownloadThread, self).__init__()
        self.thread_name = thread_name
        self.queue = queue

    def run(self):
        """Fetch pages until the queue is drained, handing HTML to the parsers."""
        while True:
            # get_nowait() instead of empty()+get(): another worker may take
            # the last page between the two calls and leave get() blocked.
            try:
                page = self.queue.get_nowait()
            except Empty:
                write_lines([self.thread_name + "已经下班"])
                break

            write_lines([self.thread_name + "准备下载" + str(page)])
            url = PAGE_URL_PREFIX + str(page)
            try:
                response = requests.get(
                    url=url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
                )
            except requests.RequestException:
                # A network error must not kill the worker; report it below.
                response = None

            if response is not None and response.status_code == 200:
                parse_content_queue.put(response.content)
            else:
                write_lines([self.thread_name + "网页下载失败" + url])
            write_lines([self.thread_name + "下载完成" + str(page)])


class ParseThread(threading.Thread):
    """Worker that extracts image URLs from HTML in ``parse_content_queue``."""

    def __init__(self, thread_name):
        """Store the display name used in log lines."""
        super(ParseThread, self).__init__()
        self.thread_name = thread_name

    def run(self):
        """Parse queued pages until ``parse_switch`` is turned off."""
        while parse_switch:
            # Poll with a timeout so the loop condition is re-evaluated; a
            # plain get() would block forever once the queue is empty.
            try:
                content = parse_content_queue.get(timeout=PARSE_POLL_SECONDS)
            except Empty:
                continue

            try:
                doc = etree.HTML(content)
                # etree.HTML may yield None when there is nothing to parse.
                img_list = doc.xpath("//img/@src") if doc is not None else []
                lines = ["--------------------------", self.thread_name + "开始解析"]
                lines.extend(img_list)
                lines.append(self.thread_name + "结束解析")
                # One locked write keeps a page's results contiguous.
                write_lines(lines)
            finally:
                # Always acknowledge the item so Queue.join() cannot hang.
                parse_content_queue.task_done()


def main():
    """Download 30 listing pages, parse them, then shut everything down."""
    global parse_switch

    download_queue = Queue(maxsize=30)
    for page in range(1, download_queue.maxsize + 1):
        download_queue.put(page)

    parse_thread_list = []
    try:
        download_thread_list = [
            DownloadThread(thread_name, download_queue)
            for thread_name in ("下载器1", "下载器2", "下载器3", "下载器4")
        ]
        for thread in download_thread_list:
            thread.start()
        # join() already waits for the queue to drain; no busy-wait needed.
        for thread in download_thread_list:
            thread.join()

        parse_switch = True
        for thread_name in ("解析器1", "解析器2", "解析器3"):
            p = ParseThread(thread_name)
            p.start()
            parse_thread_list.append(p)
        # Blocks until every queued page has been acknowledged by a parser.
        parse_content_queue.join()
    finally:
        parse_switch = False
        for thread in parse_thread_list:
            thread.join()
        file_handle.close()


if __name__ == '__main__':
    main()
阅读全文
0 0
- 多线程实际应用
- 多线程的实际应用
- vector在实际多线程开发中的应用
- Java多线程的两种实际应用
- java多线程有哪些实际的应用场景?
- C++多线程——三种线程实现方式的区别与实际应用建议
- html实际应用
- JavaScript实际应用:innerHTMl
- aop实际应用
- SOA 的实际应用
- SVN实际应用篇
- Filter的实际应用
- 反射的实际应用
- 考试--学习--实际应用
- 门禁的实际应用
- awk sed 实际应用
- PowerDesigner 实际当中应用
- highcharts 实际开发应用
- python学习六(类、不定长参)
- python 之 pandas 详解
- GitLab和SourceTree的使用参考网址
- systemctl命令
- Max Sum Plus Plus HDU
- 多线程实际应用
- 字符串字符数统计
- 数据结构与算法之动态规划和贪心
- 计算机科学基础知识(一):The Memory Hierarchy
- Bean property 'xxx' is not writable or has an invalid setter method
- uva12105 【DP】
- js的正则表达式
- Python使用MySQL数据库的方法以及一个实例
- Caffe学习:使用pycaffe绘制网络结构