Multithread download tool for massive tiny files.
来源:互联网 发布:插画师卤猫 知乎 编辑:程序博客网 时间:2024/05/16 10:10
HTTP file downloader
built with the Python 2.7 standard library
# -*- coding: utf-8 -*-
"""Multithread download tool for massive tiny files.

Each job is a ``(file_name, file_url)`` tuple.  ``DownLoadDispatcher`` puts
the jobs on a queue and starts ``DownLoad`` worker threads that fetch every
URL with ``agent_request`` and write the response body into a folder.
"""
# -------------------------
# Author: Kun Liu
# Start date: 2017-03-06
# Latest edit: 2017-03-16
# email = lancelotdev@163.com
# python_version = Python 2.7.11
# ===================================

# ----- Python 3 compatible
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
# ---------------------------------
import re
import os
import sys
import threading
import urllib
import json
import logging
from time import ctime, sleep

try:  # Python 2 module names
    import Queue
    import urllib2
except ImportError:  # Python 3: same APIs under new names
    import queue as Queue
    import urllib.request as urllib2

# Request headers imitating a browser visit.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {
    'Connection': 'Keep-Alive',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
}

# Log settings: download failures are recorded in pythonDownload.log,
# which is truncated on every start (filemode='w').
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S',
                    filename='pythonDownload.log',
                    filemode='w')


def agent_request(url, timeout=40):
    """Fetch *url* with the module-level browser ``headers``.

    Args:
        url: Address to request.
        timeout: Timeout in seconds passed to ``urlopen`` (default 40).

    Returns:
        The raw response body as returned by ``resp.read()``.

    Raises:
        Whatever ``urlopen`` / ``read`` raise; nothing is caught here.
    """
    req = urllib2.Request(url, None, headers)
    resp = urllib2.urlopen(req, None, timeout)
    try:
        return resp.read()
    finally:
        # Always release the connection, even when read() fails.
        resp.close()


class DownLoad(threading.Thread):
    """Worker thread that downloads queued ``(file_name, file_url)`` jobs.

    Failed URLs are collected in ``self.fail_file_list`` and logged; each
    success increments the module-level ``success_case_num`` under ``mutex``.
    """

    # Characters removed from file names before writing to disk.
    _ILLEGAL_NAME_CHARS = frozenset("|\\?*<\":>+[]/'")

    def __init__(self, file_que, folder_name="PyDownload", file_ext=".torrent"):
        """Prepare the worker.

        Args:
            file_que: Either a list/tuple of ``(file_name, file_url)`` tuples
                (copied into a private queue) or a queue object that is used
                as-is, so several workers can share it.
            folder_name: Directory the files are written to; created when it
                does not exist yet.
            file_ext: Suffix appended to names that do not already end with
                it.  An empty string disables the suffix.
        """
        threading.Thread.__init__(self)
        # Create the target directory for any folder name, not only the
        # default one, otherwise every write into a custom folder fails.
        if folder_name and not os.path.isdir(folder_name):
            os.makedirs(folder_name)
        if isinstance(file_que, (list, tuple)):
            self.que = Queue.Queue()
            for job in file_que:
                self.que.put(job)
        else:
            self.que = file_que
        self.folder_name = folder_name
        self.file_ext = file_ext
        self.fail_file_list = []

    @classmethod
    def _legalize_file_name(cls, file_name):
        """Return *file_name* without the characters in _ILLEGAL_NAME_CHARS."""
        # file_name[:0] is an empty string of the same type as the input, so
        # the result keeps that type (byte string or unicode on Python 2).
        return file_name[:0].join(
            ch for ch in file_name if ch not in cls._ILLEGAL_NAME_CHARS)

    def run(self):
        """Process jobs until the queue is empty, then return."""
        global success_case_num
        print("%d thread is working!" % threading.active_count())
        while True:
            # get_nowait() instead of empty()+get(): with several workers on
            # one queue another thread may take the last job between the two
            # calls, and a blocking get() would then never return.
            try:
                file_tuple = self.que.get_nowait()
            except Queue.Empty:
                return
            file_name = file_tuple[0]
            file_url = file_tuple[1]
            try:
                file_name = self._legalize_file_name(file_name)
                if self.file_ext and not file_name.endswith(self.file_ext):
                    file_name += self.file_ext
                # Download before opening the target so a failed request
                # does not leave an empty file behind.
                file_data = agent_request(file_url)
                with open(os.path.join(self.folder_name, file_name), 'wb') as out_file:
                    out_file.write(file_data)
                with mutex:
                    success_case_num += 1
            except Exception as e:
                # Best effort: record the failure and go on with the next job.
                self.fail_file_list.append(file_url)
                logging.warning("DownLoad error:%sFail file: %s", e, file_url)


# Guards success_case_num, which is shared by all DownLoad threads.
mutex = threading.Lock()
success_case_num = 0


class DownLoadDispatcher(object):
    """Queue a list of ``(file_name, file_url)`` tuples and start workers."""

    def __init__(self, name_url_tuple_list, thread_num=1):
        """Store the job list.

        Args:
            name_url_tuple_list: Iterable of ``(file_name, file_url)`` tuples.
            thread_num: Number of ``DownLoad`` threads to start (default 1).
        """
        self.file_list = name_url_tuple_list
        self.thread_num = thread_num
        # Started workers, kept so callers can join() them or read their
        # fail_file_list afterwards.
        self.threads = []

    def start_download(self):
        """Fill a shared queue and start the worker threads.

        Returns without waiting for the workers to finish.  Exceptions raised
        while queuing or starting threads are printed, not propagated.
        """
        try:
            file_que = Queue.Queue()
            for f_tuple in self.file_list:
                file_que.put(f_tuple)
            # Number of threads.
            for _ in range(self.thread_num):
                worker = DownLoad(file_que)
                worker.start()
                self.threads.append(worker)
                # Download frequency control
                sleep(1)
        except Exception as e:
            print("pic_downloader exception:" + str(e))
0 0
- Multithread download tool for massive tiny files.
- C for Multithread 笔记
- Tiny Download&&Exec ShellCode
- USB/DVD download tool 错误:we were unable to copy your files 解决办法
- Apktool—A tool for reverse engineering Android apk files
- Download Etl Tool Freeware
- massive
- Webkit Download files
- use python download files
- Hibernate Tool & Files
- LNetAnt(The download tool for all POXSIX platform as linux) project site is onlining!
- Install YouTube-DL – A Command Line Video Download Tool for Linux
- esp8266 download tool的使用
- php error when download files
- Download Files from Web [C#]
- python multi-thread download files
- python multi process download files
- ubuntu use commandline download files
- List常用的方法,以ArrayList来说明
- 大话Linux内核中锁机制之信号量、读写信号量
- eclipse中打点出不来的解决办法
- nyoj 一笔画问题
- SQL内链接,外连接,交叉连接,联合连接区别详解
- Multithread download tool for massive tiny files.
- C#连接sqlserver时如何让Integrated Security=True生效
- 面向对象五大基本原则
- 【图论】最短路径算法大全
- RedHat机器opencv安装
- 实现自动构建编译javaweb项目并发布到N台服务器
- 数字签名和数字证书的学习总结
- Android-25种开源炫酷动画框架
- 用happen-before规则重新审视DCL