Python多线程错误

来源:互联网 发布:java 心跳实现 编辑:程序博客网 时间:2024/06/05 16:04

源代码

# coding=utf-8
'''
Created on 2017-08-16

@author: Lihhz

Fetch 100 Baidu image-search result pages, each in its own thread started
with the low-level ``thread`` module.

NOTE: this is the version that fails. ``craw`` returns as soon as the last
thread has been started and nothing waits for the workers, so the main
thread reaches the end of the script while downloads are still running.
'''
from spider.url_manager import UrlManager
from spider.html_downloader import HtmlDownloader
from spider.html_parser import HtmlParser
import logging
import thread

# Everything at DEBUG and above goes to myapp.log, truncated on every run.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
    filename='myapp.log',
    filemode='w',
)

# Project helpers shared by every worker thread.
urlManager = UrlManager()
htmlDownloader = HtmlDownloader('d:/test1')  # presumably the output directory - confirm in HtmlDownloader
htmlParser = HtmlParser()

# Result-page URL template; %s is filled with the pn query value.
url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=周杰伦&pn=%s&gsm=&ct=&ic=0&lm=-1&width=0&height=0'


class Main(object):
    '''Starts one download thread per result page.'''

    def __init__(self, rootUrl):
        # Stored only; nothing in this script reads it back.
        self.rootUrl = rootUrl

    def download(self, u):
        '''Download page ``u``, parse it, and download what the parser returns.'''
        page_html = htmlDownloader.downloadHtml(u)
        found = htmlParser.parseHtml('', page_html)
        htmlDownloader.downloadImage(found)

    def craw(self):
        '''Start a thread for each of the 100 pages (pn = 0, 20, ..., 1980).'''
        for page in range(100):
            page_url = url % (page * 20)
            logging.info('======%s' % page_url)
            # Fire and forget: no handle is kept and nothing joins the thread.
            thread.start_new_thread(self.download, (page_url,))


if __name__ == '__main__':
    main = Main('http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=周杰伦&pn=0&gsm=50&ct=&ic=0&lm=-1&width=0&height=0')
    main.craw()

错误信息

pydev debugger: starting (pid: 10112)
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "_pydevd_bundle\pydevd_cython_win32_27_32.pyx", line 918, in _pydevd_bundle.pydevd_cython_win32_27_32.ThreadTracer.__call__ (_pydevd_bundle/pydevd_cython_win32_27_32.c:15143)
Traceback (most recent call last):
  File "_pydevd_bundle\pydevd_cython_win32_27_32.pyx", line 918, in _pydevd_bundle.pydevd_cython_win32_27_32.ThreadTracer.__call__ (_pydevd_bundle/pydevd_cython_win32_27_32.c:15143)
  File "_pydevd_bundle\pydevd_cython_win32_27_32.pyx", line 918, in _pydevd_bundle.pydevd_cython_win32_27_32.ThreadTracer.__call__ (_pydevd_bundle/pydevd_cython_win32_27_32.c:15143)
  File "_pydevd_bundle\pydevd_cython_win32_27_32.pyx", line 918, in _pydevd_bundle.pydevd_cython_win32_27_32.ThreadTracer.__call__ (_pydevd_bundle/pydevd_cython_win32_27_32.c:15143)
Unhandled exception in thread started by
sys.excepthook is missing
lost sys.stderr
Unhandled exception in thread started by
sys.excepthook is missing
lost sys.stderr
Unhandled exception in thread started by
sys.excepthook is missing
lost sys.stderr

原因分析

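  • 在craw中,thread.start_new_thread开启了若干个子线程;craw返回后main线程随即结束,而此时子线程尚未执行完毕.main线程退出时解释器开始清理(sys.stderr、sys.excepthook等已不可用),仍在运行的子线程于是报出上面的"sys.excepthook is missing / lost sys.stderr"错误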

解决办法

  • 为每个子线程分配一把锁,并在启动子线程之前获取该锁;子线程下载完成后释放自己的锁.main线程依次等待所有锁被释放,全部释放(即所有子线程都已结束)后才退出
# coding=utf-8
'''
Created on 2017-08-16

@author: Lihhz

Fetch 100 Baidu image-search result pages, each in its own thread started
with the low-level ``thread`` module, and keep the main thread alive until
every worker has finished.

Each worker is paired with a lock that the main thread acquires before the
worker starts and that the worker releases when it is done. The main thread
then blocks on every lock in turn, so it cannot exit early.
'''
from spider.url_manager import UrlManager
from spider.html_downloader import HtmlDownloader
from spider.html_parser import HtmlParser
import logging
import thread

# Everything at DEBUG and above goes to myapp.log, truncated on every run.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                    datefmt='%a, %d %b %Y %H:%M:%S',
                    filename='myapp.log',
                    filemode='w')

# Project helpers shared by every worker thread.
urlManager = UrlManager()
htmlDownloader = HtmlDownloader('d:/test1')  # presumably the output directory - confirm in HtmlDownloader
htmlParser = HtmlParser()

# Result-page URL template; %s is filled with the pn query value.
url = 'http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=周杰伦&pn=%s&gsm=&ct=&ic=0&lm=-1&width=0&height=0'


class Main(object):
    '''Starts one download thread per result page and waits for all of them.'''

    def __init__(self, rootUrl):
        # Stored only; nothing in this script reads it back.
        self.rootUrl = rootUrl

    def download(self, u, lock):
        '''Worker body: process page ``u`` and then signal completion.

        :param u: URL of the result page to download and parse.
        :param lock: lock already held on this worker's behalf; it is
            released when the worker finishes, whether or not it succeeded.
        '''
        try:
            htmlContent = htmlDownloader.downloadHtml(u)
            imageUrls = htmlParser.parseHtml('', htmlContent)
            htmlDownloader.downloadImage(imageUrls)
        finally:
            # Release even when a step above raises; otherwise the main
            # thread would wait on this lock forever.
            lock.release()

    def craw(self):
        '''Start a thread for each of the 100 pages and block until all finish.'''
        locks = []
        for i in range(0, 100):
            u = url % (i * 20)
            logging.info('======%s' % (u))
            # One lock per worker, taken here so it is already held by the
            # time the worker starts running.
            lock = thread.allocate_lock()
            lock.acquire()
            locks.append(lock)
            thread.start_new_thread(self.download, (u, lock,))
        # A blocking acquire returns only after the worker has released the
        # lock, so this waits for every worker without spinning the CPU.
        for lock in locks:
            lock.acquire()


if __name__ == '__main__':
    main = Main('http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word=周杰伦&pn=0&gsm=50&ct=&ic=0&lm=-1&width=0&height=0')
    main.craw()
原创粉丝点击