python实现线程池

来源:互联网 发布:破解同花顺数据 编辑:程序博客网 时间:2024/06/13 23:27

之前利用多线程去检验大量代理IP是否可用。由于程序是IO密集型的,使用多线程是一个很明智的选择;但是我在实现的时候一直在创建和销毁线程,这样代价很高,同时也不能很好地控制线程的数量,于是想自己实现一个线程池。

一个典型的线程池,应该包括如下几个部分:
1、线程池管理器(ThreadPool),用于启动、停用,管理线程池
2、工作线程(WorkerThread),线程池中的线程
3、请求接口(WorkRequest),创建请求对象,以供工作线程调度任务的执行
4、请求队列(RequestQueue),用于存放和提取请求
5、结果队列(ResultQueue),用于存储请求执行后返回的结果

这里写图片描述

实现代码如下:

import threading

try:
    import Queue  # Python 2 module name
except ImportError:
    import queue as Queue  # Python 3 renamed the module to ``queue``


class WorkerThread(threading.Thread):
    """Daemon thread that executes WorkRequest objects taken from a queue.

    The thread starts itself on construction and keeps running until
    done() is called.
    """

    def __init__(self, requestQueue, resultQueue, pollTimeout=5):
        """Create and immediately start the worker.

        Args:
            requestQueue: queue the worker takes WorkRequest objects from.
            resultQueue: queue that receives a ``(request, result)`` tuple
                for every request that completes without raising.
            pollTimeout: seconds a single blocking ``get`` on the request
                queue may wait; it also bounds how long the worker takes
                to notice done() while idle.
        """
        threading.Thread.__init__(self)
        self.daemon = True
        self._requestQueue = requestQueue
        self._resultQueue = resultQueue
        self._pollTimeout = pollTimeout
        # Event used to tell this thread, from another thread, to stop.
        self._signal = threading.Event()
        self.start()

    def run(self):
        """Take requests off the queue and execute them until done() is called."""
        while not self._signal.is_set():
            try:
                request = self._requestQueue.get(
                    block=True, timeout=self._pollTimeout)
            except Queue.Empty:
                # The timed get() already throttles the idle loop, so no
                # extra sleep is needed; loop round to re-check the stop
                # flag.  (A ``break`` here would instead make the thread
                # exit once the queue has stayed empty for pollTimeout.)
                continue
            if self._signal.is_set():
                # Stopped while waiting: hand the request back so another
                # worker can run it.
                self._requestQueue.put(request)
                break
            self._execute(request)

    def _execute(self, request):
        """Run one request, publish its result and invoke its callback."""
        try:
            result = request.callfunc(*request.args, **request.kwds)
            self._resultQueue.put((request, result))
            # callback is optional; calling None would raise TypeError.
            if request.callback is not None:
                request.callback(request, result)
        except Exception as exc:
            self._reportFailure(request, exc)

    def _reportFailure(self, request, exc):
        """Pass a failure to the request's exceptionFunc, or print it."""
        handler = request.exceptionFunc
        if handler is not None:
            try:
                handler(request, exc)
                return
            except Exception as handlerExc:
                # A broken handler must not kill the worker thread; fall
                # through and print the handler's own error instead.
                exc = handlerExc
        print('Exception: %s' % (exc,))

    def done(self):
        """Ask the thread to stop after its current request or poll."""
        self._signal.set()


class ThreadPool(object):
    """Fixed set of WorkerThread objects sharing a request and a result queue."""

    def __init__(self, numWorkers, reqSize=0, resSize=0, pollTimeout=5):
        """Create the queues and start ``numWorkers`` workers.

        Args:
            numWorkers: number of worker threads to start.
            reqSize: maximum size of the request queue (0 means unbounded).
            resSize: maximum size of the result queue (0 means unbounded).
            pollTimeout: passed to every WorkerThread.
        """
        self._requestQueue = Queue.Queue(reqSize)
        self._resultQueue = Queue.Queue(resSize)
        self.workers = []
        self.createWorkers(numWorkers=numWorkers, pollTimeout=pollTimeout)

    def createWorkers(self, numWorkers, pollTimeout=5):
        """Start ``numWorkers`` additional workers and add them to the pool."""
        for _ in range(numWorkers):
            self.workers.append(
                WorkerThread(self._requestQueue, self._resultQueue,
                             pollTimeout=pollTimeout))

    def putRequest(self, request, block=True, timeout=None):
        """Queue a WorkRequest for execution.

        ``block`` and ``timeout`` are forwarded to ``Queue.put``, so
        ``Queue.Full`` is raised when a bounded request queue stays full.
        """
        assert isinstance(request, WorkRequest)
        self._requestQueue.put(request, block=block, timeout=timeout)

    def workersize(self):
        """Return the number of workers currently held by the pool."""
        return len(self.workers)

    def poll(self, block=True, timeout=5):
        """Print and return the results that are available.

        Workers run until they are told to stop, so this does not join
        them; it reads the result queue until a ``get`` comes back empty.

        Args:
            block: when true, wait for each next result; when false, only
                take what is already in the result queue.
            timeout: seconds to wait for each next result when blocking.
                With ``None`` the call keeps waiting for further results
                indefinitely.

        Returns:
            List of the ``(request, result)`` tuples that were printed.
        """
        collected = []
        while True:
            try:
                item = self._resultQueue.get(block=block, timeout=timeout)
            except Queue.Empty:
                break
            print(item)
            collected.append(item)
        return collected

    def addWorker(self, pollTimeout):
        """Start one more worker."""
        self.createWorkers(1, pollTimeout)

    def outWorker(self):
        """Remove one worker from the pool and tell it to stop."""
        if self.workers:
            self.workers.pop().done()

    def allWorkerDone(self):
        """Stop every worker, wait for all of them to exit, and empty the pool."""
        # Signal everyone first so the per-thread shutdown waits overlap
        # instead of adding up.
        for worker in self.workers:
            worker.done()
        for worker in self.workers:
            worker.join()
        # Keep ``workers`` a list so workersize()/addWorker() keep working.
        self.workers = []


# callfunc is the task function to run, callback is invoked after the task
# has finished, and exceptionFunc handles an exception raised by the task.
class WorkRequest(object):
    """A unit of work: a callable plus its arguments and optional hooks."""

    def __init__(self, callfunc, args=None, kwds=None, requestId=None,
                 callback=None, exceptionFunc=None):
        """Describe one task.

        Args:
            callfunc: callable executed as ``callfunc(*args, **kwds)``.
            args: positional arguments (defaults to an empty list).
            kwds: keyword arguments (defaults to an empty dict).
            requestId: integer identifier; defaults to ``id(self)``.
            callback: optional ``callback(request, result)`` called by the
                worker after a successful run.
            exceptionFunc: optional ``exceptionFunc(request, exc)`` called
                by the worker when the task or its callback raises.

        Raises:
            TypeError: if ``requestId`` is given but is not an int.
        """
        if requestId is None:
            requestId = id(self)
        elif not isinstance(requestId, int):
            raise TypeError('requestId must be interger')
        # Stored in both cases so __str__ always has an id to format.
        self.requestId = requestId
        self.callfunc = callfunc
        self.args = args or []
        self.kwds = kwds or {}
        self.callback = callback
        self.exceptionFunc = exceptionFunc

    def __str__(self):
        return "WorkRequest id=%d args=%r kwargs=%r" % (
            self.requestId, self.args, self.kwds)
0 0