How to Crawl Bilibili Info with Python in Parallel

来源:互联网 发布:淮安悠迅网络 编辑:程序博客网 时间:2024/06/05 22:44

Building on the basic code from "How to Crawl Bilibili Info with Python", this version adds parallelism by splitting the av-number range across several threads:

# -*- coding:utf-8 -*-
# Crawl bilibili video statistics in parallel: each thread fetches its own
# contiguous range of av numbers and appends the results to a shared table.
import time
import requests
import sys
from prettytable import PrettyTable
import threading
import importlib

# The original comment here was "set the encoding".
# NOTE(review): reloading sys does not by itself change any encoding on
# Python 3; presumably a leftover from an older idiom. Kept unchanged.
importlib.reload(sys)

# Serializes writes to the shared result table across worker threads.
lock = threading.Lock()

# Seconds to wait on a single API request. Without an explicit timeout,
# requests waits indefinitely, so one stalled connection would hang its
# thread and the final join() forever.
REQUEST_TIMEOUT = 10

# Keys read from the 'data' object of the response, in table-column order.
STAT_KEYS = ('view', 'danmaku', 'reply', 'favorite', 'coin', 'share')


def startCraw(url, beginNum, crawNum):
    """Fetch statistics for ``crawNum`` consecutive av numbers.

    For every av number in ``[beginNum, beginNum + crawNum)`` the URL built
    from ``url.format(av_number)`` is requested, and on success one row is
    appended to the module-level ``tableItem``. Failures (network error,
    non-200 status, unparsable or incomplete payload) are printed and the
    av number is skipped, so one bad item never aborts the rest of the range.

    Args:
        url: URL template containing one ``{}`` placeholder for the av number.
        beginNum: First av number to fetch.
        crawNum: How many consecutive av numbers to fetch.
    """
    for avNum in range(beginNum, beginNum + crawNum):
        target = url.format(avNum)

        try:
            myRequest = requests.get(target, headers={}, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Connection errors and timeouts must not kill the thread.
            print('error:%s' % (e))
            continue

        if myRequest.status_code != 200:
            print('the status_code is not 200,url:%s,status_code:%d' % (target, myRequest.status_code))
            continue

        try:
            jsDict = myRequest.json()['data']
            row = ['av' + str(avNum)] + [str(jsDict[key]) for key in STAT_KEYS]
        except (KeyError, TypeError, ValueError) as e:
            # KeyError: an expected key is absent; TypeError: 'data' is not a
            # mapping (e.g. null); ValueError: the body is not valid JSON.
            print('error:%s' % (e))
            continue

        # Only the mutation of the shared table needs to be serialized;
        # parsing above runs without holding the lock.
        with lock:
            tableItem.add_row(row)


def main():
    """Prompt for the crawl parameters, run the worker threads, print the table."""
    # The workers read the table through this module-level name.
    global tableItem

    url = 'https://api.bilibili.com/x/web-interface/archive/stat?aid={}'
    beginNum = int(input('please enter the begin av_number:'))
    crawNum = int(input('please enter the number you want to craw:'))
    threadNum = int(input('please enter the thread number you want to craw:'))

    # NOTE(review): the 'replay' header is kept exactly as in the original;
    # the column holds the API's 'reply' value.
    tableItem = PrettyTable(['av_num', 'view', 'danmaku', 'replay', 'favorite', 'coin', 'share'])

    # Thread i handles the i-th consecutive slice of crawNum av numbers.
    workers = [
        threading.Thread(
            target=startCraw,
            args=(url, beginNum + index * crawNum, crawNum),
        )
        for index in range(threadNum)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    print(tableItem)


# main function
if __name__ == '__main__':
    main()
阅读全文
0 0