题库统计的小玩意_python

来源:互联网 发布:怎么截图给淘宝客服 编辑:程序博客网 时间:2024/05/01 00:28

BG


最近搞了一个小玩意：扒取学校题库的评测记录，然后存进数据库，打算用 matplotlib 画个图统计一下，玩一玩。

Code


半成品

# Crawl the judge-status pages of an online judge and store every
# submission row in a local MongoDB collection.
import re

try:
    import urllib2 as ub  # Python 2
except ImportError:
    import urllib.request as ub  # Python 3 exposes the same Request/urlopen

# Entry page of the status listing; later pages are addressed with "?top=<id>".
STATUS_URL = 'http://10.156.17.250/JudgeOnline/status'

# One table row of the status listing: nine captured cells.
STATUS_ROW_RE = re.compile(
    r'<tr align=center><td>(.+?)</td>'
    r'<td><a href=.+?>(.+?)</a></td>'
    r'<td><a href=.+?>(.+?)</a></td>'
    r'<td><font color=.+?>(.+?)</font></td>'
    r'<td>(.+?)</td><td>(.+?)</td><td>(.+?)</td><td>(.+?)</td><td>(.+?)</td>'
    r'</tr>'
)

# The "[Next Page]" link; the captured group is the value of the "top" query.
NEXT_PAGE_RE = re.compile(
    r'\[<a href=status\?top=(.+?)><font color=blue>Next Page</font></a>\]'
)


def getPage(url):
    """Download *url* and return the response body as text.

    A browser-like User-Agent header is sent with the request.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (compatible; MSIE 5.5; Windows NT)'}
    request = ub.Request(url, headers=headers)
    response = ub.urlopen(request)
    try:
        page = response.read()
    finally:
        # The original never closed the connection.
        response.close()
    if not isinstance(page, str):
        # Python 3 returns bytes, but the patterns above are text patterns.
        # NOTE(review): assumes the judge serves UTF-8 -- confirm the charset.
        page = page.decode('utf-8', 'replace')
    return page


def getStatusList(page):
    """Return a list of 9-tuples, one per status row found in *page*."""
    return STATUS_ROW_RE.findall(page)


def getNextUrl(page):
    """Return the absolute URL of the next status page, or None.

    None is returned when *page* contains no well-formed "Next Page" link
    (previously a malformed link raised IndexError).
    """
    match = NEXT_PAGE_RE.search(page)
    if match is None:
        return None
    return STATUS_URL + '?top=' + match.group(1)


def getStatusType(problemId, user, problem, result, memory, time, language,
                  length, submitTime):
    """Build the document stored for one status row.

    The arguments are the nine cells of a row in page order.  The original
    declared ``time`` twice (a SyntaxError) and used the key ``'time'`` twice,
    so the last cell is now named ``submitTime``.
    NOTE(review): the last column is presumably the submission timestamp and
    the sixth the run time -- confirm against the judge's table header.
    """
    return {
        'id': problemId,
        'user': user,
        'problem': problem,
        'result': result,
        'memory': memory,
        'time': time,
        'language': language,
        'length': length,
        'submitTime': submitTime,
    }


def getRec():
    """Return the ``statusRec`` collection of ``statusDb`` on localhost:27017."""
    # Imported here so the parsing helpers stay usable without the driver.
    import pymongo

    client = pymongo.MongoClient("localhost", 27017)
    return client['statusDb']['statusRec']


def main():
    """Walk the status pages from STATUS_URL and insert every row found."""
    rec = getRec()
    url = STATUS_URL
    while url is not None:
        page = getPage(url)
        # Store the rows before deciding whether to stop, so the page that
        # ends the crawl is not silently dropped.
        for item in getStatusList(page):
            # NOTE: Collection.insert is deprecated in PyMongo 3 and removed
            # in PyMongo 4; switch to insert_one when upgrading the driver.
            rec.insert(getStatusType(*item))
        nextUrl = getNextUrl(page)
        if nextUrl == url:
            # The "next" link points back at this page: no progress possible.
            break
        url = nextUrl


if __name__ == '__main__':
    main()
0 0
原创粉丝点击