青岛大学教务爬虫

来源：互联网发布：淘宝条码扫描编辑：程序博客网时间：2024/04/29 18:50
# -*- coding: utf-8 -*-from PIL import Imageimport ioimport reimport getpassimport requestsimport pytesseractfrom lxml import etreefrom prettytable import PrettyTableclass qdujw:    def __init__(self):        self.userid = 0        self.s = requests.Session()        self.headers = {            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.109 Safari/537.36'        }    # 教务登录    def login(self):        global sid, passwd, jw        loginurl = 'http://jw.qdu.edu.cn/academic/j_acegi_security_check'        codeurl = 'http://jw.qdu.edu.cn/academic/getCaptcha.do'        userurl = 'http://jw.qdu.edu.cn/academic/showPersonalInfo.do'        # 验证码        code = self.s.get(codeurl, headers=self.headers, stream=True)        img = Image.open(io.BytesIO(code.content))        # 降噪        threshold = 140        table = []        for i in range(256):            if i < threshold:                table.append(0)            else:                table.append(1)        # 将彩色图像转换为灰度图像        imgry = img.convert('L')        # 讲图像中噪声去除        out = imgry.point(table, '1')        codetext = pytesseract.image_to_string(out, config='digits')        # 去除空格，将.替换为0        codetext = codetext.replace(' ', '')        codetext = codetext.replace('.', '0')        # 登录        postdata = {            'j_username': sid,            'j_password': passwd,            'j_captcha': codetext        }        r = self.s.post(loginurl, postdata)        # 密码错误        if re.search(u'\u5bc6\u7801\u4e0d\u5339\u914d', r.text):            print sid + ' 密码不匹配!'            print            choice = raw_input('请选择(1.重新输入 2.退出)：')            if choice == '1':                sid = raw_input('学号：')                passwd = getpass.getpass('密码：')            else:                exit()            jw.login()        # 用户名不存在        elif re.search(u'\u4e0d\u5b58\u5728', r.text):            print sid + ' 用户名不存在!'            print            choice = raw_input('请选择(1.重新输入 2.退出)：')            if choice == '1':                sid = raw_input('学号：')                passwd = getpass.getpass('密码：')            else:                exit()            jw.login()        # 验证码错误        elif re.search(u'\u9a8c\u8bc1\u7801\u4e0d\u6b63\u786e', r.text):            print '正在登录，请耐心等待......'            jw.login()        # 成功        else:            userpage = self.s.get(userurl).content            name = etree.HTML(userpage.decode(                'utf-8', 'ignore')).xpath('/html/body/center/table[1]/tr[1]/td[2]/text()')            for n in name:                name = n.encode('utf-8')            userid = re.findall(b'.*?userid=(.*?)"', userpage, re.S)            for i in userid:                self.userid = self.userid * 10 + int(i)            print name + '登录成功！'    # 查询成绩    def scores(self):        print '======= 成绩查询 ========'        print        year = raw_input('请输入查询学年: ')        term = raw_input('春季学期输入[1],秋季学期输入[2]: ')        scoresurl = 'http://jw.qdu.edu.cn/academic/manager/score/studentOwnScore.do'        # 字符串计算  eval()        postdata = {            'year': eval(year + '-' + '1980'),            'term': term,            'para': '0'        }        scorespage = self.s.post(scoresurl, postdata).content        aa = '<td>' + year + '[\s\S]*?<td>.*?</td>[\s\S]*?<td>.*?</td>[\s\S]*?<td>([\s\S]*?)</td>[\s\S]*?<td>.*?</td>[\s\S]*?<td>([\s\S]*?)</td>'        # str转types  .encode(encoding="utf-8")        scores = re.findall(aa.encode(encoding="utf-8"), scorespage, re.S)        print        print '======= 考试成绩 ========'        for s in scores:            print            print s[0].strip()+' : '+s[1].strip()    # 查询课表    def kebiao(self):        print '======= 课表查询 ========'        print        year = raw_input('请输入查询学年: ')        term = raw_input('春季学期输入[1],秋季学期输入[2]: ')        print        # 使用 PrettyTable 打印表格        x = PrettyTable(["周一", "周二", "周三", "周四", "周五", "周六", "周日"])        x.padding_width = 1        get = {            'id': self.userid,            'yearid': eval(year + '-' + '1980'),            'termid': term,            'timetableType': 'STUDENT',            'sectionType': 'COMBINE'        }        kb = self.s.get(            'http://jw.qdu.edu.cn/academic/manager/coursearrange/showTimetable.do', params=get)        html = etree.HTML(kb.content)        kb = html.xpath('//*[@id="timetable"]//*[@class="center"]')        i = 0        list = []        list1 = ['', '', '', '', '', '', '']        for k in kb:            # print k.xpath('string()').encode('utf-8')            # 通过 list 将课程信息添加到 PrettyTable 中            if i < 7:                list.append(                    k.text.split(';')[0].replace('<<', '').replace('>>', ''))                i += 1                if i == 7:                    # 上下显示一行空白，方便阅读                    x.add_row(list1)                    x.add_row(list)                    x.add_row(list1)                    list = []                    i = 0        print x    # 教务通知    def news(self):        newsurl = requests.get(            'http://jw.qdu.edu.cn/homepage/infoArticleList.do;jsessionid=E06A6E2B5FA3F797FAB8FA5F6331AC92?columnId=358')        html = etree.HTML(newsurl.content.decode('utf-8', 'ignore'))        news = html.xpath('//*[@id="thirdcontent"]/div[2]/ul/li/div/a')        for new in news:            print new.xpath('string(.)').encode('utf-8').replace(' ', '').replace('\n', '')            print 'http://jw.qdu.edu.cn/homepage/' + new.attrib['href']            print    # 个人信息    def user(self):        print '======= 个人信息 ========'        print        user = self.s.get('http://jw.qdu.edu.cn/academic/showPersonalInfo.do')        html = etree.HTML(user.content.decode('utf-8', 'ignore'))        for n in html.xpath('/html/body/center/table[1]/tr[1]/td[2]/text()'):            print '姓名: ' + n.encode('utf-8')        for n in html.xpath('/html/body/center/table[1]/tr[2]/td[1]/text()'):            print '院系: ' + n.encode('utf-8')        for n in html.xpath('/html/body/center/table[1]/tr[2]/td[2]/text()'):            print '专业: ' + n.encode('utf-8')        for n in html.xpath('/html/body/center/table[1]/tr[4]/td[1]/text()'):            print '年级: ' + n.encode('utf-8')        for n in html.xpath('/html/body/center/table[1]/tr[4]/td[2]/text()'):            print '班级: ' + n.encode('utf-8')jw = qdujw()print '======== 教务通知 ========'jw.news()printprint '======== 登录教务系统 ========'# 学号和密码print '请输入学号和密码'sid = raw_input('学号：')passwd = raw_input('密码：')jw.login()while 1:    print    print '======== 青岛大学教务系统 ========'    print    print '[1]查询成绩'    print '[2]查询课表'    print '[3]个人信息'    print '[4]退出'    print    choice = raw_input('请选择: ')    print    if choice == '1':        jw.scores()    elif choice == '2':        jw.kebiao()    elif choice == '3':        jw.user()    elif choice == '4':        exit()
阅读全文
0 0