python3+BeautifulSoup+tkinter 爬虫 获取学校成绩

来源:互联网 发布:ubuntu arm 源 编辑:程序博客网 时间:2024/05/18 04:52

写的是一个小爬虫,有界面,爬取学校成绩

从最开始只用python3+正则表达式

然后加界面用tkinter

到最后加了BeautifulSoup

现在看起来和谐多了

<pre name="code" class="python">#获取学校成绩
import re
import string
import urllib.parse
import urllib.request
from tkinter import *
from tkinter import messagebox
from tkinter import ttk

from bs4 import BeautifulSoup


class AhutScore:
    """Query the AHUT student-score page and show the result in a Tk GUI."""

    # Endpoint the score query form is posted to.
    SCORE_URL = 'http://211.70.149.134:8080/stud_score/brow_stud_score.aspx'

    # Text of the ``Label_SHOW`` span when the query matched no records.
    NO_RECORDS = '没有返回记录!'

    def __init__(self):
        print('已经启动安工大成绩查询爬虫,咔嚓咔嚓')

    @staticmethod
    def _tag_text(tag):
        """Return the text inside *tag*, or ``''`` when the tag was not found.

        ``Tag.string`` is ``None`` for empty tags and for tags with several
        children, which would break later string operations; ``get_text()``
        always yields a ``str``.
        """
        return tag.get_text() if tag is not None else ''

    def getAhutScore(self, stuNo, idCard, xn, xq, timeout=15):
        """Post the score query form and parse the returned HTML.

        Args:
            stuNo: Student number, sent as form field ``TextBox1``.
            idCard: ID-card number, sent as form field ``TextBox2``.
            xn: Academic year, sent as ``drop_xn`` (may be ``''``).
            xq: Term, sent as ``drop_xq`` (may be ``''``).
            timeout: Seconds to wait for the server before giving up.

        Returns:
            A tuple ``(items, stuMsg, labelShow)`` where ``items`` is a list
            of dicts mapping each table header to the cell text of one score
            row, ``stuMsg`` is the text of the ``Label1`` span and
            ``labelShow`` is the text of the ``Label_SHOW`` span.

        Raises:
            OSError: on network failure (``urllib.error.URLError`` and socket
                timeouts are subclasses).
        """
        # Form layout as captured from the live page with HttpWatch.
        # NOTE(review): the two hidden-field tokens below were captured once;
        # presumably the server may reject them if the page changes - confirm.
        postdata = urllib.parse.urlencode({
            '__EVENTVALIDATION': '/wEWIQLH/uyCBwLs0bLrBgLs0fbZDALWrMSACwKEx5fABgKFx/uABQKax7/ABwKax6OABgKbx6OABgKYx+dBAsKF4K8GAs2FiJQIAsqF5O0IAsOF8PcLAsCFjO0JAvGV4pUFAv/6yPsJAv76yPsJAvbLmuYBAq7k2jACzqvD4A4CrvycrAcCi+uC+wwCn/nbgQ0C4d349AoC9PbF/AwCrZj0xQsCrZiIoQQC0sqYtwoC6MqwtAcC1srwtQoChobTsw4C1orq2A/lc4cMuGz9/vf0WzeaMjk2B63pi/yD0c3bh6AkZ2usTA==',
            '__VIEWSTATE': '/wEPDwUKLTc5MTY3NzY2OA9kFgICAw9kFg4CBQ8QZBAVDA09Peivt+mAieaLqT09CTIwMTQtMjAxNQkyMDEzLTIwMTQJMjAxMi0yMDEzCTIwMTEtMjAxMwkyMDExLTIwMTIJMjAxMC0yMDExCTIwMDktMjAxMAkyMDA4LTIwMDkJMjAwNy0yMDA4CTIwMDYtMjAwNwkyMDA1LTIwMDYVDAAJMjAxNC0yMDE1CTIwMTMtMjAxNAkyMDEyLTIwMTMJMjAxMS0yMDEzCTIwMTEtMjAxMgkyMDEwLTIwMTEJMjAwOS0yMDEwCTIwMDgtMjAwOQkyMDA3LTIwMDgJMjAwNi0yMDA3CTIwMDUtMjAwNhQrAwxnZ2dnZ2dnZ2dnZ2dkZAIHDxBkEBUDDT096K+36YCJ5oupPT0BMgExFQMAATIBMRQrAwNnZ2dkZAIdD2QWAgIFDzwrABEAZAIfD2QWAgIBDzwrABEAZAIjD2QWAgIJDzwrABEAZAIlD2QWAgIDDxBkZBYBZmQCJw9kFgICAQ88KwARAQEQFgAWABYAZBgEBQlHcmlkVmlldzMPZ2QFCUdyaWRWaWV3MQ9nZAUMR3JpZFZpZXdfY2owD2dkBQtHcmlkVmlld19jag9nZJ3osNiaHFKtpB351twVA++gU7GdyOdYypVlNUYHNaNo',
            '__VIEWSTATEGENERATOR': 'DCA2160B',
            'Button_cjcx': '查询',
            'drop_type': '全部成绩',
            'drop_xn': xn,
            'drop_xq': xq,
            'hid_dqszj': '',
            'TextBox1': stuNo,
            'TextBox2': idCard,
        }).encode(encoding='utf-8')

        # Send a browser-like User-Agent so the request looks like a normal visit.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
        }
        req = urllib.request.Request(
            url=self.SCORE_URL,
            data=postdata,
            headers=headers,
        )

        # Close the connection deterministically and never block forever:
        # this method runs inside the Tk event loop.
        with urllib.request.urlopen(req, timeout=timeout) as result:
            unicodePage = result.read().decode('utf-8')

        # Name the parser explicitly so parsing does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(unicodePage, 'html.parser')

        stuMsg = self._tag_text(soup.find('span', id='Label1'))
        labelShow = self._tag_text(soup.find('span', id='Label_SHOW'))

        items = []
        if labelShow == self.NO_RECORDS:
            return items, stuMsg, labelShow

        # The header row carries class "Freezing"; without it there is
        # nothing to key the score cells by.
        header_row = soup.find('tr', class_='Freezing')
        if header_row is None:
            return items, stuMsg, labelShow
        titles = [self._tag_text(th) for th in header_row.find_all('th')]

        # Score rows are the left-aligned rows carrying the hover handler.
        score_rows = soup.find_all(
            'tr', align='left', onmouseout="this.style.backgroundColor=c")
        for row in score_rows:
            cells = row.find_all('td')
            # zip() pairs headers with cells without assuming a fixed column
            # count; non-breaking spaces used as padding are stripped.
            items.append({
                title: self._tag_text(cell).replace('\xa0', '')
                for title, cell in zip(titles, cells)
            })

        print(items)
        return items, stuMsg, labelShow

    def getGUI(self):
        """Build the query window and run the Tk main loop until it is closed."""
        root = Tk()
        root.resizable(width=False, height=False)
        root.title('ahut成绩查询')

        Label(root, text='学号:').grid(row=0, column=0, sticky=W)
        Label(root, text='身份证号:').grid(row=0, column=2, sticky=W)
        Label(root, text='学年:').grid(row=1, column=0, sticky=W)
        Label(root, text='学期:').grid(row=1, column=2, sticky=W)

        stuNo = StringVar()
        idCard = StringVar()
        xn = StringVar()
        xq = StringVar()

        Entry(root, textvariable=stuNo).grid(row=0, column=1)
        Entry(root, textvariable=idCard).grid(row=0, column=3)
        stuNo.set('11908***')
        idCard.set('34082*************')

        xnBox = ttk.Combobox(root, textvariable=xn, state='readonly')
        xnBox['values'] = ('', '2010-2011', '2011-2012', '2012-2013', '2013-2014', '2014-2015')
        xnBox.set('2014-2015')
        xnBox.grid(row=1, column=1)

        xqBox = ttk.Combobox(root, textvariable=xq, state='readonly')
        xqBox['values'] = ('', '1', '2')
        xqBox.set('1')
        xqBox.grid(row=1, column=3)

        def show_results():
            """Run the query with the current form values and display it."""
            try:
                items, stu_msg, label_show = self.getAhutScore(
                    stuNo.get(), idCard.get(), xn.get(), xq.get())
            except OSError as err:
                # Surface network failures to the user instead of only
                # printing a traceback to stderr.
                messagebox.showerror('查询失败', str(err))
                return

            # A Toplevel child window, not a second Tk root: an application
            # should have exactly one Tk instance.
            window = Toplevel(root)
            window.title('成绩查询结果')
            window.resizable(width=False, height=False)

            text = Text(window)
            # Append at END so rows appear in the order the server returned
            # them (inserting every row at a fixed index reverses them).
            text.insert(END, stu_msg + '\n')
            text.insert(END, '-' * 32 + label_show + '-' * 32 + '\n')
            if items:
                for item in items:
                    text.insert(
                        END,
                        item.get('课程名', '') + ':' + item.get('总评成绩', '') + '\n')
            else:
                text.insert(END, '暂无信息!\n')
            text.grid(row=5, column=0, columnspan=4)

        Button(root, text='查询', command=show_results).grid(row=4, columnspan=4)
        root.mainloop()


# Launched at module level, exactly as the original script did.
ahutScore = AhutScore()
ahutScore.getGUI()



0 0
原创粉丝点击