爬教务处玩玩

来源:互联网 发布:mars建筑软件下载 编辑:程序博客网 时间:2024/04/28 09:39
# Log in to the SWUST academic-affairs portal through its CAS sign-on page
# and dump every page fetched along the way to local text files.
#
# The script was written for Python 2; the import shim below keeps that code
# path and additionally lets the module load on Python 3.
import contextlib
import re
import string
import urllib

try:  # Python 3 module layout
    import http.cookiejar as cookielib
    import urllib.parse as urlparse
    import urllib.request as urllib2
    from html.entities import entitydefs
    from html.parser import HTMLParser
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    import cookielib
    import urllib2
    import urlparse
    from HTMLParser import HTMLParser
    from htmlentitydefs import entitydefs
    from urllib import urlencode, urlopen


def _to_text(data):
    """Return *data* as text, decoding Python 3 ``bytes`` for the HTML parser.

    On Python 2 ``bytes`` is ``str``, so the value is passed through untouched
    exactly as the original script did.
    """
    if isinstance(data, bytes) and not isinstance(data, str):
        # NOTE(review): assumes the pages are UTF-8; undecodable bytes are
        # replaced rather than raising, since only ASCII attributes are read.
        return data.decode("utf-8", "replace")
    return data


def _fetch(opener, target):
    """Open *target* (URL or Request) with *opener* and return the body once.

    The response is always closed, and the body is returned so callers can
    both save and parse it without reading the stream a second time.
    """
    with contextlib.closing(opener.open(target)) as response:
        return response.read()


def savatofile(filepath, contents):
    """Write *contents* to *filepath*, replacing any existing file.

    Python 3 ``bytes`` are written in binary mode; everything else uses text
    mode as before. The file is closed even if the write fails.
    """
    binary = isinstance(contents, bytes) and not isinstance(contents, str)
    with open(filepath, "wb" if binary else "w") as fl:
        fl.write(contents)


def ListToStr(list):
    """Concatenate the strings in *list* into a single string.

    The parameter name shadows the builtin but is kept for keyword callers.
    """
    return ''.join(list)


class MyhttpParser(HTMLParser):
    """Extract the CAS login ticket (``lt``) from the login form.

    For every ``<input>`` tag that has an attribute whose value is ``"lt"``
    (typically ``name="lt"``), the tag's own ``value`` attribute is stored.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.it = ""

    def _remember_lt(self, tag, attrs):
        """Record the ``value`` of an ``lt`` input, whatever the attribute order."""
        if tag != "input":
            return
        attr_map = dict(attrs)
        # Look only at this tag's attributes so that a later, unrelated
        # <input> can never supply the value.
        if "lt" in attr_map.values() and "value" in attr_map:
            self.it = attr_map["value"]

    def handle_starttag(self, tag, attrs):
        # Covers HTML-style ``<input ...>`` without the trailing slash.
        self._remember_lt(tag, attrs)

    def handle_startendtag(self, tag, attrs):
        # Covers XHTML-style ``<input ... />``.
        self._remember_lt(tag, attrs)

    def Get_lt(self):
        """Return the captured login ticket, or ``""`` if none was found."""
        return self.it


class MyhttpParserTicket(HTMLParser):
    """Extract the link target of the ``btn btn-primary`` anchor.

    The page returned after posting the credentials is expected to contain
    such an anchor whose ``href`` carries the service ticket.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.href = ""

    def handle_starttag(self, tag, attrs):
        if tag != "a":
            return
        attr_map = dict(attrs)
        # Match class and href on the same tag, independent of their order.
        if attr_map.get("class") == "btn btn-primary" and "href" in attr_map:
            self.href = attr_map["href"]

    def Get_ticket_href(self):
        """Return the captured href, or ``""`` if no matching anchor was seen."""
        return self.href


hosturl = 'https://matrix.dean.swust.edu.cn/acadmicManager/index.cfm?event=studentPortal:DEFAULT_EVENT'
posturl = 'https://ids-swust.fayea.com/cas/login?service=https%3A%2F%2Fmatrix.dean.swust.edu.cn%2FacadmicManager%2Findex.cfm%3Fevent%3DstudentPortal%3ADEFAULT_EVENT'


def get_num_lt():
    """Fetch the CAS login page and return the ``lt`` token found in it."""
    with contextlib.closing(urlopen(posturl)) as response:
        htmlcode = response.read()
    hp = MyhttpParser()
    hp.feed(_to_text(htmlcode))
    hp.close()
    return hp.Get_lt()


def getticketherf(htmlcode):
    """Return the ticket link found in *htmlcode* (text or raw bytes)."""
    hp = MyhttpParserTicket()
    hp.feed(_to_text(htmlcode))
    hp.close()
    return hp.Get_ticket_href()


def Login():
    """Log in through CAS and save the fetched pages to 1.txt, 2.txt, 3.txt.

    Steps: read the ``lt`` token from the login form, post the credentials,
    follow the ticket link from the response, then fetch the portal page.
    All requests after the token fetch share one cookie jar. No custom
    request headers are sent.

    Raises:
        ValueError: if the login response contains no ticket link (for
            example because the credentials were rejected).
    """
    It_str = get_num_lt()
    print(It_str)

    Postdata = {'lt': It_str,
                'username': 'xxxx',
                'password': 'xxxx',
                'service': 'https://matrix.dean.swust.edu.cn/acadmicManager/index.cfm?event=studentPortal:DEFAULT_EVENT'
                }
    # urlencode output is pure ASCII; Python 3 requires the body as bytes.
    Postdata = urlencode(Postdata).encode("ascii")

    cookieJar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookieJar))
    request = urllib2.Request('https://ids-swust.fayea.com/cas/login', Postdata)

    # Read the body exactly once: a second read() on the same response
    # returns nothing, which previously left the ticket parser with no input.
    login_page = _fetch(opener, request)
    savatofile('1.txt', login_page)

    ticketurl = getticketherf(login_page)
    if not ticketurl:
        raise ValueError("no ticket link found in the login response; see 1.txt")

    savatofile('2.txt', _fetch(opener, ticketurl))
    savatofile('3.txt', _fetch(opener, hosturl))


if __name__ == "__main__":
    Login()

0 0