Python日常学习笔记4--模拟登录知乎

来源:互联网 发布:2017网络与新媒体专业 编辑:程序博客网 时间:2024/05/18 10:10
# Simulate a phone-number login to Zhihu using only the standard library.
#
# The script fetches the sign-in page, extracts the hidden ``_xsrf`` token
# from it, and posts that token together with the credentials to the login
# endpoint.
#
# Captcha recognition is not implemented; a third-party package could be used
# to recognise the captcha.

import json
from html.parser import HTMLParser
from urllib import parse, request

try:
    # Unused by this script; kept so the name stays importable from here.
    # Guarded because idna may be missing or may not export this name.
    from idna import unicode  # noqa: F401
except ImportError:
    unicode = str

USER_AGENT = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0'
SIGNIN_URL = 'https://www.zhihu.com/signin'
LOGIN_URL = 'https://www.zhihu.com/login/phone_num'


class MyHTMLParser(HTMLParser):
    """Extract the hidden ``_xsrf`` form token from an HTML page.

    Feed the page text with ``feed()`` and read the result from ``xsrf``.
    ``xsrf`` stays ``0`` when no matching ``<input>`` was seen.
    """

    def __init__(self):
        super().__init__()
        # Random token issued by the site; the login request must echo it.
        self._xsrf = 0

    def _capture_xsrf(self, tag, attrs):
        """Store the value of ``<input type="hidden" name="_xsrf" ...>``.

        Attributes are looked up by name, so their order in the markup does
        not matter. Tags without a ``value`` attribute are ignored.
        """
        if tag != 'input':
            return
        fields = dict(attrs)
        # Attribute values are None for bare attributes such as <input hidden>.
        if (fields.get('type') or '').lower() != 'hidden':
            return
        if fields.get('name') != '_xsrf':
            return
        token = fields.get('value')
        if token is not None:
            self._xsrf = token

    def handle_starttag(self, tag, attrs):
        """Inspect ``<input ...>`` written without a trailing slash."""
        self._capture_xsrf(tag, attrs)

    def handle_startendtag(self, tag, attrs):
        """Inspect self-closing ``<input ... />`` tags."""
        self._capture_xsrf(tag, attrs)

    def handle_data(self, data):
        """Ignore text content; only tag attributes matter here."""
        pass

    @property
    def xsrf(self):
        """The extracted token, or ``0`` if none has been found."""
        return self._xsrf


def _format_body(raw):
    """Return the response bytes as readable text.

    JSON bodies are re-serialised with ``ensure_ascii=False`` so ``\\uXXXX``
    escapes are shown as real characters. Anything else is returned as the
    decoded text unchanged.
    """
    text = raw.decode('utf-8', errors='replace')
    try:
        return json.dumps(json.loads(text), ensure_ascii=False)
    except ValueError:
        return text


def main():
    """Fetch the sign-in page, extract the token, and post the login form."""
    parser = MyHTMLParser()

    page_req = request.Request(SIGNIN_URL)
    page_req.add_header('User-Agent', USER_AGENT)
    with request.urlopen(page_req) as page:
        parser.feed(page.read().decode('utf-8'))
    parser.close()
    print(parser.xsrf)

    if not parser.xsrf:
        # Without a token the login request cannot succeed, so do not send
        # the credentials at all.
        raise SystemExit('no _xsrf token found on the sign-in page; aborting login')

    login_data = parse.urlencode([
        ('phone_num', '...'),
        ('password', '...'),
        ('_xsrf', parser.xsrf),
        ('captcha_type', 'cn'),
    ])

    login_req = request.Request(LOGIN_URL)
    login_req.add_header('User-Agent', USER_AGENT)
    login_req.add_header('X-Xsrftoken', parser.xsrf)
    login_req.add_header('Connection', 'keep-alive')
    with request.urlopen(login_req, data=login_data.encode('utf-8')) as f:
        print('%s: %s' % (f.status, f.reason))
        print(_format_body(f.read()))


if __name__ == '__main__':
    main()