python模拟登陆知乎（手工识别验证码）

来源：互联网 发布：安装windows xp系统 编辑：程序博客网 时间：2024/06/05 14:53

借鉴：http://www.jianshu.com/p/3debfb110ad9
https://github.com/xchaoinfo/fuck-login/tree/master/001%20zhihu
https://github.com/lining0806/PythonSpiderNotes/tree/master/ZhihuSpider
详细的介绍请看 http://www.jianshu.com/p/3debfb110ad9 ，这篇文章介绍得很清楚。
然后我概括一下主要的设计思路。
在点击登录按钮的时候，除了向服务器传输账号和密码外，还需要传输xsrf参数以及验证码。另外，为了简化后续的登录过程，如果成功登录，
就把cookie保存下来，下一次运行程序的时候直接使用cookie来登录。

# coding: utf8
#
# Simulated zhihu.com login with a manually typed captcha (Python 3).
#
# Flow: load cookies saved by a previous run if possible; otherwise ask for
# the account, password and captcha, post them together with the page's
# _xsrf token, and save the resulting cookies to the 'zhihucookie' file.
import os
import time
import http.cookiejar
import json
import re

import requests
from bs4 import BeautifulSoup

# Set to True once cookies from a previous run were loaded from disk.
hasCookie = False

# A single session performs every request so cookies are shared between them.
session = requests.Session()
session.cookies = http.cookiejar.LWPCookieJar(filename='zhihucookie')

# Request headers sent with every call.
agent = 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36'
headers = {
    "Host": "www.zhihu.com",
    "Referer": "https://www.zhihu.com",
    "User-Agent": agent,
    "X-Requested-With": "XMLHttpRequest"
}

try:
    session.cookies.load(ignore_discard=True)
    hasCookie = True
except (OSError, http.cookiejar.LoadError):
    # Missing or unreadable cookie file: the captcha login is used instead.
    print('cookie 文件未能加载')


def get_xsrf():
    """Fetch the zhihu home page and return the value of its ``_xsrf`` input.

    Returns:
        The ``value`` attribute of the ``<input name="_xsrf">`` element.

    Raises:
        RuntimeError: if the page contains no ``_xsrf`` input, instead of the
            opaque ``TypeError`` that indexing ``None`` would produce.
    """
    response = session.get('https://www.zhihu.com', headers=headers)
    homesoup = BeautifulSoup(response.text, 'html.parser')
    xsrfinput = homesoup.find('input', {'name': '_xsrf'})
    if xsrfinput is None:
        raise RuntimeError('_xsrf input not found on https://www.zhihu.com')
    return xsrfinput['value']


def get_captcha():
    """Download the login captcha, show it, and return the text the user types.

    The image is written to ``checkcode.gif`` in the current directory.

    Returns:
        The captcha string entered on the console.
    """
    # A millisecond timestamp is sent as the ``r`` query parameter.
    randomtime = str(int(time.time() * 1000))
    captchaurl = ('https://www.zhihu.com/captcha.gif?r=' +
                  randomtime + "&type=login")
    captcharesponse = session.get(url=captchaurl, headers=headers)
    with open('checkcode.gif', 'wb') as f:
        f.write(captcharesponse.content)
    # os.startfile only exists on Windows; elsewhere tell the user where the
    # image is instead of crashing with AttributeError.
    if hasattr(os, 'startfile'):
        os.startfile('checkcode.gif')
    else:
        print('请手动打开验证码图片：' + os.path.abspath('checkcode.gif'))
    return input('请输入验证码：')


def login():
    """Log in to zhihu and save the session cookies to disk.

    When cookies were loaded at start-up, a single request is made with them;
    if that request fails at the transport level the captcha login is used.
    Without cookies the captcha login is always used.
    """
    if hasCookie:
        # NOTE(review): an expired cookie presumably still yields a normal
        # HTTP response rather than an exception, so this only guards against
        # request failures; the __main__ section's profile request is what
        # shows whether the session is really logged in. Confirm if stricter
        # validation is wanted here.
        try:
            session.get('https://www.zhihu.com/login/email', headers=headers)
        except requests.RequestException as e:
            print(e)
            loginwithcaptcha()
    else:
        loginwithcaptcha()
    # Persist the cookies so the next run can skip the captcha.
    session.cookies.save(ignore_discard=True, ignore_expires=True)


def loginwithcaptcha():
    """Prompt for credentials and a captcha, then post them until accepted.

    The captcha is always submitted, whether or not the page would have asked
    for one. While the JSON reply's ``r`` field equals 1 the login is treated
    as failed and a new captcha is requested.
    """
    username = input('输入账号：')
    password = input('输入密码：')
    # Eleven leading digits are treated as a phone number login.
    if re.match(r'\d{11}$', username):
        # https, matching the email endpoint, so the password is not sent in
        # clear text.
        url = "https://www.zhihu.com/login/phone_num"
        print('使用手机登录中...')
        account_field = 'phone_num'
    else:
        url = 'https://www.zhihu.com/login/email'
        print('使用邮箱登录中...')
        account_field = 'email'
    data = {'_xsrf': get_xsrf(),
            'password': password,
            'remember_me': 'true',
            account_field: username
            }
    while True:
        data['captcha'] = get_captcha()
        result = session.post(url, data=data, headers=headers)
        if result.json()['r'] != 1:
            break
        # r == 1: ask for a new captcha; the credentials are not re-entered,
        # so a wrong account or password keeps failing here.
        print('登录失败，请确认账号和密码无误后重新输入验证码')


if __name__ == '__main__':
    login()
    # The profile settings page is only viewable when logged in, so fetching
    # it confirms that the login really succeeded.
    get_url = 'https://www.zhihu.com/settings/profile'
    # allow_redirects=False: do not follow a redirect away from the page.
    resp = session.get(get_url, headers=headers, allow_redirects=False)
    print(resp.text)

0 0