CSDN登录+cookies

来源:互联网 发布:销售数据ppt模板 编辑:程序博客网 时间:2024/05/12 05:14

账号登录并访问其他页面:需要先建立会话并保持会话一致,才能在页面之间切换(类似于登录淘宝账号后,用自己的身份访问其他页面)

利用urllib2和re模块完成的

#coding=utf-8
# Python 2 script: log in to CSDN passport with urllib2, save the session
# cookies to cookie.txt, then fetch a page that requires the login.
import urlparse, re
import urllib2, cookielib, urllib

filename = 'cookie.txt'

# MozillaCookieJar can persist cookies to `filename` via save().
cookie = cookielib.MozillaCookieJar(filename)
cookieProc = urllib2.HTTPCookieProcessor(cookie)
# Python 3 equivalent: urllib.request.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(cookieProc)

# Fetch the login page first: it carries the hidden form fields that must be
# echoed back in the login POST.
h = opener.open('https://passport.csdn.net').read().decode("utf8")

# Sample of the hidden fields in the login form:
#
#   <input type="hidden" name="lt" value="LT-666200-OcdXttTQaLRiBs3XwBkdIHoHD53VeN" />
#   <input type="hidden" name="execution" value="e7s1" />
#   <input type="hidden" name="_eventId" value="submit" />
#
# "lt" can be thought of as a serial number issued to each login attempt.
# Only a request carrying a valid serial number issued by webflow counts as
# being inside the webflow process; without it webflow starts the flow over
# and shows the login page again.
patten1 = re.compile(r'name="lt" value="(.*?)"')
patten2 = re.compile(r'name="execution" value="(.*?)"')

b1 = patten1.search(h)
b2 = patten2.search(h)
if b1 is None or b2 is None:
    # Fail with a clear message instead of an AttributeError on None.group().
    raise RuntimeError('hidden login fields "lt"/"execution" not found in the login page')
print(b1.group(1))

postData = {
    'username': '用户名',
    'password': '密码',
    'lt': b1.group(1),
    'execution': b2.group(1),
    '_eventId': 'submit',
}
# urlencode() already returns an ASCII byte string on Python 2, so no extra
# encode step is needed before using it as the POST body.
postData = urllib.urlencode(postData)

opener.addheaders = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
    ('Referer', 'https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn'),
]

# Passing `data` turns the request into a POST (the login submit).
response = opener.open('https://passport.csdn.net', data=postData)

# Save the cookies to the file, including session and expired ones.
cookie.save(ignore_discard=True, ignore_expires=True)

# Request another page through the same opener so the login cookies are sent.
response2 = opener.open('http://my.csdn.net/')
text2 = response2.read().decode('utf-8', 'ignore')
print(text2)
# tool.log(text2, 'csdn_mycsdn.html')

利用上面保存的cookie.txt

#!/usr/bin/env python
#encoding=utf-8
# Python 2 script: reuse the cookies stored in cookie.txt to request a page
# without logging in again.
__author__ = 'chw'
import cookielib, urllib2

# Create a MozillaCookieJar object.
cookie = cookielib.MozillaCookieJar()
# Read the cookie contents from the file into the jar, keeping session and
# expired cookies as well.
cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)

# Uncomment to print the cookies and confirm they were loaded:
# for item in cookie:
#     print 'name:' + item.name + '-value:' + item.value

# Build an opener that sends the loaded cookies with every request.
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)
opener.addheaders = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
    ('Referer', 'https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn'),
]

# Alternative: build an explicit request object first.
# req = urllib2.Request("http://my.csdn.net/")
res = opener.open('http://my.csdn.net/')
print(res.read())

方法2:

#!/usr/bin/env python
#encoding=utf-8
# Python 2 script: log in to CSDN by re-submitting every input of the login
# form (hidden fields included) together with the account credentials.
__author__ = 'chw'
import cookielib
import urllib
import urllib2
import lxml.html


def parse_from(html):
    """Return a dict mapping the name of each named form input to its value.

    Every ``<input>`` inside a ``<form>`` in ``html`` that has a ``name``
    attribute is included. An input without a ``value`` attribute maps to
    an empty string, because ``urllib.urlencode`` would otherwise serialize
    ``None`` as the literal text ``None``.
    """
    tree = lxml.html.fromstring(html)
    data = {}
    for e in tree.cssselect('form input'):
        name = e.get('name')
        if name:
            data[name] = e.get('value', '')
    return data


url = 'https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn'

# The cookie jar lives only in memory; the opener sends and stores cookies
# across the requests below.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
opener.addheaders = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'),
    ('Referer', 'https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn'),
]

# Fetch the login page and collect its form fields.
html = opener.open(url).read()
data = parse_from(html)
data['username'] = '*********'
data['password'] = '**********'

# A Request that carries data is sent as a POST.
encoded_data = urllib.urlencode(data)
request = urllib2.Request(url, encoded_data)
response = opener.open(request)

# Request another page through the same opener so the login cookies are sent.
response1 = opener.open('http://my.csdn.net/my/mycsdn')
print(response1.read())

感觉上面比较乱,下面利用requests和BeautifulSoup

#!/usr/bin/env python
#encoding=utf-8
# Python 2 script: log in to CSDN with requests + BeautifulSoup, save the
# session cookies to cookie1.txt, then fetch a page that requires the login.
__author__ = 'chw'
import requests
import cookielib
from bs4 import BeautifulSoup


def _hidden_value(soup, name):
    """Return the ``value`` attribute of the first element named ``name``.

    Raises RuntimeError when the element is missing, instead of failing
    with an AttributeError on ``None``.
    """
    field = soup.find(attrs={'name': name})
    if field is None:
        raise RuntimeError('login form field "%s" not found in the login page' % name)
    return field.get('value')


url = 'https://passport.csdn.net'
url1 = 'http://my.csdn.net/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}

# A Session keeps cookies between requests, which is what allows moving from
# the login page to other pages.
session = requests.Session()
# Set the headers on the session so every request below carries them.
session.headers.update(headers)
# MozillaCookieJar saves cookies in the Mozilla cookies.txt format
# (LWPCookieJar would write the Set-Cookie3 format instead).
session.cookies = cookielib.MozillaCookieJar(filename='cookie1.txt')

# Fetch the login page and read the hidden fields the login POST must echo.
response = session.get(url).text
soup = BeautifulSoup(response, 'lxml')
lt = _hidden_value(soup, 'lt')
execution = _hidden_value(soup, 'execution')
_eventId = _hidden_value(soup, '_eventId')

postData = {
    'username': '账号',
    'password': '密码',
    'lt': lt,
    'execution': execution,
    '_eventId': _eventId,
}

# POST the credentials to the login URL. Headers must be passed by keyword:
# the third positional parameter of Session.post() is `json`, not `headers`.
session.post(url, data=postData, headers=headers)
session.cookies.save(ignore_discard=True, ignore_expires=True)

# GET the page to scrape. The session cookies authenticate the request, so
# the credentials are not repeated in the query string.
response1 = session.get(url1, headers=headers).text
print(response1)

依然可以用上面保存的cookies访问

参考文献:http://www.jianshu.com/p/3debfb110ad9

0 0
原创粉丝点击