模拟登录以及BeautifulSoup学习笔记

来源:互联网 发布:java实现自定义表单 编辑:程序博客网 时间:2024/06/05 22:54
# coding:utf-8
"""Simulated login with a cookie-aware opener, parsed with BeautifulSoup.

The script posts a username / password / captcha code to ``login_url``,
keeps the session cookie in a cookie jar, and then fetches a follow-up page
using the same session.
"""
from __future__ import print_function

try:  # Python 2 module names (what these notes were originally written for)
    import cookielib
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3 equivalents
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import urlencode

from bs4 import BeautifulSoup

# Login URL (placeholder -- fill in the real endpoint).
login_url = "******************"

# Browser-like User-Agent sent with every request.
_USER_AGENT = (
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/55.0.2859.0 Safari/537.36'
)


class Login(object):
    """Hold login credentials and a cookie-aware URL opener."""

    def __init__(self):
        """Create empty credentials and install a cookie-handling opener."""
        self.username = ''
        self.password = ''
        # Captcha / verification code submitted together with the credentials.
        self.rode = ''
        # Cookie jar shared by every request made through ``self.opener``.
        self.cj = cookielib.LWPCookieJar()
        self.opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cj))
        # Also make module-level ``urlopen`` calls go through this opener.
        urllib2.install_opener(self.opener)

    def setLoginInfo(self, username, password, rode):
        """Store the credentials used by :meth:`login`.

        Args:
            username: Account name.
            password: Account password.
            rode: Captcha / verification code.
        """
        self.username = username
        self.password = password
        self.rode = rode

    def login(self):
        """Post the stored credentials to ``login_url``.

        The request is sent exactly once through the cookie-aware opener so
        the session cookie is kept for later requests. Success is judged by
        the presence of an element with id ``content`` in the response.

        Returns:
            True if the ``#content`` element was found, otherwise False.
        """
        login_data = {
            'username': self.username,
            'password': self.password,
            'rode': self.rode,
        }
        headers = {'User-Agent': _USER_AGENT}
        # Request bodies must be bytes on Python 3; urlencode output is ASCII,
        # so encoding is harmless on Python 2 as well.
        payload = urlencode(login_data).encode('utf-8')
        req = urllib2.Request(login_url, data=payload, headers=headers)
        # Going through the opener stores the session id cookie for follow-ups.
        response = self.opener.open(req)
        self.operate = response
        # Parse the returned page with BeautifulSoup.
        soup = BeautifulSoup(response, 'lxml')
        # Look up the page's #content element.
        data = soup.select('#content')
        if data:
            print(u'模拟登录成功!', data)
            return True
        print(u'模拟登录失败!')
        return False

    def skip(self, skip_url):
        """Fetch another page after a successful login and print its title.

        Args:
            skip_url: URL of the page to open with the logged-in session.
        """
        headers = {'User-Agent': _USER_AGENT}
        req = urllib2.Request(skip_url, headers=headers)
        response = self.opener.open(req)
        soup = BeautifulSoup(response, 'lxml')
        # Print the title of the fetched page; a page without <title> prints
        # None instead of raising AttributeError.
        title = soup.title
        print(title.string if title is not None else None)


if __name__ == "__main__":
    userlogin = Login()
    username = '***********'
    password = '***********'
    rode = '***********'
    # Hand the credentials to the login object before logging in.
    userlogin.setLoginInfo(username, password, rode)
    # Run the simulated login.
    userlogin.login()
    # Open a follow-up page using the logged-in session.
    userlogin.skip(skip_url='***************')
0 0
原创粉丝点击