Python: Simulating a Browser Login


The script below logs in to an ASP.NET site by POSTing the login form, including the hidden __VIEWSTATE and __EVENTVALIDATION state fields, while a MozillaCookieJar captures the session cookies. The same cookie-carrying opener is then reused to POST an XML query to a notice proxy endpoint; a final, disabled section applies the same pattern to a work-task query page.

import gzip
import sys
import re
import urllib.request
import urllib.parse
import urllib.error
import http.cookiejar
from bs4 import BeautifulSoup
import datetime
import random

# Leftover scraping helpers from the original post; not used by the login flow below
pages = set()
random.seed(datetime.datetime.now().timestamp())

# Get a list of all internal links found on a page
def getInternalLinks(bsObj, includeUrl):
    internalLinks = []
    # Find every link that starts with "/" or contains the site URL
    for link in bsObj.findAll('a', href=re.compile('^(/|.*' + includeUrl + ')')):
        if link.attrs['href'] is not None:
            if link.attrs['href'] not in internalLinks:
                internalLinks.append(link.attrs['href'])
    return internalLinks

# Simulate logging in to the website over HTTP

def ungzip(data):
    # The server may answer with gzip-compressed bytes (we send Accept-Encoding: gzip)
    try:
        print("Decompressing...")
        data = gzip.decompress(data)
        print("Decompressed successfully")
    except (OSError, EOFError):
        # not gzip data: return the bytes unchanged
        print("Not compressed, nothing to decompress")
    return data

LoginUrl = "http://网址.com:9090/jsFrame/login.aspx?login=login"
headers = {
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Referer': 'http://网址.com:9090/jsFrame/login.aspx?login=login',
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Accept-Encoding': 'gzip, deflate',
    'Host': '网址.com:9090',
    'Connection': 'Keep-Alive',
    'Pragma': 'no-cache'
}

# Hidden ASP.NET form state, copied from a captured login request
__VIEWSTATE = '/wEPDwUKMTgyNjAzNjE0MQ9kFgICAQ9kFgICCQ8QDxYCHgdWaXNpYmxlaGRkZGQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFEmNoa1JlbWViZXJQYXNzd29yZGHjnndASufNAaraxhc4Fq1KydHN'
__EVENTVALIDATION = '/wEWBgKthPnxBQLT8dy8BQKd+7qdDgK1qbSRCwLPx7zUAgLf2eqGAzVtS60EPvfNOGv+JEjkKNxzBqnS'
txtUserID = '123'
txtPwd = 'password'
txtPassword = '123'
postDict = {
    '__VIEWSTATE': __VIEWSTATE,
    '__EVENTVALIDATION': __EVENTVALIDATION,
    'txtUserID': txtUserID,
    'txtPwd': txtPwd,
    'txtPassword': txtPassword,
    'Image1': ' '
}

# Encode the form fields into the HTTP request body.
# The urllib.parse.urlencode() function takes a mapping or sequence of 2-tuples
# and returns an ASCII string in this format. It should be encoded to bytes
# before being used as the data parameter.
postdata = urllib.parse.urlencode(postDict).encode()

cookie_filename = 'cookie.txt'
cookie = http.cookiejar.MozillaCookieJar(cookie_filename)
cookie_support = urllib.request.HTTPCookieProcessor(cookie)
# Build an opener that stores and resends cookies
opener = urllib.request.build_opener(cookie_support)
# Bundle the URL, HTTP headers, and body into a request
request = urllib.request.Request(LoginUrl, data=postdata, headers=headers)
try:
    # Send the request the way a browser would and fetch the response
    response = opener.open(request)
    # Decompress the response body
    response = ungzip(response.read())
    # Decode the bytes into a string
    page = response.decode()
    #print(page)
    bsObj = BeautifulSoup(page, "html.parser")
    for link in bsObj.findAll('iframe'):
        print(link)
except urllib.error.HTTPError as e:
    print(e.code, ':', e.reason)
except urllib.error.URLError as e:
    # a plain URLError has no .code attribute, only .reason
    print(e.reason)

cookie.save(ignore_discard=True, ignore_expires=True)  # save the cookies to cookie.txt
print(cookie)
for item in cookie:
    print('Name = ' + item.name)
    print('Value = ' + item.value)

#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
t_headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-cn',
    'Referer': 'http://erp.sciyon.com:9090/NM/JsFrame/HomeShow/Inform.aspx?title=新闻公告&homeitemid=101',
    'x-requested-with': 'XMLHttpRequest',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept-Encoding': 'gzip, deflate',
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
    'Host': 'erp.sciyon.com:9090',
    'DNT': '1',
    'Connection': 'Keep-Alive',
    'Pragma': 'no-cache'
}
t_url = 'http://erp.sciyon.com:9090/NM/Proxy/NoticeProxy.aspx'

# Tag fragments for building the XML body by concatenation (unused; kept for reference)
Data = '<Data>'
Data2 = '</Data>'
Action = '<Action>'
Action2 = '</Action>'
TYPE = '<TYPE>'
TYPE2 = '</TYPE>'
STATE = '<STATE>'
STATE2 = '</STATE>'
AUTHORIZATION = '<AUTHORIZATION>'
AUTHORIZATION2 = '</AUTHORIZATION>'
HOMEPAGEID = '<HOMEPAGEID>'
HOMEPAGEID2 = '</HOMEPAGEID>'
#get_postDict = Data+Action+'GETNOTICEDATABYWHERE'+Action2+TYPE+TYPE2+STATE+'APPROVE'+STATE2+AUTHORIZATION+'1'+AUTHORIZATION2+HOMEPAGEID+'101'+HOMEPAGEID2+Data2
get_postDict = '''<Data><Action>GETNOTICEDATABYWHERE</Action><TYPE></TYPE><STATE>APPROVE</STATE><AUTHORIZATION>1</AUTHORIZATION><HOMEPAGEID>101</HOMEPAGEID></Data>'''
#get_postdata = urllib.parse.urlencode(get_postDict).encode()

# By now the opener already carries the cookies obtained during login
try:
    # POST the raw XML body as-is (not urlencoded); send t_headers along with it
    get_request = urllib.request.Request(t_url, data=get_postDict.encode('utf-8'), headers=t_headers)
    get_response = opener.open(get_request)
    # Decompress and decode the response
    get_response = ungzip(get_response.read())
    page = get_response.decode()
    print(page)
    #bsObj = BeautifulSoup(page, "html.parser")
    '''
    for link in bsObj.findAll('a'):
        print(link)
    '''
except urllib.error.HTTPError as e:
    print(e.code, ':', e.reason)
except urllib.error.URLError as e:
    print(e.reason)

print("*********************************************************")
#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
#----------------------------------------------------------------
# Use the saved cookie to request yet another URL (disabled)
'''
get_headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-cn',
    'Referer': 'http://IP/ERP_OA/WorkTask/TaskQuery/ListPage.aspx',
    'x-requested-with': 'Ext.basex',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept-Encoding': 'gzip, deflate',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
    'Host': 'IP',
    'Connection': 'Keep-Alive',
    'Pragma': 'no-cache'
}
get_url = 'http://IP/ERP_OA/WorkTask/TaskQuery/ListPage.aspx?FlowGuid=8D11A66F0EAF44FCBBD9DEBEE6D45BFE'
start = 0
limit = 30
SORTFIELD = 'FTASKID'
SORTTYPE = 'DESC'
ACTION = 'GETWORKTASK'
QUERYPARA = '%3CData%3E%3CQueryPara%3E%3CBEGDATE%3E2017-01-01%3C%2FBEGDATE%3E%3CENEDATE%3E2017-01-13%3C%2FENEDATE%3E%3CFCLASS%3E%3C%2FFCLASS%3E%3CFTYPE%3E%3C%2FFTYPE%3E%3CFDELAY%3E%3C%2FFDELAY%3E%3CFTITLE%3E%3C%2FFTITLE%3E%3CFEEDBACKID%3E%3C%2FFEEDBACKID%3E%3CFSUSER%3E%3C%2FFSUSER%3E%3CFRUSER%3E%3C%2FFRUSER%3E%3CSTATE%3E%3C%2FSTATE%3E%3C%2FQueryPara%3E%3C%2FData%3E'
get_postDict = {
    'start': start,
    'limit': limit,
    'SORTFIELD': SORTFIELD,
    'SORTTYPE': SORTTYPE,
    'ACTION': ACTION,
    'QUERYPARA': QUERYPARA
}
get_postdata = urllib.parse.urlencode(get_postDict).encode()
get_request = urllib.request.Request(get_url, get_postdata, headers=get_headers)
# By now the opener already carries the cookies obtained during login
try:
    get_response = opener.open(get_request)
    get_response = ungzip(get_response.read())
    page = get_response.decode()
    print(page)
    bsObj = BeautifulSoup(page, "html.parser")
    for link in bsObj.findAll('a'):
        print(link)
except urllib.error.URLError as e:
    print(e.reason)
#get_response = ungzip(opener.open(get_request).read())
#print(get_response.decode())
#------------------------------------------------------------------------------------------------------------------
'''
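Note that __VIEWSTATE and __EVENTVALIDATION are pasted in from a captured request, but ASP.NET regenerates both on every page load, so a stale pair will normally make the login fail. A minimal sketch of refreshing them at runtime, reusing the opener, LoginUrl, and BeautifulSoup import from the script above (the helper name get_aspnet_tokens is made up here; the hidden-input field names are the ones already used in postDict):

def get_aspnet_tokens(opener, login_url):
    # GET the login page; with no Accept-Encoding header urllib receives plain text
    html = opener.open(login_url).read().decode()
    bs = BeautifulSoup(html, 'html.parser')
    tokens = {}
    for name in ('__VIEWSTATE', '__EVENTVALIDATION'):
        field = bs.find('input', {'name': name})
        # tolerate a missing field rather than raising
        tokens[name] = field['value'] if field else ''
    return tokens

# Refresh the form state right before posting the login:
# postDict.update(get_aspnet_tokens(opener, LoginUrl))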
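Because the cookies are saved through MozillaCookieJar, a later run can skip the login step and reuse the stored session, assuming the cookies in cookie.txt have not expired on the server side:

import http.cookiejar
import urllib.request

saved = http.cookiejar.MozillaCookieJar()
# load() accepts the same flags that save() was called with above
saved.load('cookie.txt', ignore_discard=True, ignore_expires=True)
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(saved))
# every request made through this opener now resends the stored session cookies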
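For comparison, the third-party requests library handles the cookie jar, the gzip decompression, and the decoding that this script implements by hand. An alternative sketch, not what the original uses; it assumes the LoginUrl, headers, postDict, t_url, t_headers, and get_postDict definitions from the script above:

import requests

session = requests.Session()  # persists cookies across requests, like the opener above
login = session.post(LoginUrl, data=postDict, headers=headers)
login.raise_for_status()
# login.text is already decompressed and decoded

notice = session.post(t_url, data=get_postDict.encode('utf-8'), headers=t_headers)
print(notice.text)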
