Python模拟浏览器登录
来源:互联网 发布:php storm10使用教程 编辑:程序博客网 时间:2024/06/05 18:01
'''
import gzip
import sys
import re
import urllib.request
import urllib.parse
import urllib.error
import http.cookiejar
from bs4 import BeautifulSoup
import datetime
import random

pages = set()  # collected page URLs (not used below; kept for compatibility)
random.seed(datetime.datetime.now())


def getInternalLinks(bsObj, includeUrl):
    """Return a de-duplicated list of internal links found in *bsObj*.

    A link counts as internal when its href starts with "/" or contains
    *includeUrl*.

    :param bsObj: BeautifulSoup document to scan.
    :param includeUrl: site URL fragment that marks a link as internal.
    :return: list of unique href strings, in first-seen order.
    """
    internalLinks = []
    # Escape the URL so regex metacharacters in it (".", "?", ...) match
    # literally — the original interpolated the raw string and could
    # mis-match on any host name containing a dot.
    pattern = re.compile(r'^(/|.*' + re.escape(includeUrl) + ')')
    for link in bsObj.findAll('a', href=pattern):
        href = link.attrs['href']
        if href is not None and href not in internalLinks:
            internalLinks.append(href)
    return internalLinks


# Simulate a web-site login over plain HTTP.
def ungzip(data):
    """Gzip-decompress *data* when it is compressed; pass it through otherwise.

    :param data: raw response body bytes (possibly gzip-encoded).
    :return: decompressed bytes, or the original bytes when not gzipped.
    """
    try:
        print("正在解压.....")
        data = gzip.decompress(data)
        print("解压成功")
    except (OSError, EOFError):
        # Not a gzip stream: gzip.decompress raises BadGzipFile (an OSError
        # subclass) or EOFError on truncated input.  The original bare
        # `except:` also hid genuine programming errors.
        print("未经压缩,无需解压")
    return data


# --- Step 1: log in, capturing the session cookie --------------------------
LoginUrl = "http://网址.com:9090/jsFrame/login.aspx?login=login"
headers = {
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Referer': 'http://网址.com:9090/jsFrame/login.aspx?login=login',
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Accept-Encoding': 'gzip, deflate',
    'Host': '网址.com:9090',
    'Connection': 'Keep-Alive',
    'Pragma': 'no-cache',
}

# ASP.NET postback state copied from the rendered login form (site-specific).
__VIEWSTATE = '/wEPDwUKMTgyNjAzNjE0MQ9kFgICAQ9kFgICCQ8QDxYCHgdWaXNpYmxlaGRkZGQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFEmNoa1JlbWViZXJQYXNzd29yZGHjnndASufNAaraxhc4Fq1KydHN'
__EVENTVALIDATION = '/wEWBgKthPnxBQLT8dy8BQKd+7qdDgK1qbSRCwLPx7zUAgLf2eqGAzVtS60EPvfNOGv+JEjkKNxzBqnS'
txtUserID = '123'
txtPwd = '密码'
txtPassword = '123'
postDict = {
    '__VIEWSTATE': __VIEWSTATE,
    '__EVENTVALIDATION': __EVENTVALIDATION,
    'txtUserID': txtUserID,
    'txtPwd': txtPwd,
    'txtPassword': txtPassword,
    'Image1': ' ',
}

# urllib.parse.urlencode() turns a mapping (or sequence of 2-tuples) into an
# ASCII query string; it must be encoded to bytes before use as POST data.
postdata = urllib.parse.urlencode(postDict).encode()

cookie_filename = 'cookie.txt'
cookie = http.cookiejar.MozillaCookieJar(cookie_filename)
cookie_support = urllib.request.HTTPCookieProcessor(cookie)
# Opener that stores the login cookie and replays it on later requests.
opener = urllib.request.build_opener(cookie_support)

# Bundle URL, headers and body into one Request object.
request = urllib.request.Request(LoginUrl, data=postdata, headers=headers)
try:
    # Send the login POST like a browser would and read the reply.
    response = opener.open(request)
    response = ungzip(response.read())
    page = response.decode()
    # print(page)
    bsObj = BeautifulSoup(page, "html.parser")
    for link in bsObj.findAll('iframe'):
        print(link)
except urllib.error.HTTPError as e:
    # HTTP-level failure: a status code is available.
    print(e.code, ':', e.reason)
except urllib.error.URLError as e:
    # Transport-level failure (DNS, connection refused, ...).  URLError has
    # no .code attribute — the original `print(e.code, ...)` raised
    # AttributeError inside the handler here.
    print(e.reason)

cookie.save(ignore_discard=True, ignore_expires=True)  # persist to cookie.txt
print(cookie)
for item in cookie:
    print('Name = ' + item.name)
    print('Value = ' + item.value)

# --- Step 2: reuse the authenticated opener to fetch the notice list -------
t_headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-cn',
    'Referer': 'http://erp.sciyon.com:9090/NM/JsFrame/HomeShow/Inform.aspx?title=新闻公告&homeitemid=101',
    'x-requested-with': 'XMLHttpRequest',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept-Encoding': 'gzip, deflate',
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
    'Host': 'erp.sciyon.com:9090',
    'DNT': '1',
    'Connection': 'Keep-Alive',
    'Pragma': 'no-cache',
}
t_url = 'http://erp.sciyon.com:9090/NM/Proxy/NoticeProxy.aspx'

# XML tag fragments for assembling the payload by concatenation; kept for
# reference — the payload below is written out literally instead.
Data = '<Data>'
Data2 = '</Data>'
Action = '<Action>'
Action2 = '</Action>'
TYPE = '<TYPE>'
TYPE2 = '</TYPE>'
STATE = '<STATE>'
STATE2 = '</STATE>'
AUTHORIZATION = '<AUTHORIZATION>'
AUTHORIZATION2 = '</AUTHORIZATION>'
HOMEPAGEID = '<HOMEPAGEID>'
HOMEPAGEID2 = '</HOMEPAGEID>'
# get_postDict = Data+Action+'GETNOTICEDATABYWHERE'+Action2+TYPE+TYPE2+STATE+'APPROVE'+STATE2+AUTHORIZATION+'1'+AUTHORIZATION2+HOMEPAGEID+'101'+HOMEPAGEID2+Data2
get_postDict = '''<Data><Action>GETNOTICEDATABYWHERE</Action><TYPE></TYPE><STATE>APPROVE</STATE><AUTHORIZATION>1</AUTHORIZATION><HOMEPAGEID>101</HOMEPAGEID></Data>'''

try:
    # The opener still carries the session cookie from the login above.
    get_response = opener.open(t_url, get_postDict.encode('utf-8'))
    get_response = ungzip(get_response.read())
    page = get_response.decode()
    print(page)
    # bsObj = BeautifulSoup(page, "html.parser")
    # for link in bsObj.findAll('a'):
    #     print(link)
except urllib.error.HTTPError as e:
    print(e.code, ':', e.reason)
except urllib.error.URLError as e:
    print(e.reason)

print("*********************************************************")

# --- Disabled example: query the work-task list with the same cookie -------
# NOTE(review): in the original, the disabled code encoded `postDict` (the
# login form) instead of `get_postDict` — corrected in this comment block.
'''
get_headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-cn',
    'Referer': 'http://IP/ERP_OA/WorkTask/TaskQuery/ListPage.aspx',
    'x-requested-with': 'Ext.basex',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept-Encoding': 'gzip, deflate',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
    'Host': 'IP',
    'Connection': 'Keep-Alive',
    'Pragma': 'no-cache',
}
get_url = 'http://IP/ERP_OA/WorkTask/TaskQuery/ListPage.aspx?FlowGuid=8D11A66F0EAF44FCBBD9DEBEE6D45BFE'
start = 0
limit = 30
SORTFIELD = 'FTASKID'
SORTTYPE = 'DESC'
ACTION = 'GETWORKTASK'
QUERYPARA = '%3CData%3E%3CQueryPara%3E%3CBEGDATE%3E2017-01-01%3C%2FBEGDATE%3E%3CENEDATE%3E2017-01-13%3C%2FENEDATE%3E%3CFCLASS%3E%3C%2FFCLASS%3E%3CFTYPE%3E%3C%2FFTYPE%3E%3CFDELAY%3E%3C%2FFDELAY%3E%3CFTITLE%3E%3C%2FFTITLE%3E%3CFEEDBACKID%3E%3C%2FFEEDBACKID%3E%3CFSUSER%3E%3C%2FFSUSER%3E%3CFRUSER%3E%3C%2FFRUSER%3E%3CSTATE%3E%3C%2FSTATE%3E%3C%2FQueryPara%3E%3C%2FData%3E'
get_postDict = {
    'start': start,
    'limit': limit,
    'SORTFIELD': SORTFIELD,
    'SORTTYPE': SORTTYPE,
    'ACTION': ACTION,
    'QUERYPARA': QUERYPARA,
}
get_postdata = urllib.parse.urlencode(get_postDict).encode()
get_request = urllib.request.Request(get_url, get_postdata, headers=get_headers)
try:
    get_response = opener.open(get_request)
    get_response = ungzip(get_response.read())
    page = get_response.decode()
    print(page)
except urllib.error.HTTPError as e:
    print(e.code, ':', e.reason)
except urllib.error.URLError as e:
    print(e.reason)
'''
0 0
- python模拟浏览器登录
- Python模拟浏览器登录
- Python模拟浏览器登录淘宝
- 用Python模拟浏览器登录
- python模拟浏览器登录淘宝抓取内容
- python脚本模拟浏览器
- python 模拟浏览器
- python 模拟浏览器
- python模拟一个浏览器
- python实现模拟登录
- python实现模拟登录
- python实现模拟登录
- python实现模拟登录
- python实现模拟登录
- python实现模拟登录
- python实现模拟登录
- python 模拟登录i
- python模拟百度登录
- 一个使用openGL渲染的炫丽Android动画库
- Maven基本命令
- ubuntu16.04LTS安装tensorflow1.1.0(pip方式/python2.7版本/cpu版本)
- 程序员面试金典——加法运算替代
- Javascript继承机制的设计思想
- Python模拟浏览器登录
- 1492: 求和
- UART中的硬件流控RTS与CTS
- ZOJ 1003
- Mysql数据库优化系列(二)------AWK脚本统计数据库性能参数
- 水题
- BFS例题
- 彻底解决 jspatch app store 审核通不过的问题
- linux驱动开发之字符设备框架