Python3中urllib详细使用方法
来源:互联网 发布:上海网络综合布线工程 编辑:程序博客网 时间:2024/06/16 09:06
urllib是Python标准库中用于获取URL(Uniform Resource Locator,统一资源定位符)的模块,我们可以利用它来抓取远程数据并保存。下面整理了urllib使用中关于header、代理、超时、认证、异常处理的一些方法,一起来看看。
# create a password managerpassword_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()# Add the username and password.# If we knew the realm, we could use it instead of None.top_level_url = "https://www.111cn.net /"password_mgr.add_password(None, top_level_url, 'rekfan', 'xxxxxx')handler = urllib.request.HTTPBasicAuthHandler(password_mgr)# create "opener" (OpenerDirector instance)opener = urllib.request.build_opener(handler)# use the opener to fetch a URLa_url = "https://www.111cn.net /"x = opener.open(a_url)print(x.read())# Install the opener.# Now all calls to urllib.request.urlopen use our opener.urllib.request.install_opener(opener)a = urllib.request.urlopen(a_url).read().decode('utf8')print(a)9、使用代理python3 抓取网页资源的 N 种方法:
1、最简单
- import urllib.request
- response = urllib.request.urlopen('http://python.org/')
- html = response.read()
import urllib.request
response = urllib.request.urlopen('http://python.org/')
html = response.read()
2、使用 Request
- import urllib.request
- req = urllib.request.Request('http://python.org/')
- response = urllib.request.urlopen(req)
- the_page = response.read()
import urllib.request
req = urllib.request.Request('http://python.org/')
response = urllib.request.urlopen(req)
the_page = response.read()
3、发送数据
- #! /usr/bin/env python3
- import urllib.parse
- import urllib.request
- url = 'http://localhost/login.php'
- user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
- values = {
-     'act' : 'login',
-     'login[email]' : 'yzhang@i9i8.com',
-     'login[password]' : '123456'
- }
- # POST data must be bytes in Python 3, so encode the urlencoded string
- data = urllib.parse.urlencode(values).encode('utf-8')
- req = urllib.request.Request(url, data)
- req.add_header('Referer', 'http://www.python.org/')
- response = urllib.request.urlopen(req)
- the_page = response.read()
- print(the_page.decode("utf8"))
#! /usr/bin/env python3
import urllib.parse
import urllib.request
url = 'http://localhost/login.php'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {
    'act' : 'login',
    'login[email]' : 'yzhang@i9i8.com',
    'login[password]' : '123456'
}
# POST data must be bytes in Python 3, so encode the urlencoded string
data = urllib.parse.urlencode(values).encode('utf-8')
req = urllib.request.Request(url, data)
req.add_header('Referer', 'http://www.python.org/')
response = urllib.request.urlopen(req)
the_page = response.read()
print(the_page.decode("utf8"))
4、发送数据和header
- #! /usr/bin/env python3
- import urllib.parse
- import urllib.request
- url = 'http://localhost/login.php'
- user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
- values = {
-     'act' : 'login',
-     'login[email]' : 'yzhang@i9i8.com',
-     'login[password]' : '123456'
- }
- headers = { 'User-Agent' : user_agent }
- # POST data must be bytes in Python 3, so encode the urlencoded string
- data = urllib.parse.urlencode(values).encode('utf-8')
- req = urllib.request.Request(url, data, headers)
- response = urllib.request.urlopen(req)
- the_page = response.read()
- print(the_page.decode("utf8"))
#! /usr/bin/env python3
import urllib.parse
import urllib.request
url = 'http://localhost/login.php'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {
    'act' : 'login',
    'login[email]' : 'yzhang@i9i8.com',
    'login[password]' : '123456'
}
headers = { 'User-Agent' : user_agent }
# POST data must be bytes in Python 3, so encode the urlencoded string
data = urllib.parse.urlencode(values).encode('utf-8')
req = urllib.request.Request(url, data, headers)
response = urllib.request.urlopen(req)
the_page = response.read()
print(the_page.decode("utf8"))
5、http 错误
- #! /usr/bin/env python3
- import urllib.request
- req = urllib.request.Request('http://www.111cn.net')
- try:
-     urllib.request.urlopen(req)
- except urllib.error.HTTPError as e:
-     print(e.code)
-     print(e.read().decode("utf8"))
#! /usr/bin/env python3
import urllib.request
req = urllib.request.Request('http://www.111cn.net')
try:
    urllib.request.urlopen(req)
except urllib.error.HTTPError as e:
    print(e.code)
    print(e.read().decode("utf8"))
6、异常处理1
- #! /usr/bin/env python3
- from urllib.request import Request, urlopen
- from urllib.error import URLError, HTTPError
- req = Request("http://www.111cn.net/")
- try:
-     response = urlopen(req)
- except HTTPError as e:
-     print('The server couldn\'t fulfill the request.')
-     print('Error code: ', e.code)
- except URLError as e:
-     print('We failed to reach a server.')
-     print('Reason: ', e.reason)
- else:
-     print("good!")
-     print(response.read().decode("utf8"))
#! /usr/bin/env python3
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
req = Request("http://www.111cn.net/")
try:
    response = urlopen(req)
except HTTPError as e:
    print('The server couldn\'t fulfill the request.')
    print('Error code: ', e.code)
except URLError as e:
    print('We failed to reach a server.')
    print('Reason: ', e.reason)
else:
    print("good!")
    print(response.read().decode("utf8"))
7、异常处理2
- #! /usr/bin/env python3
- from urllib.request import Request, urlopen
- from urllib.error import URLError
- req = Request("http://www.111cn.net/")
- try:
-     response = urlopen(req)
- except URLError as e:
-     if hasattr(e, 'reason'):
-         print('We failed to reach a server.')
-         print('Reason: ', e.reason)
-     elif hasattr(e, 'code'):
-         print('The server couldn\'t fulfill the request.')
-         print('Error code: ', e.code)
- else:
-     print("good!")
-     print(response.read().decode("utf8"))
#! /usr/bin/env python3
from urllib.request import Request, urlopen
from urllib.error import URLError
req = Request("http://www.111cn.net/")
try:
    response = urlopen(req)
except URLError as e:
    if hasattr(e, 'reason'):
        print('We failed to reach a server.')
        print('Reason: ', e.reason)
    elif hasattr(e, 'code'):
        print('The server couldn\'t fulfill the request.')
        print('Error code: ', e.code)
else:
    print("good!")
    print(response.read().decode("utf8"))
8、HTTP 认证
- #! /usr/bin/env python3
- import urllib.request
- # create a password manager
- password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
- # Add the username and password.
- # If we knew the realm, we could use it instead of None.
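- top_level_url = "https://www.111cn.net/"
- password_mgr.add_password(None, top_level_url, 'rekfan', 'xxxxxx')
- handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
- # create "opener" (OpenerDirector instance)
- opener = urllib.request.build_opener(handler)
- # use the opener to fetch a URL
- a_url = "https://www.111cn.net/"
- x = opener.open(a_url)
- print(x.read())
- # Install the opener.
- # Now all calls to urllib.request.urlopen use our opener.
- urllib.request.install_opener(opener)
- a = urllib.request.urlopen(a_url).read().decode('utf8')
- print(a)
#! /usr/bin/env python3
import urllib.request
# create a password manager
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
# Add the username and password.
# If we knew the realm, we could use it instead of None.
top_level_url = "https://www.111cn.net/"
password_mgr.add_password(None, top_level_url, 'rekfan', 'xxxxxx')
handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
# create "opener" (OpenerDirector instance)
opener = urllib.request.build_opener(handler)
# use the opener to fetch a URL
a_url = "https://www.111cn.net/"
x = opener.open(a_url)
print(x.read())
# Install the opener.
# Now all calls to urllib.request.urlopen use our opener.
urllib.request.install_opener(opener)
a = urllib.request.urlopen(a_url).read().decode('utf8')
print(a)
9、使用代理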
- #! /usr/bin/env python3
- import urllib.request
- proxy_support = urllib.request.ProxyHandler({'sock5': 'localhost:1080'})
- opener = urllib.request.build_opener(proxy_support)
- urllib.request.install_opener(opener)
- a = urllib.request.urlopen("http://www.111cn.net").read().decode("utf8")
- print(a)
#! /usr/bin/env python3
import urllib.request
proxy_support = urllib.request.ProxyHandler({'sock5': 'localhost:1080'})
opener = urllib.request.build_opener(proxy_support)
urllib.request.install_opener(opener)
a = urllib.request.urlopen("http://www.111cn.net").read().decode("utf8")
print(a)
10、超时
- #! /usr/bin/env python3
- import socket
- import urllib.request
- # timeout in seconds
- timeout = 2
- socket.setdefaulttimeout(timeout)
- # this call to urllib.request.urlopen now uses the default timeout
- # we have set in the socket module
- req = urllib.request.Request('http://www.111cn.net/')
- a = urllib.request.urlopen(req).read()
- print(a)
#! /usr/bin/env python3
import socket
import urllib.request
# timeout in seconds
timeout = 2
socket.setdefaulttimeout(timeout)
# this call to urllib.request.urlopen now uses the default timeout
# we have set in the socket module
req = urllib.request.Request('http://www.111cn.net/')
a = urllib.request.urlopen(req).read()
print(a)
阅读全文
0 0
- Python3中urllib详细使用方法
- Python3中urllib详细使用方法
- Python3中urllib详细使用方法(header,代理,超时,认证,异常处理)
- Python3中urllib详细使用方法(header,代理,超时,认证,异常处理)
- Python3中urllib详细使用方法(header,代理,超时,认证,异常处理)
- Python3中urllib详细使用方法(header,代理,超时,认证,异常处理)
- Python3中urllib详细使用方法(header,代理,超时,认证,异常处理)
- Python3中urllib详细使用方法(header,代理,超时,认证,异常处理)
- Python3中urllib详细使用方法(header,代理,超时,认证,异常处理)
- Python3学习:urllib的使用方法
- 【Python3.6爬虫学习记录】(六)urllib详细使用方法(header,代理,超时,认证,异常处理)
- Python3 中urllib的使用
- Python3中urllib学习笔记
- Python3中urllib使用介绍
- python3.x中urllib的使用
- python3.3中urllib.request.open问题
- Python3中urllib的UnicodeDecodeError错误
- python3.x 中urllib的使用
- 【算法分析与设计】【第一周】121.&122. Best Time to Buy and Sell Stock I&II
- Raft membership change
- Python学习——爬虫之pdfkit用法
- 坐标系变换理论推导
- 网易笔试题
- Python3中urllib详细使用方法
- EBS_BOM展开SQL语句
- 正则表达式 处理选项
- Weave 如何与外网通信?- 每天5分钟玩转 Docker 容器技术(66)
- 【TP】【P-sensor】TP模拟P-sensor
- sql语句实现的具体过程
- 软件测试实验室之Eclipse
- python Django web初学安装
- eclipse中如何正确打开对应格式的文件比如.ftl , .bpmn等