Requests

来源:互联网 发布:北京用友软件代理商 编辑:程序博客网 时间:2024/05/17 22:01

Requests 崔老师爬虫系列课程学习笔记

install

pip install requests

examples

import requests

# Fetch a page and print the main parts of the returned response object.
response = requests.get('http://www.baidu.com')
print(type(response))
print(response.status_code)
print(response.text)
print(response.cookies)

# 各种请求方式

import requests

# Each HTTP verb has a same-named helper in the requests module.
requests.post('http://httpbin.org/post')
requests.put('http://httpbin.org/put')
requests.delete('http://httpbin.org/delete')
requests.head('http://httpbin.org/head')

请求

基本GET请求

import requests

# Plain GET request; print the decoded response body.
response = requests.get('http://www.baidu.com')
print(response.text)

带参数的GET请求

import requests

# Query parameters written directly into the URL.
response = requests.get('http://httpbin.org/get?name=germey&age=22')
print(response.text)
import requests

# Query parameters supplied as a dict through the `params` argument.
data = {
    'name': 'germey',
    'age': 22,
}
response = requests.get('http://httpbin.org/get', params=data)
print(response.text)

解析JSON

import json

import requests

response = requests.get('http://httpbin.org/get')
# Two ways to decode the body: the response's own json() helper,
# and json.loads() applied to the body text.
print(response.json())
print(json.loads(response.text))

获取二进制类型

import requests

response = requests.get('http://seopic.699pic.com/photo/00013/4041.jpg_wh1200.jpg')
# response.content holds the raw bytes, so the file is opened in binary mode.
# The `with` block closes the file on exit; no explicit close() is needed.
with open('祈福.jpg', 'wb') as f:
    f.write(response.content)

添加Headers

import requests

# No custom headers are passed with this request.
response = requests.get('http://www.zhihu.com/explore')
print(response.text)
import requests

# Send a browser-like User-Agent through the `headers` argument.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
}
response = requests.get('http://www.zhihu.com/explore', headers=headers)
print(response.text)

基本POST请求

import requests

# Form fields for the POST body.
data = {'name': 'germey', 'age': 22}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
}
# Pass `headers` as well as `data`; the dict was previously built but never sent.
response = requests.post('http://httpbin.org/post', data=data, headers=headers)
print(response.text)

响应

response属性

import requests

response = requests.get('http://www.baidu.com')
# Print the type and value of each commonly used response attribute.
print(type(response.status_code), response.status_code)
print(type(response.cookies), response.cookies)
print(type(response.headers), response.headers)
print(type(response.url), response.url)
print(type(response.history), response.history)

状态码的判断

import requests

response = requests.get('http://www.jianshu.com/hello.html')
# Compare against the named constant in requests.codes instead of a bare number.
if response.status_code == requests.codes.not_found:
    print('404 NOTFOUND')

高级操作

文件上传

import requests

# Open the file in binary mode inside a `with` block so the handle is
# closed once the upload has finished.
with open("祈福.jpg", 'rb') as image:
    files = {"files": image}
    response = requests.post('http://httpbin.org/post', files=files)
print(response.text)

获取Cookie

import requests

response = requests.get('http://www.baidu.com')
print(response.cookies)
# Iterate over the cookies as name/value pairs.
for key, value in response.cookies.items():
    print(key + '=' + value)

会话维持

模拟登录

import requests

# Both requests go through the same Session object: the first asks httpbin
# to set a cookie, the second asks httpbin to report the cookies it received.
s = requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456789')
response = s.get('http://httpbin.org/cookies')
print(response.text)

证书验证

import requests
from requests.packages import urllib3

# Silence the warning that is emitted for unverified HTTPS requests.
urllib3.disable_warnings()
# verify=False skips certificate verification, so a site with an invalid
# certificate does not abort the request.
# NOTE: this removes protection against man-in-the-middle attacks; use it
# only for testing.
response = requests.get('https://www.12306.cn', verify=False)
print(response.status_code)
import requests

# Supply a local certificate manually as a (certificate file, key file) pair.
response = requests.get(
    'https://www.12306.cn',
    cert=('/path/server.crt', '/path/key'),
)
print(response.status_code)

代理设置

import requests

# One proxy URL per URL scheme of the outgoing request.
proxies = {
    "http": "http://127.0.0.1:51507",
    "https": "https://127.0.0.1:51507",
}
response = requests.get("http://www.baidu.com", proxies=proxies)
print(response.status_code)
200
import requests

# SOCKS proxies use the socks5:// scheme.
# NOTE: per the requests documentation, SOCKS support is an optional extra
# (pip install requests[socks]).
proxies = {
    "http": "socks5://127.0.0.1:51507",
    "https": "socks5://127.0.0.1:51507",
}
response = requests.get("https://www.taobao.com", proxies=proxies)
print(response.status_code)

超时设置

import requests

# `timeout` is given in seconds.
response = requests.get("http://www.taobao.com", timeout=1)
print(response.status_code)
200

认证设置

import requests
from requests.auth import HTTPBasicAuth

# HTTP Basic authentication using an explicit HTTPBasicAuth object.
# NOTE: replace the URL and credentials with your own; avoid hard-coding
# real credentials in source files.
response = requests.get(
    'http://www.++++++++++.com',
    auth=HTTPBasicAuth('13026156724', 'zhou3210'),
)
print(response.text)
import requests

# The same request with the credentials passed as a plain
# (username, password) tuple.
# NOTE: replace the URL and credentials with your own; avoid hard-coding
# real credentials in source files.
response = requests.get(
    'http://www.++++++++++.com',
    auth=('13026156724', 'zhou3210'),
)
print(response.text)

异常处理

import requests
from requests.exceptions import HTTPError, ConnectionError, ReadTimeout

# A very small timeout is used so that the request is likely to fail and
# one of the handlers below runs.
try:
    response = requests.get('http://www.baidu.com', timeout=0.01)
except HTTPError:
    print('HTTPError')
except ConnectionError:
    print('ConnectionError')
except ReadTimeout:
    print('ReadTimeout')
原创粉丝点击