Making HTTP Requests with Python¶
urllib (urllib.request) is Python's built-in HTTP library; urllib3 is a widely used third-party HTTP client
Requests is a synchronous request library built on top of urllib3 by the well-known developer Kenneth Reitz, and is much easier to use
AIOHTTP is an asynchronous request library built on asyncio, suited to fully asynchronous projects and offering better performance
HTTPX is a full-featured HTTP client that supports both synchronous and asynchronous requests, as well as HTTP/2
Besides these, there are also request libraries such as http.client and httplib2
Built-in¶
Quickly start a server: python -m http.server 80
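For completeness, a minimal sketch of a GET request using only the standard library (urllib.request); httpbin.org here is just a public test endpoint:
from urllib.request import urlopen

# plain GET with the standard library, no third-party dependency
with urlopen("https://httpbin.org/get", timeout=5) as resp:
    print(resp.status)             # 200
    print(resp.read().decode())    # response body as text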
Requests¶
python -m pip install requests
requests.request¶
https://requests.readthedocs.io/en/latest/api/#requests.request
import requests
# a generic interface that covers every request method
r = requests.request(method, url, **kwargs)
"""
:method: GET/POST/PUT/DELETE/PATCH/OPTIONS/HEAD
:url: request URL
:**kwargs:
params=_dict # URL parameters, i.e. the query string
data=_dict # form data (application/x-www-form-urlencoded)
json=_dict # JSON body, automatically encoded (Content-Type: application/json)
headers=_dict # request headers
cookies=_dict # dict or CookieJar object
files={"f1": open("xxx.xls", "rb"), "f2": open(...)} # files to upload
proxies={"http": "127.0.0.1:8888", "https": "127.0.0.1:8888"} # proxy settings
verify="/path/to/certifile"
timeout=0.01 # connection timeout; it is good practice to use a value slightly larger than a multiple of 3, because the default TCP packet retransmission window is 3 seconds
allow_redirects=True # follow redirects automatically (default: True)
"""
requests.method¶
import requests
payload = {'key1': 'value1', 'key2': 'value2'}
# the method-specific helpers are simple and readable
r = requests.get('https://httpbin.org/get', params=payload)
r = requests.head('https://httpbin.org/get')
r = requests.options('https://httpbin.org/get')
r = requests.post('https://httpbin.org/post', data=payload)
r = requests.put('https://httpbin.org/put', data=payload)
r = requests.patch('https://httpbin.org/patch', data=payload)
r = requests.delete('https://httpbin.org/delete')
print(r) # <Response [200]>
requests.Response¶
- Headers
# inspect the redirect history
r.history # [<Response [301]>]
r.url # 'https://httpbin.org/get?key1=value1&key2=value2'
r.encoding # 'utf-8'
r.request.headers # request headers
r.headers # response headers
"""
{'Date': 'Wed, 24 Aug 2022 03:50:40 GMT', 'Content-Type': 'application/json', 'Content-Length': '379', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'}
# 获取头字段值,不区分大小写
r.headers.get('content-type') # 'application/json'
"""
- Status code
https://requests.readthedocs.io/en/latest/api/#api-cookies
r.status_code # 200
# compare against the built-in status code constants
r.status_code == requests.codes.ok
# check whether the request succeeded
r.raise_for_status()
"""
returns None on success
raises requests.exceptions.HTTPError otherwise
"""
- Body
# as bytes
r.content
"""
b'{\n "args": {\n "key1": "value1", \n "key2": "value2"\n }, \n "headers": {\n "Accept": "*/*", \n "Accept-Encoding": "gzip, deflate", \n "Host": "httpbin.org", \n "User-Agent": "python-requests/2.28.1", \n "X-Amzn-Trace-Id": "Root=1-6305a010-32d12aaa313962317e29ede5"\n }, \n "origin": "185.212.56.154", \n "url": "https://httpbin.org/get?key1=value1&key2=value2"\n}\n'
"""
# image bytes can be passed straight to Pillow to build an image
from PIL import Image
from io import BytesIO
i = Image.open(BytesIO(r.content))
# as text (decoded string)
r.text
"""
'{\n "args": {\n "key1": "value1", \n "key2": "value2"\n }, \n "headers": {\n "Accept": "*/*", \n "Accept-Encoding": "gzip, deflate", \n "Host": "httpbin.org", \n "User-Agent": "python-requests/2.28.1", \n "X-Amzn-Trace-Id": "Root=1-6305a010-32d12aaa313962317e29ede5"\n }, \n "origin": "185.212.56.154", \n "url": "https://httpbin.org/get?key1=value1&key2=value2"\n}\n'
"""
# parsed as JSON
r.json()
"""
# get
# params=payload
{'args': {'key1': 'value1', 'key2': 'value2'}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.28.1', 'X-Amzn-Trace-Id': 'Root=1-6305a010-32d12aaa313962317e29ede5'}, 'origin': '185.212.56.154', 'url': 'https://httpbin.org/get?key1=value1&key2=value2'}
# post
# data=payload
{'args': {}, 'data': '', 'files': {}, 'form': {'k1': 'v1', 'k2': 'v2'}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Content-Length': '11', 'Content-Type': 'application/x-www-form-urlencoded', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.28.1', 'X-Amzn-Trace-Id': 'Root=1-63086975-1228aeea3fa968ba500d1c9d'}, 'json': None, 'origin': '123.118.73.22', 'url': 'https://httpbin.org/post'}
# post
# json=payload
{'args': {}, 'data': '{"k1": "v1", "k2": "v2", "k3": null}', 'files': {}, 'form': {}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Content-Length': '36', 'Content-Type': 'application/json', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.28.1', 'X-Amzn-Trace-Id': 'Root=1-630868d2-3c641ceb41c7147f5a160c5f'}, 'json': {'k1': 'v1', 'k2': 'v2', 'k3': None}, 'origin': '123.118.73.22', 'url': 'https://httpbin.org/post'}
"""
# access the raw response; requires stream=True on the request
r.raw
"""
<urllib3.response.HTTPResponse object at 0x105cd6dc0>
"""
r.raw.read(10) # b'{\n "args"'
# typically streamed to a file
with open(filename, 'wb') as fd:
    for chunk in r.iter_content(chunk_size=128):
        fd.write(chunk)
- Certificates
verify = True # verify the server certificate (default)
verify = False # skip certificate verification; a warning is printed
"""
# add this line before sending the request to silence the warning
requests.packages.urllib3.disable_warnings()
"""
verify="/path/to/certifile" # custom CA bundle
cert=('/path/client.cert', '/path/client.key') # client-side certificate
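A minimal sketch of how these options are passed on a request (the paths and URL are placeholders):
import requests

# hypothetical paths; substitute your real CA bundle and client certificate
r = requests.get(
    "https://example.com",
    verify="/path/to/certifile",                     # custom CA bundle
    cert=("/path/client.cert", "/path/client.key"),  # client certificate + key
    timeout=5,
)
print(r.status_code)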
Cookies¶
- CookieJar format
# taken from a response
cookies_jar = res.cookies # <RequestsCookieJar[]>
# built by hand
cookies_jar = requests.cookies.RequestsCookieJar()
cookies_jar.set('tasty_cookie', 'yum', domain='httpbin.org', path='/cookies')
cookies_jar.set('gross_cookie', 'blech', domain='httpbin.org', path='/elsewhere')
# dict -> jar
cookies_jar = requests.cookies.RequestsCookieJar()
for k, v in cookies_dict.items():
    cookies_jar.set(k, v)
- dict format
cookies_dict = {"k1": "v1", "k2": "v2"}
cookies_dict = dict(k1="v1", k2="v2")
# jar -> dict, convenient for persisting
cookies_dict = requests.utils.dict_from_cookiejar(cookies_jar)
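Either form can be sent by passing it to the cookies parameter; a small sketch against httpbin.org's echo endpoint:
import requests

jar = requests.cookies.RequestsCookieJar()
jar.set('tasty_cookie', 'yum', domain='httpbin.org', path='/cookies')

# httpbin echoes back the cookies it received
r = requests.get('https://httpbin.org/cookies', cookies=jar, timeout=5)
print(r.json())  # {'cookies': {'tasty_cookie': 'yum'}}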
requests.Session¶
import requests
# a Session object has every method that requests has
with requests.Session() as s:
    # log in through the login endpoint with username/password; subsequent requests on this session carry the cookies automatically
    res = s.post('https://xxx.com/login', data={"username": "xxx", "password": "xxx"})
    # if you skip the login endpoint and call authenticated endpoints directly, add the cookies to the session by hand
    """option 1"""
    s.cookies.update(cookies_dict_or_jar)  # updates this jar with cookies from another CookieJar or dict-like
    """option 2"""
    s.cookies = requests.cookies.cookiejar_from_dict(cookies_dict, cookiejar=None, overwrite=True)
    """
    # options 1 and 2 both end up calling set_cookie(cookie) on the jar
    # the following two forms are in turn just wrappers around option 2
    s.cookies = requests.utils.cookiejar_from_dict(cookies_dict, cookiejar=None, overwrite=True)
    requests.utils.add_dict_to_cookiejar(s.cookies, my_cookies_dict)
    """
    # clear the cookies
    s.cookies.clear()
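A short sketch showing that cookies set during one request on the session are reused on the next (httpbin.org is only a demo target):
import requests

with requests.Session() as s:
    # the Set-Cookie header from this response is stored on the session
    s.get("https://httpbin.org/cookies/set/sessioncookie/123456", timeout=5)
    # ...and is sent automatically with the next request
    r = s.get("https://httpbin.org/cookies", timeout=5)
    print(r.json())  # {'cookies': {'sessioncookie': '123456'}}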
AIOHTTP¶
pip install aiohttp
import aiohttp
import asyncio
url = "https://example.com"
async def main():
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # async with session.post(url, data={"key": "value"}) as response:
            print(await response.text())

asyncio.run(main())
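The performance advantage comes from running many requests concurrently; a small sketch using asyncio.gather (the URL list is arbitrary):
import asyncio
import aiohttp

async def fetch(session, url):
    async with session.get(url) as response:
        return response.status

async def main():
    urls = ["https://example.com"] * 5  # placeholder targets
    async with aiohttp.ClientSession() as session:
        # fire all requests concurrently and wait for every result
        statuses = await asyncio.gather(*(fetch(session, u) for u in urls))
        print(statuses)  # e.g. [200, 200, 200, 200, 200]

asyncio.run(main())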
HTTPX¶
pip install httpx
pip install 'httpx[http2]' # the HTTP/2 extra
# command-line client, similar to curl
pip install 'httpx[cli]'
httpx --help
Synchronous¶
Almost the same as requests
import httpx
r1 = httpx.get("https://www.example.org")
r2 = httpx.post("https://httpbin.org/post", data={"name": "7c", "age": 30})
r1 # <Response [200 OK]>
r1.status_code # 200
r1.headers['content-type'] # 'text/html; charset=UTF-8'
r1.text # '<!doctype html>\n<html>\n<head>\n<title>Example Domain</title>...'
r2.json() # JSON body, automatically decoded
- HTTP/2
client = httpx.Client(http2=True)
r = client.get("https://www.example.org")
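A sketch for checking which protocol was actually negotiated (requires the httpx[http2] extra; http_version reports e.g. 'HTTP/1.1' or 'HTTP/2'):
import httpx

with httpx.Client(http2=True) as client:
    r = client.get("https://www.example.org")
    # the negotiated protocol depends on the server and may still be HTTP/1.1
    print(r.http_version, r.status_code)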
Asynchronous¶
import httpx
import asyncio
async def main():
    async with httpx.AsyncClient() as client:
        response = await client.get("https://example.com")
        print(response.text)
asyncio.run(main())