import requests
# Fetch the Baidu home page, then show the response object's type, its status
# code, the type of its decoded body, the body itself, and its cookie jar.
r = requests.get('https://www.baidu.com/')
print(
    type(r),
    r.status_code,
    type(r.text),
    r.text,
    r.cookies,
    sep='\n',
)
# Sending the other request types (each response overwrites ``r``)
r = requests.request('post', 'http://httpbin.org/post')
r = requests.request('put', 'http://httpbin.org/put')
r = requests.request('delete', 'http://httpbin.org/delete')
# requests.head() does not follow redirects by default; keep that behavior.
r = requests.request('head', 'http://httpbin.org/get', allow_redirects=False)
r = requests.request('options', 'http://httpbin.org/get')
# GET requests
# Basic example: fetch httpbin's echo endpoint and print the raw body
r = requests.get('http://httpbin.org/get')
print(r.text)
# Example output:
# {
# "args": {},
# "headers": {
# "Accept": "*/*",
# "Accept-Encoding": "gzip, deflate, br, zstd",
# "Host": "httpbin.org",
# "User-Agent": "python-requests/2.31.0",
# "X-Amzn-Trace-Id": "Root=1-67c2cfc9-197bdfa835c331751229e13d"
# },
# "origin": "154.40.60.12",
# "url": "http://httpbin.org/get"
# }
# Adding query parameters: the dict passed as ``params`` is encoded into the
# URL's query string (see "url" in the example output below)
data = {
    'name': 'germey',
    'age': 22,
}
r = requests.get("http://httpbin.org/get", params=data)
print(r.text)
# Example output:
# {
# "args": {
# "age": "22",
# "name": "germey"
# },
# "headers": {
# "Accept": "*/*",
# "Accept-Encoding": "gzip, deflate, br, zstd",
# "Host": "httpbin.org",
# "User-Agent": "python-requests/2.31.0",
# "X-Amzn-Trace-Id": "Root=1-67c2d039-06fc3a62021e926051aec502"
# },
# "origin": "154.40.60.12",
# "url": "http://httpbin.org/get?name=germey&age=22"
# }
# The body returned by the page is a str, but it is JSON-formatted. To parse
# it straight into a dict, call the response's json() method.
import requests
r = requests.get("http://httpbin.org/get")
print(type(r.text))
print(r.json())
print(type(r.json()))
# Example output:
# <class 'str'>
# {'args': {}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate, br, zstd', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.31.0', 'X-Amzn-Trace-Id': 'Root=1-67c2d074-4c51486a68ce721962b679a3'}, 'origin': '154.40.60.12', 'url': 'http://httpbin.org/get'}
# <class 'dict'>
# Scraping a web page, using Zhihu's "Explore" page as the example
# (the code no longer works against the current site)
import re
# Send a browser-like User-Agent header along with the request
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
}
r = requests.get("https://www.zhihu.com/explore", headers=headers)
# Regular expression that captures the text of every question link;
# re.S lets "." match across line breaks
pattern = re.compile('question.*?question_link.*?>(.*?)</a>', re.S)
titles = pattern.findall(r.text)
print(titles)
# Fetching binary data, using the GitHub site icon as the example
import requests
r = requests.get("https://github.com/favicon.ico")
# Uncomment to compare the decoded-text view with the raw bytes:
#print(r.text)
#print(r.content)
# Save the raw bytes; binary mode ('wb') writes them to disk unchanged
with open('favicon.ico', 'wb') as f:
    f.write(r.content)
# POST requests: a dict passed as ``data`` is sent form-encoded
# (it shows up under "form" in the example output below)
data = {'name': 'germey', 'age': '22'}
r = requests.post("http://httpbin.org/post", data=data)
print(r.text)
# Example output:
# {
# "args": {},
# "data": "",
# "files": {},
# "form": {
# "age": "22",
# "name": "germey"
# },
# "headers": {
# "Accept": "*/*",
# "Accept-Encoding": "gzip, deflate, br, zstd",
# "Content-Length": "18",
# "Content-Type": "application/x-www-form-urlencoded",
# "Host": "httpbin.org",
# "User-Agent": "python-requests/2.31.0",
# "X-Amzn-Trace-Id": "Root=1-67c2fd51-785a04c7193dac8e4f366d8a"
# },
# "json": null,
# "origin": "154.40.60.12",
# "url": "http://httpbin.org/post"
# }
# requests also provides a built-in status-code lookup object, requests.codes
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
}
r = requests.get('http://www.jianshu.com', headers=headers)
# Stop the script unless the server answered with 200 OK
if r.status_code != requests.codes.ok:
    raise SystemExit
print('Request Successfully')
# Example output:
# Request Successfully
# Advanced usage
# 1. File upload
# Open the file in a with-block so the handle is closed once the upload is done
with open('favicon.ico', 'rb') as upload:
    files = {'file': upload}
    r = requests.post('http://httpbin.org/post', files=files)
#print(r.text)
# Getting cookies: print the cookie jar, then each cookie as name=value
r = requests.get('https://www.baidu.com')
print(r.cookies)
for key, value in r.cookies.items():
    print(key + '=' + value)
# Example output:
# <RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
# BDORZ=27315
# Using a cookie to keep a logged-in state: the browser's cookie string is
# sent verbatim in the request headers
import requests
headers = {
    # NOTE(review): looks like a cookie copied from a real browser session;
    # replace it with your own and avoid committing live session cookies.
    'cookie': '_xsrf=lFnzNR23XYlIWOlqH7giKRfH0z0aeqlS; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1740900296',
    'Host': 'www.zhihu.com',
    # The original value had the whole User-Agent string pasted twice
    # back-to-back; a single copy is the well-formed header value.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
}
r = requests.get('https://www.zhihu.com', headers=headers)
print(r.text)
# Session persistence: a Session carries cookies from one request to the next
s = requests.Session()
# Request the test URL that sets a cookie (number=123456789)
s.get('http://httpbin.org/cookies/set/number/123456789')
# Request again with the same session to read the cookies back
r = s.get('http://httpbin.org/cookies')
print(r.text)
# Example output:
# {
# "cookies": {
# "number": "123456789"
# }
# }
# SSL certificate verification
# requests checks the server's SSL certificate when it sends an HTTPS request;
# the verify parameter controls whether that check is performed.
# NOTE: verify=False turns certificate checking off entirely; demo use only.
# Plain version (presumably the one that triggers the warning silenced below):
# response = requests.get('https://www.12306.cn', verify=False)
# print(response.status_code)
# Ignore the warning by disabling urllib3's warnings
from requests.packages import urllib3
urllib3.disable_warnings()
response = requests.get('https://www.12306.cn', verify=False)
print(response.status_code)
# Alternatively, ignore the warning by capturing warnings into the logging system
import logging
logging.captureWarnings(True)
response = requests.get('https://www.12306.cn', verify=False)
print(response.status_code)
# Example output:
# 200
# 200
# Proxy settings
# Proxy-extraction API endpoint; asks for 1 proxy IP
api_url = "https://dps.kdlapi.com/api/getdps/?secret_id=******&signature=****&num=1&pt=1&sep=1"
# Proxy address (ip:port) returned by the API; strip surrounding whitespace
# so a trailing newline cannot end up inside the proxy URL
proxy_ip = requests.get(api_url).text.strip()
# Username/password authentication (private / dedicated proxies)
username = "*******"
password = "********"
# The same HTTP proxy endpoint is used for both http and https traffic
proxy_url = f"http://{username}:{password}@{proxy_ip}/"
proxies = {
    "http": proxy_url,
    "https": proxy_url,
}
print(proxies)
requests.get('https://www.taobao.com', proxies=proxies)
# Example output (proxy credentials redacted):
# {'http': 'http://<user>:<password>@218.95.37.135:41577/', 'https': 'http://<user>:<password>@218.95.37.135:41577/'}
# <Response [200]>
# Timeout settings
# Set the timeout to 1 second
r = requests.get('https://www.taobao.com', timeout=1)
print(r.status_code)
# A single timeout value is applied to both the connect and the read phase
# (it is not their sum); pass a (connect, read) tuple to set them separately
r = requests.get('https://www.taobao.com', timeout=(5, 30))
# To wait forever, set timeout to None
r = requests.get('https://www.taobao.com', timeout=None)
# Example output:
# 200
# Authentication
from requests.auth import HTTPBasicAuth
# Explicit form:
#r = requests.get('http://127.0.0.1:5244/',auth=HTTPBasicAuth('admin','Hyy823237'))
# A plain (user, password) tuple is handled with the HTTPBasicAuth class by default
# NOTE(review): the password is hard-coded here; if it is a real credential,
# rotate it and load it from the environment or a config file instead.
r = requests.get('http://127.0.0.1:5244/', auth=('admin', 'Hyy823237'))
print(r.status_code)
# Example output:
# 200
# Prepared Request: build a Request object, let the Session turn it into a
# PreparedRequest, then send that prepared object explicitly
from requests import Request, Session
url = 'http://httpbin.org/post'
data = {'name': 'germey'}
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
}
s = Session()
req = Request('POST', url, data=data, headers=headers)
prepped = s.prepare_request(req)
r = s.send(prepped)
print(r.text)
# Example output:
# {
# "args": {},
# "data": "",
# "files": {},
# "form": {
# "name": "germey"
# },
# "headers": {
# "Accept": "*/*",
# "Accept-Encoding": "gzip, deflate, br, zstd",
# "Content-Length": "11",
# "Content-Type": "application/x-www-form-urlencoded",
# "Host": "httpbin.org",
# "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36",
# "X-Amzn-Trace-Id": "Root=1-67c411e2-5e4075fc632d6ba24a26c1a3"
# },
# "json": null,
# "origin": "154.40.60.12",
# "url": "http://httpbin.org/post"
# }