Reading the Python Requests Source: adapters.py
2023-12-30 02:29:56
requests.adapters is a submodule of the requests library containing the transport adapter classes that requests uses to send HTTP requests. The adapters take care of the low-level details of a request, such as timeouts, retries, and connection-pool management.
The module defines two classes:
BaseAdapter:
The abstract base class for all transport adapters. It defines the interface, send() and close(), that every concrete adapter must implement.
HTTPAdapter:
The built-in adapter for HTTP and HTTPS. It implements retry handling and connection pooling on top of urllib3's PoolManager, and supports proxies through urllib3's ProxyManager and SOCKSProxyManager.
When sending HTTP requests with the requests library, you can mount different adapters for different URL prefixes. For example, to use custom timeout or retry behaviour, you can create your own adapter class that subclasses HTTPAdapter and overrides the relevant hooks, or simply construct an HTTPAdapter with a urllib3 Retry object, as in the sketch below.
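A minimal usage sketch, assuming a placeholder URL; the Retry parameters are arbitrary example values, while mount() and max_retries are the documented requests API:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.Session()

# Retry up to 3 times on connection errors and on 502/503/504 responses,
# with exponential backoff between attempts.
retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[502, 503, 504])

# Every URL starting with 'https://' now goes through this adapter.
session.mount('https://', HTTPAdapter(max_retries=retries, pool_maxsize=20))

# response = session.get('https://example.com')  # placeholder URL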
Source Code Walkthrough
# -*- coding: utf-8 -*-
"""
requests.adapters
~~~~~~~~~~~~~~~~~
This module contains the transport adapters that Requests uses to define
and maintain connections.
"""
import os.path
import socket
from urllib3.poolmanager import PoolManager, proxy_from_url
from urllib3.response import HTTPResponse
from urllib3.util import parse_url
from urllib3.util import Timeout as TimeoutSauce
from urllib3.util.retry import Retry
from urllib3.exceptions import ClosedPoolError
from urllib3.exceptions import ConnectTimeoutError
from urllib3.exceptions import HTTPError as _HTTPError
from urllib3.exceptions import MaxRetryError
from urllib3.exceptions import NewConnectionError
from urllib3.exceptions import ProxyError as _ProxyError
from urllib3.exceptions import ProtocolError
from urllib3.exceptions import ReadTimeoutError
from urllib3.exceptions import SSLError as _SSLError
from urllib3.exceptions import ResponseError
from urllib3.exceptions import LocationValueError
from .models import Response
from .compat import urlparse, basestring
from .utils import (DEFAULT_CA_BUNDLE_PATH, extract_zipped_paths,
get_encoding_from_headers, prepend_scheme_if_needed,
get_auth_from_url, urldefragauth, select_proxy)
from .structures import CaseInsensitiveDict
from .cookies import extract_cookies_to_jar
from .exceptions import (ConnectionError, ConnectTimeout, ReadTimeout, SSLError,
ProxyError, RetryError, InvalidSchema, InvalidProxyURL,
InvalidURL)
from .auth import _basic_auth_str
try:
from urllib3.contrib.socks import SOCKSProxyManager
except ImportError:
def SOCKSProxyManager(*args, **kwargs):
raise InvalidSchema("Missing dependencies for SOCKS support.")
DEFAULT_POOLBLOCK = False
DEFAULT_POOLSIZE = 10
DEFAULT_RETRIES = 0
DEFAULT_POOL_TIMEOUT = None
#First, the required modules and functions are imported:
#os.path: used for file-path handling.
#socket: used for low-level network communication.
#From the urllib3 library, a series of classes and exceptions are imported; urllib3 is the library that actually performs the HTTP connections and retries.
#From the requests library's own models, compat, utils, structures, cookies and exceptions modules, a number of classes and helper functions are imported.
#A SOCKSProxyManager fallback is defined: the code tries to import the real SOCKS proxy manager from urllib3.contrib.socks, and if the import fails (typically because the optional SOCKS dependencies are not installed) it substitutes a stub function that raises InvalidSchema when called.
#Four module-level constants are defined: DEFAULT_POOLBLOCK, DEFAULT_POOLSIZE, DEFAULT_RETRIES and DEFAULT_POOL_TIMEOUT. They set the default connection-pool behaviour and retry policy.
#The urllib3 exception classes imported here cover the problems that can arise during a network request (connection errors, timeouts, SSL errors, and so on), and requests maps them onto its own exception types such as ConnectionError, ConnectTimeout, ReadTimeout, SSLError, ProxyError, RetryError, InvalidSchema, InvalidProxyURL and InvalidURL.
#In short, this block sets up the networking and exception-handling infrastructure used when sending HTTP requests.
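The same optional-dependency pattern in isolation, as a sketch with a hypothetical module name:

try:
    from some_optional_lib import FancyFeature  # hypothetical optional dependency
except ImportError:
    def FancyFeature(*args, **kwargs):
        # Fail loudly only when the feature is actually used, not at import time.
        raise RuntimeError("Missing dependencies for FancyFeature support.")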
class BaseAdapter(object):
"""The Base Transport Adapter"""
def __init__(self):
super(BaseAdapter, self).__init__()
def send(self, request, stream=False, timeout=None, verify=True,
cert=None, proxies=None):
"""Sends PreparedRequest object. Returns Response object.
:param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
:param stream: (optional) Whether to stream the request content.
:param timeout: (optional) How long to wait for the server to send
data before giving up, as a float, or a :ref:`(connect timeout,
read timeout) <timeouts>` tuple.
:type timeout: float or tuple
:param verify: (optional) Either a boolean, in which case it controls whether we verify
the server's TLS certificate, or a string, in which case it must be a path
to a CA bundle to use
:param cert: (optional) Any user-provided SSL certificate to be trusted.
:param proxies: (optional) The proxies dictionary to apply to the request.
"""
raise NotImplementedError
def close(self):
"""Cleans up adapter specific items."""
raise NotImplementedError
#class BaseAdapter(object): defines a class named BaseAdapter that inherits from object (in Python 2 this had to be written explicitly to get a new-style class; in Python 3 every class inherits from object implicitly).
# """The Base Transport Adapter""": the docstring briefly describing the class's purpose.
#def __init__(self): defines the class's initializer.
# super(BaseAdapter, self).__init__(): calls the parent class's initializer.
#def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None): defines a method named send, used to send a request and return a response. Its docstring documents the parameters:
# :param request: the PreparedRequest object to be sent.
# :param stream: (optional) whether to stream the request content.
# :param timeout: (optional) how long to wait for the server to send data before giving up, either a float or a (connect timeout, read timeout) tuple.
# :param verify: (optional) either a boolean that controls whether the server's TLS certificate is verified, or a string giving the path to a CA bundle to use.
# :param cert: (optional) any user-provided SSL certificate to be trusted.
# :param proxies: (optional) the proxies dictionary to apply to the request.
# The body simply raises NotImplementedError, so subclasses must override it.
#def close(self): defines a method named close.
# """Cleans up adapter specific items.""": its docstring, explaining its purpose.
# Its body likewise raises NotImplementedError.
#In short, this base class provides a generic interface for sending HTTP requests and cleaning up resources; the concrete behaviour lives in subclasses, which must override both methods.
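To make the contract concrete, here is a sketch of a toy subclass that answers every request with a canned 200 response. The class name and body are invented for illustration, and it pokes the private _content attribute, which is fine for a sketch but not a public API:

from requests.adapters import BaseAdapter
from requests.models import Response

class CannedAdapter(BaseAdapter):
    """Hypothetical adapter: every request gets a fixed 200 response."""
    def send(self, request, stream=False, timeout=None, verify=True,
             cert=None, proxies=None):
        response = Response()
        response.status_code = 200
        response.url = request.url
        response.request = request
        response._content = b'stubbed body'  # private attribute, sketch only
        return response

    def close(self):
        pass  # nothing pooled, nothing to clean up

# session.mount('mock://', CannedAdapter()) would route 'mock://' URLs here.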
class HTTPAdapter(BaseAdapter):
"""The built-in HTTP Adapter for urllib3.
Provides a general-case interface for Requests sessions to contact HTTP and
HTTPS urls by implementing the Transport Adapter interface. This class will
usually be created by the :class:`Session <Session>` class under the
covers.
:param pool_connections: The number of urllib3 connection pools to cache.
:param pool_maxsize: The maximum number of connections to save in the pool.
:param max_retries: The maximum number of retries each connection
should attempt. Note, this applies only to failed DNS lookups, socket
connections and connection timeouts, never to requests where data has
made it to the server. By default, Requests does not retry failed
connections. If you need granular control over the conditions under
which we retry a request, import urllib3's ``Retry`` class and pass
that instead.
:param pool_block: Whether the connection pool should block for connections.
Usage::
>>> import requests
>>> s = requests.Session()
>>> a = requests.adapters.HTTPAdapter(max_retries=3)
>>> s.mount('http://', a)
"""
__attrs__ = ['max_retries', 'config', '_pool_connections', '_pool_maxsize',
'_pool_block']
def __init__(self, pool_connections=DEFAULT_POOLSIZE,
pool_maxsize=DEFAULT_POOLSIZE, max_retries=DEFAULT_RETRIES,
pool_block=DEFAULT_POOLBLOCK):
if max_retries == DEFAULT_RETRIES:
self.max_retries = Retry(0, read=False)
else:
self.max_retries = Retry.from_int(max_retries)
self.config = {}
self.proxy_manager = {}
super(HTTPAdapter, self).__init__()
self._pool_connections = pool_connections
self._pool_maxsize = pool_maxsize
self._pool_block = pool_block
self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)
def __getstate__(self):
return {attr: getattr(self, attr, None) for attr in self.__attrs__}
def __setstate__(self, state):
# Can't handle by adding 'proxy_manager' to self.__attrs__ because
# self.poolmanager uses a lambda function, which isn't pickleable.
self.proxy_manager = {}
self.config = {}
for attr, value in state.items():
setattr(self, attr, value)
self.init_poolmanager(self._pool_connections, self._pool_maxsize,
block=self._pool_block)
def init_poolmanager(self, connections, maxsize, block=DEFAULT_POOLBLOCK, **pool_kwargs):
"""Initializes a urllib3 PoolManager.
This method should not be called from user code, and is only
exposed for use when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param connections: The number of urllib3 connection pools to cache.
:param maxsize: The maximum number of connections to save in the pool.
:param block: Block when no free connections are available.
:param pool_kwargs: Extra keyword arguments used to initialize the Pool Manager.
"""
# save these values for pickling
self._pool_connections = connections
self._pool_maxsize = maxsize
self._pool_block = block
self.poolmanager = PoolManager(num_pools=connections, maxsize=maxsize,
block=block, strict=True, **pool_kwargs)
#This code defines a class named HTTPAdapter, which inherits from BaseAdapter. It is the transport adapter for HTTP and HTTPS requests, built on the urllib3 library: it gives Requests sessions a general-purpose way to contact HTTP and HTTPS URLs by implementing the Transport Adapter interface.
#The main pieces seen so far:
#__init__: the constructor. It accepts the number of connection pools to cache, the maximum pool size, the maximum number of retries, and whether the pool should block. These parameters configure the connection-pool behaviour.
#max_retries: the retry policy for each connection. The default (DEFAULT_RETRIES, i.e. 0) becomes Retry(0, read=False), which disables retries; an integer is converted with Retry.from_int(); a ready-made urllib3 Retry object may also be passed in and is then used directly.
#config: an empty dict. The adapter itself never reads it; it appears to be kept (and pickled via __attrs__) for backward compatibility.
#_pool_connections and _pool_maxsize: the number of connection pools to cache and the maximum number of connections kept per pool.
#_pool_block: whether the pool should block. If True, a request for a connection waits until one is free instead of opening an extra connection.
#proxy_manager: a dict that caches one ProxyManager per proxy URL; it is filled lazily by proxy_manager_for() below.
#__getstate__ and __setstate__: make the adapter pickleable. The pool manager itself cannot be pickled, so only the attributes listed in __attrs__ are saved, and init_poolmanager() is called again on unpickling to rebuild it.
#init_poolmanager: initializes the urllib3 PoolManager with the configured pool count, pool size and blocking behaviour; any extra keyword arguments are passed straight through to PoolManager.
#Instances of this class are usually created under the covers by the Session class; you can also mount one on a URL prefix yourself to configure request behaviour (maximum retries, pool size, and so on) for matching URLs.
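init_poolmanager's **pool_kwargs is the natural hook for passing extra options down to urllib3. A sketch, assuming we want to pin a minimum TLS version via a custom ssl_context (a keyword urllib3's PoolManager accepts):

import ssl
from requests.adapters import HTTPAdapter

class TLS12Adapter(HTTPAdapter):
    """Hypothetical subclass that refuses anything below TLS 1.2."""
    def init_poolmanager(self, connections, maxsize, block=False, **pool_kwargs):
        ctx = ssl.create_default_context()
        ctx.minimum_version = ssl.TLSVersion.TLSv1_2  # Python 3.7+
        pool_kwargs['ssl_context'] = ctx  # forwarded to urllib3's PoolManager
        super(TLS12Adapter, self).init_poolmanager(
            connections, maxsize, block=block, **pool_kwargs)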
def proxy_manager_for(self, proxy, **proxy_kwargs):
"""Return urllib3 ProxyManager for the given proxy.
This method should not be called from user code, and is only
exposed for use when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param proxy: The proxy to return a urllib3 ProxyManager for.
:param proxy_kwargs: Extra keyword arguments used to configure the Proxy Manager.
:returns: ProxyManager
:rtype: urllib3.ProxyManager
"""
if proxy in self.proxy_manager:
manager = self.proxy_manager[proxy]
elif proxy.lower().startswith('socks'):
username, password = get_auth_from_url(proxy)
manager = self.proxy_manager[proxy] = SOCKSProxyManager(
proxy,
username=username,
password=password,
num_pools=self._pool_connections,
maxsize=self._pool_maxsize,
block=self._pool_block,
**proxy_kwargs
)
else:
proxy_headers = self.proxy_headers(proxy)
manager = self.proxy_manager[proxy] = proxy_from_url(
proxy,
proxy_headers=proxy_headers,
num_pools=self._pool_connections,
maxsize=self._pool_maxsize,
block=self._pool_block,
**proxy_kwargs)
return manager
def cert_verify(self, conn, url, verify, cert):
"""Verify a SSL certificate. This method should not be called from user
code, and is only exposed for use when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param conn: The urllib3 connection object associated with the cert.
:param url: The requested URL.
:param verify: Either a boolean, in which case it controls whether we verify
the server's TLS certificate, or a string, in which case it must be a path
to a CA bundle to use
:param cert: The SSL certificate to verify.
"""
if url.lower().startswith('https') and verify:
cert_loc = None
# Allow self-specified cert location.
if verify is not True:
cert_loc = verify
if not cert_loc:
cert_loc = extract_zipped_paths(DEFAULT_CA_BUNDLE_PATH)
if not cert_loc or not os.path.exists(cert_loc):
raise IOError("Could not find a suitable TLS CA certificate bundle, "
"invalid path: {}".format(cert_loc))
conn.cert_reqs = 'CERT_REQUIRED'
if not os.path.isdir(cert_loc):
conn.ca_certs = cert_loc
else:
conn.ca_cert_dir = cert_loc
else:
conn.cert_reqs = 'CERT_NONE'
conn.ca_certs = None
conn.ca_cert_dir = None
if cert:
if not isinstance(cert, basestring):
conn.cert_file = cert[0]
conn.key_file = cert[1]
else:
conn.cert_file = cert
conn.key_file = None
if conn.cert_file and not os.path.exists(conn.cert_file):
raise IOError("Could not find the TLS certificate file, "
"invalid path: {}".format(conn.cert_file))
if conn.key_file and not os.path.exists(conn.key_file):
raise IOError("Could not find the TLS key file, "
"invalid path: {}".format(conn.key_file))
#This code defines two methods, proxy_manager_for and cert_verify, both part of how the HTTP adapter handles network requests.
#proxy_manager_for:
#Returns a urllib3 ProxyManager for the given proxy configuration.
#It first checks whether a ProxyManager has already been created for this proxy; if so, the cached one is returned directly.
#If the proxy URL starts with "socks", the username and password are extracted from the proxy URL and used to create (and cache) a SOCKSProxyManager.
#Otherwise, a regular ProxyManager is created (and cached) via proxy_from_url, passing the proxy authorization headers from proxy_headers() along with the adapter's pool settings.
#cert_verify:
#Configures TLS certificate verification on a connection. It should not be called from user code; it is exposed only for use when subclassing HTTPAdapter.
#It receives the connection object, the requested URL, the verify setting and an optional client certificate.
#For an https URL with verification enabled, it resolves the CA bundle location: a string value of verify is treated as a path, otherwise the default bundle (DEFAULT_CA_BUNDLE_PATH) is used; if no existing path can be found, an IOError is raised. It then sets cert_reqs to 'CERT_REQUIRED' and points ca_certs (for a file) or ca_cert_dir (for a directory) at the bundle.
#If verification is disabled, or the URL is not https, cert_reqs is set to 'CERT_NONE' and the CA settings are cleared.
#Finally, if a client certificate was supplied, cert_file and key_file are set from either a (cert, key) tuple or a single path string, again raising IOError if the files do not exist.
#In short, these methods cover the proxy and TLS-verification sides of making a request.
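How these settings look from the caller's side; the paths and proxy addresses below are placeholders, and SOCKS support needs the optional extras (pip install requests[socks]):

import requests

session = requests.Session()
proxies = {
    'http': 'http://user:pass@proxy.example:3128',  # plain HTTP proxy
    'https': 'socks5://127.0.0.1:1080',             # SOCKS proxy
}
# Each distinct proxy URL gets one cached ProxyManager inside the adapter:
# response = session.get('https://example.com',
#                        proxies=proxies,
#                        verify='/etc/ssl/internal-ca.pem',  # CA bundle path
#                        cert=('/etc/ssl/client.crt', '/etc/ssl/client.key'))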
def build_response(self, req, resp):
"""Builds a :class:`Response <requests.Response>` object from a urllib3
response. This should not be called from user code, and is only exposed
for use when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`
:param req: The :class:`PreparedRequest <PreparedRequest>` used to generate the response.
:param resp: The urllib3 response object.
:rtype: requests.Response
"""
response = Response()
# Fallback to None if there's no status_code, for whatever reason.
response.status_code = getattr(resp, 'status', None)
# Make headers case-insensitive.
response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {}))
# Set encoding.
response.encoding = get_encoding_from_headers(response.headers)
response.raw = resp
response.reason = response.raw.reason
if isinstance(req.url, bytes):
response.url = req.url.decode('utf-8')
else:
response.url = req.url
# Add new cookies from the server.
extract_cookies_to_jar(response.cookies, req, resp)
# Give the Response some context.
response.request = req
response.connection = self
return response
#This code defines a method named build_response, which builds a requests.Response object from a urllib3 response object. It is for internal use: it should not be called from user code, only when subclassing HTTPAdapter. Step by step:
#Method signature:
#def build_response(self, req, resp):
#It takes two arguments: req (the PreparedRequest used to generate the response) and resp (the urllib3 response object).
#Create the Response object:
#response = Response(): creates a new, empty requests.Response (imported from .models at the top of the module).
#Set the status code:
#response.status_code = getattr(resp, 'status', None): copies the status code from the urllib3 response; if resp has no status attribute for whatever reason, it falls back to None.
#Set the headers:
#response.headers = CaseInsensitiveDict(getattr(resp, 'headers', {})): wraps the response headers in a CaseInsensitiveDict, so header names can be looked up without worrying about case.
#Set the encoding:
#response.encoding = get_encoding_from_headers(response.headers): derives the character encoding from the response headers (typically the charset in Content-Type).
#Set the raw object and reason:
#response.raw = resp: stores the original urllib3 response object on the raw attribute.
#response.reason = response.raw.reason: copies the HTTP status message (e.g. "OK") from the raw response.
#Set the URL:
#If the request URL is a bytes object, it is decoded to a str using UTF-8; otherwise it is used as-is:
#if isinstance(req.url, bytes):
#    response.url = req.url.decode('utf-8')
#else:
#    response.url = req.url
#Add new cookies from the server:
#extract_cookies_to_jar(response.cookies, req, resp): extracts any new cookies from the server response and adds them to the response's cookie jar.
#Set the request and connection context:
#response.request = req: attaches the original request object to the response's request attribute.
#response.connection = self: attaches the current HTTP adapter to the response's connection attribute, giving later handlers a reference to the adapter that issued the request.
#Return the Response object:
#return response: finally, the method returns the fully populated response.
#Overall, this method turns the low-level urllib3 response into the higher-level, user-friendly requests.Response, with all the attributes and methods needed to work with the HTTP response.
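The effect is visible on any response object (placeholder URL; requires network access):

import requests

r = requests.get('https://example.com')   # placeholder URL
print(r.status_code, r.reason, r.encoding)
print(type(r.raw))          # urllib3.response.HTTPResponse, stored by build_response
print(type(r.connection))   # the HTTPAdapter instance that produced the response
print(r.request.method)     # the PreparedRequest attached for context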
def get_connection(self, url, proxies=None):
"""Returns a urllib3 connection for the given URL. This should not be
called from user code, and is only exposed for use when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param url: The URL to connect to.
:param proxies: (optional) A Requests-style dictionary of proxies used on this request.
:rtype: urllib3.ConnectionPool
"""
proxy = select_proxy(url, proxies)
if proxy:
proxy = prepend_scheme_if_needed(proxy, 'http')
proxy_url = parse_url(proxy)
if not proxy_url.host:
raise InvalidProxyURL("Please check proxy URL. It is malformed"
" and could be missing the host.")
proxy_manager = self.proxy_manager_for(proxy)
conn = proxy_manager.connection_from_url(url)
else:
# Only scheme should be lower case
parsed = urlparse(url)
url = parsed.geturl()
conn = self.poolmanager.connection_from_url(url)
return conn
def close(self):
"""Disposes of any internal state.
Currently, this closes the PoolManager and any active ProxyManager,
which closes any pooled connections.
"""
self.poolmanager.clear()
for proxy in self.proxy_manager.values():
proxy.clear()
#These two methods of HTTPAdapter deal with obtaining connections and releasing them. In detail:
#get_connection:
#def get_connection(self, url, proxies=None):
#Takes a URL and an optional Requests-style proxies dictionary, and returns a urllib3 ConnectionPool representing a connection to the given URL.
#proxy = select_proxy(url, proxies):
#select_proxy picks the proxy, if any, that applies to this URL from the proxies dictionary.
#If a proxy was selected:
#The proxy URL is given an 'http' scheme if it lacks one.
#The proxy URL is parsed, and if its host part is missing an InvalidProxyURL exception is raised.
#A connection is obtained from the cached ProxyManager for that proxy (via proxy_manager_for and connection_from_url).
#If no proxy was selected:
#The URL is parsed and re-serialized (only the scheme should be lower-cased), and a connection is obtained from the adapter's own pool manager.
#close:
#def close(self):
#Disposes of the adapter's internal state.
#self.poolmanager.clear():
#Clears all connections held by the pool manager, releasing their resources.
#Then, for each active ProxyManager:
#Its pooled connections are cleared in the same way.
#Overall, these methods manage connections to a given URL, optionally through a proxy, together with their lifecycle; when the connections are no longer needed, close() releases the resources.
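In everyday use you rarely call close() directly; closing the Session does it for you (placeholder URL):

import requests

# Session.close() calls close() on every mounted adapter, which clears the
# PoolManager and any cached ProxyManagers, closing the pooled connections.
with requests.Session() as s:
    s.get('https://example.com')   # placeholder URL
# Leaving the with-block triggers HTTPAdapter.close() on both default adapters.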
def request_url(self, request, proxies):
"""Obtain the url to use when making the final request.
If the message is being sent through a HTTP proxy, the full URL has to
be used. Otherwise, we should only use the path portion of the URL.
This should not be called from user code, and is only exposed for use
when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
:param proxies: A dictionary of schemes or schemes and hosts to proxy URLs.
:rtype: str
"""
proxy = select_proxy(request.url, proxies)
scheme = urlparse(request.url).scheme
is_proxied_http_request = (proxy and scheme != 'https')
using_socks_proxy = False
if proxy:
proxy_scheme = urlparse(proxy).scheme.lower()
using_socks_proxy = proxy_scheme.startswith('socks')
url = request.path_url
if is_proxied_http_request and not using_socks_proxy:
url = urldefragauth(request.url)
return url
def add_headers(self, request, **kwargs):
"""Add any headers needed by the connection. As of v2.0 this does
nothing by default, but is left for overriding by users that subclass
the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
This should not be called from user code, and is only exposed for use
when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param request: The :class:`PreparedRequest <PreparedRequest>` to add headers to.
:param kwargs: The keyword arguments from the call to send().
"""
pass
def proxy_headers(self, proxy):
"""Returns a dictionary of the headers to add to any request sent
through a proxy. This works with urllib3 magic to ensure that they are
correctly sent to the proxy, rather than in a tunnelled request if
CONNECT is being used.
This should not be called from user code, and is only exposed for use
when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param proxy: The url of the proxy being used for this request.
:rtype: dict
"""
headers = {}
username, password = get_auth_from_url(proxy)
if username:
headers['Proxy-Authorization'] = _basic_auth_str(username,
password)
return headers
#request_url:
#This method obtains the URL to use when making the final request. If the message is being sent through an HTTP proxy (and the scheme is not https, and the proxy is not a SOCKS proxy), the full URL must be used; otherwise only the path portion of the URL is used. It should not be called from user code; it is exposed only for use when subclassing requests.adapters.HTTPAdapter. It takes a PreparedRequest object and a proxies dictionary and returns the URL as a string.
#add_headers:
#This method adds any headers needed by the connection. As of v2.0 it does nothing by default, but it is left in place for users subclassing HTTPAdapter to override. It should not be called from user code. It receives the PreparedRequest and the keyword arguments from the call to send(); its default body is a no-op.
#proxy_headers:
#This method returns a dictionary of headers to add to any request sent through a proxy. If the proxy URL carries a username, a Proxy-Authorization header with HTTP Basic credentials is built via _basic_auth_str. urllib3 ensures these headers are sent to the proxy itself, rather than inside the tunnelled request when CONNECT is used.
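A sketch of the intended extension point, with a made-up header name:

from requests.adapters import HTTPAdapter

class StampedAdapter(HTTPAdapter):
    """Hypothetical subclass stamping a header on every outgoing request."""
    def add_headers(self, request, **kwargs):
        # kwargs carries stream/timeout/verify/cert/proxies from send().
        request.headers['X-Client'] = 'adapters-walkthrough-demo'  # made-up header

# session.mount('https://', StampedAdapter()) applies it to all https requests.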
def send(self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None):
"""Sends PreparedRequest object. Returns Response object.
:param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
:param stream: (optional) Whether to stream the request content.
:param timeout: (optional) How long to wait for the server to send
data before giving up, as a float, or a :ref:`(connect timeout,
read timeout) <timeouts>` tuple.
:type timeout: float or tuple or urllib3 Timeout object
:param verify: (optional) Either a boolean, in which case it controls whether
we verify the server's TLS certificate, or a string, in which case it
must be a path to a CA bundle to use
:param cert: (optional) Any user-provided SSL certificate to be trusted.
:param proxies: (optional) The proxies dictionary to apply to the request.
:rtype: requests.Response
"""
try:
conn = self.get_connection(request.url, proxies)
except LocationValueError as e:
raise InvalidURL(e, request=request)
self.cert_verify(conn, request.url, verify, cert)
url = self.request_url(request, proxies)
self.add_headers(request, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies)
chunked = not (request.body is None or 'Content-Length' in request.headers)
if isinstance(timeout, tuple):
try:
connect, read = timeout
timeout = TimeoutSauce(connect=connect, read=read)
except ValueError as e:
# this may raise a string formatting error.
err = ("Invalid timeout {}. Pass a (connect, read) "
"timeout tuple, or a single float to set "
"both timeouts to the same value".format(timeout))
raise ValueError(err)
elif isinstance(timeout, TimeoutSauce):
pass
else:
timeout = TimeoutSauce(connect=timeout, read=timeout)
try:
if not chunked:
resp = conn.urlopen(
method=request.method,
url=url,
body=request.body,
headers=request.headers,
redirect=False,
assert_same_host=False,
preload_content=False,
decode_content=False,
retries=self.max_retries,
timeout=timeout
)
# Send the request.
else:
if hasattr(conn, 'proxy_pool'):
conn = conn.proxy_pool
low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)
try:
low_conn.putrequest(request.method,
url,
skip_accept_encoding=True)
for header, value in request.headers.items():
low_conn.putheader(header, value)
low_conn.endheaders()
for i in request.body:
low_conn.send(hex(len(i))[2:].encode('utf-8'))
low_conn.send(b'\r\n')
low_conn.send(i)
low_conn.send(b'\r\n')
low_conn.send(b'0\r\n\r\n')
# Receive the response from the server
try:
# For Python 2.7, use buffering of HTTP responses
r = low_conn.getresponse(buffering=True)
except TypeError:
# For compatibility with Python 3.3+
r = low_conn.getresponse()
resp = HTTPResponse.from_httplib(
r,
pool=conn,
connection=low_conn,
preload_content=False,
decode_content=False
)
except:
# If we hit any problems here, clean up the connection.
# Then, reraise so that we can handle the actual exception.
low_conn.close()
raise
except (ProtocolError, socket.error) as err:
raise ConnectionError(err, request=request)
except MaxRetryError as e:
if isinstance(e.reason, ConnectTimeoutError):
# TODO: Remove this in 3.0.0: see #2811
if not isinstance(e.reason, NewConnectionError):
raise ConnectTimeout(e, request=request)
if isinstance(e.reason, ResponseError):
raise RetryError(e, request=request)
if isinstance(e.reason, _ProxyError):
raise ProxyError(e, request=request)
if isinstance(e.reason, _SSLError):
# This branch is for urllib3 v1.22 and later.
raise SSLError(e, request=request)
raise ConnectionError(e, request=request)
except ClosedPoolError as e:
raise ConnectionError(e, request=request)
except _ProxyError as e:
raise ProxyError(e)
except (_SSLError, _HTTPError) as e:
if isinstance(e, _SSLError):
# This branch is for urllib3 versions earlier than v1.22
raise SSLError(e, request=request)
elif isinstance(e, ReadTimeoutError):
raise ReadTimeout(e, request=request)
else:
raise
return self.build_response(request, resp)
#The send method ties everything together:
#It obtains a connection for the given URL and proxies via get_connection(); if the URL is invalid (LocationValueError), an InvalidURL exception is raised instead.
#It configures TLS certificate verification on the connection with cert_verify().
#It computes the final request URL with request_url() and lets add_headers() add any extra headers.
#It decides whether chunked transfer is needed: chunked is True only when the request has a body and no Content-Length header is set.
#It normalizes the timeout parameter: a (connect, read) tuple is converted into a TimeoutSauce object, raising ValueError for a malformed tuple; an existing TimeoutSauce is used as-is; any other value is applied to both the connect and read timeouts.
#It then performs the actual network request. Without chunking, it sends the request through conn.urlopen(), with redirects disabled and retries set to self.max_retries. With chunking, it fetches a low-level connection from the pool (the proxy pool, if the connection goes through a proxy), writes the request line and headers itself, streams the body piece by piece in chunked transfer encoding terminated by the 0\r\n\r\n trailer, and wraps the raw response back into a urllib3 HTTPResponse; on any error the low-level connection is closed before re-raising.
#A series of except clauses maps urllib3 and socket errors onto requests' own exception types (ConnectionError, ConnectTimeout, RetryError, ProxyError, SSLError, ReadTimeout).
#Finally, the urllib3 response is converted into a requests.Response via build_response().
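The timeout normalization step in isolation, extracted as a sketch (TimeoutSauce is just this module's alias for urllib3.util.Timeout):

from urllib3.util import Timeout as TimeoutSauce

def normalize_timeout(timeout):
    # Mirrors what send() does before handing the timeout to urllib3.
    if isinstance(timeout, tuple):
        connect, read = timeout        # ValueError here means a bad tuple
        return TimeoutSauce(connect=connect, read=read)
    if isinstance(timeout, TimeoutSauce):
        return timeout                 # already a urllib3 Timeout object
    return TimeoutSauce(connect=timeout, read=timeout)

print(normalize_timeout((3.05, 27)))   # separate connect and read timeouts
print(normalize_timeout(5))            # one value applied to both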
Source: https://blog.csdn.net/qq_34399969/article/details/135293055