PageRenderTime 27ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/src/urllib3/poolmanager.py

https://github.com/shazow/urllib3
Python | 623 lines | 536 code | 24 blank | 63 comment | 9 complexity | 71389431e7bbcdd5b0e6dfd2a57ee098 MD5 | raw file
  1. import functools
  2. import logging
  3. import warnings
  4. from types import TracebackType
  5. from typing import (
  6. TYPE_CHECKING,
  7. Any,
  8. Dict,
  9. FrozenSet,
  10. Mapping,
  11. NamedTuple,
  12. Optional,
  13. Tuple,
  14. Type,
  15. TypeVar,
  16. Union,
  17. )
  18. from urllib.parse import urljoin
  19. from ._collections import RecentlyUsedContainer
  20. from ._request_methods import RequestMethods
  21. from .connection import ProxyConfig
  22. from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
  23. from .exceptions import (
  24. LocationValueError,
  25. MaxRetryError,
  26. ProxySchemeUnknown,
  27. URLSchemeUnknown,
  28. )
  29. from .response import BaseHTTPResponse
  30. from .util.connection import _TYPE_SOCKET_OPTIONS
  31. from .util.proxy import connection_requires_http_tunnel
  32. from .util.retry import Retry
  33. from .util.timeout import Timeout
  34. from .util.url import Url, parse_url
  35. if TYPE_CHECKING:
  36. import ssl
  37. from typing_extensions import Literal
  # Names exported by ``from <module> import *``.
  __all__ = [
      "PoolManager",
      "ProxyManager",
      "proxy_from_url",
  ]

  # Module-level logger, named after this module.
  log = logging.getLogger(__name__)
  # Keyword arguments that only make sense for TLS connections.
  # ``PoolManager._new_pool`` removes each of these from the pool kwargs
  # before building a pool for the plain ``http`` scheme.
  SSL_KEYWORDS = (
      # Client certificate material.
      "key_file",
      "cert_file",
      # Verification mode and trust store file.
      "cert_reqs",
      "ca_certs",
      # Protocol version selection.
      "ssl_version",
      "ssl_minimum_version",
      "ssl_maximum_version",
      # Remaining TLS configuration.
      "ca_cert_dir",
      "ssl_context",
      "key_password",
      "server_hostname",
  )

  # Default value for `blocksize` - a parameter introduced to
  # http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7
  _DEFAULT_BLOCKSIZE = 16 * 1024

  # Type variable used to annotate methods that hand back ``self``.
  _SelfT = TypeVar("_SelfT")
  class PoolKey(NamedTuple):
      """
      Immutable record of every keyword argument that may be handed to the
      pool manager, its pools, or the underlying connections.

      Each field is the keyword's name prefixed with ``key_``.

      All custom key schemes should include the fields in this key at a minimum.
      """

      # -- Endpoint ---------------------------------------------------------
      key_scheme: str
      key_host: str
      key_port: Optional[int]

      # -- Pool / connection behaviour --------------------------------------
      key_timeout: Optional[Union[Timeout, float, int]]
      key_retries: Optional[Union[Retry, int]]
      key_block: Optional[bool]
      key_source_address: Optional[Tuple[str, int]]

      # -- TLS configuration ------------------------------------------------
      key_key_file: Optional[str]
      key_key_password: Optional[str]
      key_cert_file: Optional[str]
      key_cert_reqs: Optional[str]
      key_ca_certs: Optional[str]
      key_ssl_version: Optional[Union[int, str]]
      key_ssl_minimum_version: Optional["ssl.TLSVersion"]
      key_ssl_maximum_version: Optional["ssl.TLSVersion"]
      key_ca_cert_dir: Optional[str]
      key_ssl_context: Optional["ssl.SSLContext"]

      # -- Pool sizing and default headers ----------------------------------
      key_maxsize: Optional[int]
      key_headers: Optional[FrozenSet[Tuple[str, str]]]

      # -- Proxy settings (context keys with a leading underscore) ----------
      key__proxy: Optional[Url]
      key__proxy_headers: Optional[FrozenSet[Tuple[str, str]]]
      key__proxy_config: Optional[ProxyConfig]

      # -- Socket-level options ---------------------------------------------
      key_socket_options: Optional[_TYPE_SOCKET_OPTIONS]
      key__socks_options: Optional[FrozenSet[Tuple[str, str]]]

      # -- Certificate / hostname checks ------------------------------------
      key_assert_hostname: Optional[Union[bool, str]]
      key_assert_fingerprint: Optional[str]
      key_server_hostname: Optional[str]

      # -- http.client block size -------------------------------------------
      key_blocksize: Optional[int]
  91. def _default_key_normalizer(
  92. key_class: Type[PoolKey], request_context: Dict[str, Any]
  93. ) -> PoolKey:
  94. """
  95. Create a pool key out of a request context dictionary.
  96. According to RFC 3986, both the scheme and host are case-insensitive.
  97. Therefore, this function normalizes both before constructing the pool
  98. key for an HTTPS request. If you wish to change this behaviour, provide
  99. alternate callables to ``key_fn_by_scheme``.
  100. :param key_class:
  101. The class to use when constructing the key. This should be a namedtuple
  102. with the ``scheme`` and ``host`` keys at a minimum.
  103. :type key_class: namedtuple
  104. :param request_context:
  105. A dictionary-like object that contain the context for a request.
  106. :type request_context: dict
  107. :return: A namedtuple that can be used as a connection pool key.
  108. :rtype: PoolKey
  109. """
  110. # Since we mutate the dictionary, make a copy first
  111. context = request_context.copy()
  112. context["scheme"] = context["scheme"].lower()
  113. context["host"] = context["host"].lower()
  114. # These are both dictionaries and need to be transformed into frozensets
  115. for key in ("headers", "_proxy_headers", "_socks_options"):
  116. if key in context and context[key] is not None:
  117. context[key] = frozenset(context[key].items())
  118. # The socket_options key may be a list and needs to be transformed into a
  119. # tuple.
  120. socket_opts = context.get("socket_options")
  121. if socket_opts is not None:
  122. context["socket_options"] = tuple(socket_opts)
  123. # Map the kwargs to the names in the namedtuple - this is necessary since
  124. # namedtuples can't have fields starting with '_'.
  125. for key in list(context.keys()):
  126. context["key_" + key] = context.pop(key)
  127. # Default to ``None`` for keys missing from the context
  128. for field in key_class._fields:
  129. if field not in context:
  130. context[field] = None
  131. # Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context
  132. if context.get("key_blocksize") is None:
  133. context["key_blocksize"] = _DEFAULT_BLOCKSIZE
  134. return key_class(**context)
  #: Maps a URL scheme to the callable that builds a pool key for it.
  #: Replace entries to alter the way pool keys are constructed, if desired.
  #: Each PoolManager makes a copy of this dictionary so they can be configured
  #: globally here, or individually on the instance.
  key_fn_by_scheme = {
      scheme: functools.partial(_default_key_normalizer, PoolKey)
      for scheme in ("http", "https")
  }

  #: Maps a URL scheme to the connection pool class used for that scheme.
  pool_classes_by_scheme = {
      "http": HTTPConnectionPool,
      "https": HTTPSConnectionPool,
  }
  class PoolManager(RequestMethods):
      """
      Allows for arbitrary requests while transparently keeping track of
      necessary connection pools for you.

      :param num_pools:
          Number of connection pools to cache before discarding the least
          recently used pool.

      :param headers:
          Headers to include with all requests, unless other headers are given
          explicitly.

      :param \\**connection_pool_kw:
          Additional parameters are used to create fresh
          :class:`urllib3.connectionpool.ConnectionPool` instances.

      Example:

      .. code-block:: python

          import urllib3

          http = urllib3.PoolManager(num_pools=2)

          resp1 = http.request("GET", "https://google.com/")
          resp2 = http.request("GET", "https://google.com/mail")
          resp3 = http.request("GET", "https://yahoo.com/")

          print(len(http.pools))
          # 2

      """

      proxy: Optional[Url] = None
      proxy_config: Optional[ProxyConfig] = None

      # Lower-cased names of headers that describe a request body. They are
      # removed, together with the body itself, when a 303 response turns the
      # follow-up request into a GET.
      _BODY_HEADERS_STRIPPED_ON_303 = frozenset(
          (
              "content-encoding",
              "content-language",
              "content-location",
              "content-type",
              "content-length",
              "digest",
              "last-modified",
          )
      )

      def __init__(
          self,
          num_pools: int = 10,
          headers: Optional[Mapping[str, str]] = None,
          **connection_pool_kw: Any,
      ) -> None:
          """
          Store the pool keyword arguments and create the pool cache.

          :param num_pools: Maximum number of pools kept in the cache.
          :param headers: Default headers, passed on to ``RequestMethods``.
          :param connection_pool_kw: Keyword arguments for new pools.
          """
          super().__init__(headers)
          self.connection_pool_kw = connection_pool_kw

          # Pools dropped from the container are closed through this callback.
          def dispose_func(p: Any) -> None:
              p.close()

          self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
          self.pools = RecentlyUsedContainer(num_pools, dispose_func=dispose_func)

          # Locally set the pool classes and keys so other PoolManagers can
          # override them. Note that only ``key_fn_by_scheme`` is copied;
          # ``pool_classes_by_scheme`` is the shared module-level dictionary.
          self.pool_classes_by_scheme = pool_classes_by_scheme
          self.key_fn_by_scheme = key_fn_by_scheme.copy()

      def __enter__(self: _SelfT) -> _SelfT:
          """Return the manager itself for use in a ``with`` statement."""
          return self

      def __exit__(
          self,
          exc_type: Optional[Type[BaseException]],
          exc_val: Optional[BaseException],
          exc_tb: Optional[TracebackType],
      ) -> "Literal[False]":
          """Clear all pools when leaving the ``with`` block."""
          self.clear()
          # Return False to re-raise any potential exceptions
          return False

      def _new_pool(
          self,
          scheme: str,
          host: str,
          port: int,
          request_context: Optional[Dict[str, Any]] = None,
      ) -> HTTPConnectionPool:
          """
          Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
          any additional pool keyword arguments.

          If ``request_context`` is provided, it is provided as keyword arguments
          to the pool class used. This method is used to actually create the
          connection pools handed out by :meth:`connection_from_url` and
          companion methods. It is intended to be overridden for customization.

          A ``request_context`` passed in by the caller is modified in place:
          ``blocksize`` is filled in, and ``scheme``, ``host``, ``port`` (plus
          the TLS keywords for plain ``http``) are removed.

          :raises KeyError: If ``scheme`` has no entry in
              ``self.pool_classes_by_scheme``.
          """
          pool_cls: Type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]
          if request_context is None:
              request_context = self.connection_pool_kw.copy()

          # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
          # set to 'None' in the request_context.
          if request_context.get("blocksize") is None:
              request_context["blocksize"] = _DEFAULT_BLOCKSIZE

          # Although the context has everything necessary to create the pool,
          # this function has historically only used the scheme, host, and port
          # in the positional args. When an API change is acceptable these can
          # be removed.
          for key in ("scheme", "host", "port"):
              request_context.pop(key, None)

          # TLS-only keywords are not passed to a plain HTTP pool.
          if scheme == "http":
              for kw in SSL_KEYWORDS:
                  request_context.pop(kw, None)

          return pool_cls(host, port, **request_context)

      def clear(self) -> None:
          """
          Empty our store of pools and direct them all to close.

          This will not affect in-flight connections, but they will not be
          re-used after completion.
          """
          self.pools.clear()

      def connection_from_host(
          self,
          host: Optional[str],
          port: Optional[int] = None,
          scheme: Optional[str] = "http",
          pool_kwargs: Optional[Dict[str, Any]] = None,
      ) -> HTTPConnectionPool:
          """
          Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

          If ``port`` isn't given, it will be derived from the ``scheme`` using
          ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
          provided, it is merged with the instance's ``connection_pool_kw``
          variable and used to create the new connection pool, if one is
          needed.

          :raises LocationValueError: If ``host`` is empty or ``None``.
          """

          if not host:
              raise LocationValueError("No host specified.")

          request_context = self._merge_pool_kwargs(pool_kwargs)
          request_context["scheme"] = scheme or "http"
          if not port:
              # Unknown schemes fall back to port 80 here.
              port = port_by_scheme.get(request_context["scheme"].lower(), 80)
          request_context["port"] = port
          request_context["host"] = host

          return self.connection_from_context(request_context)

      def connection_from_context(
          self, request_context: Dict[str, Any]
      ) -> HTTPConnectionPool:
          """
          Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

          ``request_context`` must at least contain the ``scheme`` key and its
          value must be a key in ``key_fn_by_scheme`` instance variable.

          :raises URLSchemeUnknown: If no key function is registered for the
              scheme.
          """
          if "strict" in request_context:
              warnings.warn(
                  "The 'strict' parameter is no longer needed on Python 3+. "
                  "This will raise an error in urllib3 v3.0.0.",
                  DeprecationWarning,
                  # Attribute the warning to the caller, not to this module.
                  stacklevel=2,
              )
              request_context.pop("strict")

          scheme = request_context["scheme"].lower()
          pool_key_constructor = self.key_fn_by_scheme.get(scheme)
          if not pool_key_constructor:
              raise URLSchemeUnknown(scheme)
          pool_key = pool_key_constructor(request_context)

          return self.connection_from_pool_key(pool_key, request_context=request_context)

      def connection_from_pool_key(
          self, pool_key: PoolKey, request_context: Dict[str, Any]
      ) -> HTTPConnectionPool:
          """
          Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

          ``pool_key`` should be a namedtuple that only contains immutable
          objects. ``request_context`` must contain the ``scheme``, ``host``,
          and ``port`` keys, which are used when a new pool has to be created.
          """
          with self.pools.lock:
              # If the scheme, host, or port doesn't match existing open
              # connections, open a new ConnectionPool.
              pool = self.pools.get(pool_key)
              if pool:
                  return pool

              # Make a fresh ConnectionPool of the desired type
              scheme = request_context["scheme"]
              host = request_context["host"]
              port = request_context["port"]
              pool = self._new_pool(scheme, host, port, request_context=request_context)
              self.pools[pool_key] = pool

          return pool

      def connection_from_url(
          self, url: str, pool_kwargs: Optional[Dict[str, Any]] = None
      ) -> HTTPConnectionPool:
          """
          Similar to :func:`urllib3.connectionpool.connection_from_url`.

          If ``pool_kwargs`` is not provided and a new pool needs to be
          constructed, ``self.connection_pool_kw`` is used to initialize
          the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
          is provided, it is merged over ``self.connection_pool_kw``. Note that
          if a new pool does not need to be created for the request, the
          provided ``pool_kwargs`` are not used.
          """
          u = parse_url(url)
          return self.connection_from_host(
              u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
          )

      def _merge_pool_kwargs(self, override: Optional[Dict[str, Any]]) -> Dict[str, Any]:
          """
          Merge a dictionary of override values for self.connection_pool_kw.

          This does not modify self.connection_pool_kw and returns a new dict.
          Any keys in the override dictionary with a value of ``None`` are
          removed from the merged dictionary.
          """
          base_pool_kwargs = self.connection_pool_kw.copy()
          if override:
              for key, value in override.items():
                  if value is None:
                      # ``None`` means "drop this setting"; a key that was
                      # never set is simply ignored.
                      base_pool_kwargs.pop(key, None)
                  else:
                      base_pool_kwargs[key] = value
          return base_pool_kwargs

      def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
          """
          Indicates if the proxy requires the complete destination URL in the
          request.  Normally this is only needed when not using an HTTP CONNECT
          tunnel.
          """
          if self.proxy is None:
              return False

          return not connection_requires_http_tunnel(
              self.proxy, self.proxy_config, parsed_url.scheme
          )

      @classmethod
      def _strip_body_headers(cls, headers: Any) -> Any:
          """
          Return a copy of ``headers`` without the headers that describe a
          request body. Empty or ``None`` headers are returned unchanged.
          """
          if not headers:
              return headers
          kept = headers.copy()
          for name in headers:
              if name.lower() in cls._BODY_HEADERS_STRIPPED_ON_303:
                  kept.pop(name, None)
          return kept

      def urlopen(  # type: ignore[override]
          self, method: str, url: str, redirect: bool = True, **kw: Any
      ) -> BaseHTTPResponse:
          """
          Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
          with custom cross-host redirect logic and only sends the request-uri
          portion of the ``url``.

          The given ``url`` parameter must be absolute, such that an appropriate
          :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

          When a ``303`` response is followed, the method becomes ``GET`` and
          the original request body and its body-describing headers are not
          forwarded to the redirect target.

          :raises MaxRetryError: If the redirect limit is exceeded and
              ``retries.raise_on_redirect`` is set.
          """
          u = parse_url(url)
          conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

          # Redirects are handled here, not inside the individual pool.
          kw["assert_same_host"] = False
          kw["redirect"] = False

          if "headers" not in kw:
              kw["headers"] = self.headers

          if self._proxy_requires_url_absolute_form(u):
              response = conn.urlopen(method, url, **kw)
          else:
              response = conn.urlopen(method, u.request_uri, **kw)

          redirect_location = redirect and response.get_redirect_location()
          if not redirect_location:
              return response

          # Support relative URLs for redirecting.
          redirect_location = urljoin(url, redirect_location)

          if response.status == 303:
              # Change the method according to RFC 9110, Section 15.4.4.
              method = "GET"
              # The follow-up GET must not carry the original request body,
              # nor the headers that described it.
              kw["body"] = None
              kw["headers"] = self._strip_body_headers(kw["headers"])

          retries = kw.get("retries")
          if not isinstance(retries, Retry):
              retries = Retry.from_int(retries, redirect=redirect)

          # Strip headers marked as unsafe to forward to the redirected location.
          # Check remove_headers_on_redirect to avoid a potential network call within
          # conn.is_same_host() which may use socket.gethostbyname() in the future.
          if retries.remove_headers_on_redirect and not conn.is_same_host(
              redirect_location
          ):
              new_headers = kw["headers"].copy()
              for header in kw["headers"]:
                  if header.lower() in retries.remove_headers_on_redirect:
                      new_headers.pop(header, None)
              kw["headers"] = new_headers

          try:
              retries = retries.increment(method, url, response=response, _pool=conn)  # type: ignore[arg-type]
          except MaxRetryError:
              if retries.raise_on_redirect:
                  response.drain_conn()
                  raise
              # Redirect budget exhausted but not fatal: hand back the
              # redirect response itself.
              return response

          kw["retries"] = retries
          kw["redirect"] = redirect

          log.info("Redirecting %s -> %s", url, redirect_location)

          response.drain_conn()
          return self.urlopen(method, redirect_location, **kw)
  class ProxyManager(PoolManager):
      """
      A :class:`PoolManager` whose requests all go through one proxy; HTTPS
      URLs are reached with the CONNECT method.

      :param proxy_url:
          The URL of the proxy to be used.

      :param proxy_headers:
          A dictionary containing headers that will be sent to the proxy. In case
          of HTTP they are being sent with each request, while in the
          HTTPS/CONNECT case they are sent only once. Could be used for proxy
          authentication.

      :param proxy_ssl_context:
          The proxy SSL context is used to establish the TLS connection to the
          proxy when using HTTPS proxies.

      :param use_forwarding_for_https:
          (Defaults to False) If set to True will forward requests to the HTTPS
          proxy to be made on behalf of the client instead of creating a TLS
          tunnel via the CONNECT method. **Enabling this flag means that request
          and response headers and content will be visible from the HTTPS proxy**
          whereas tunneling keeps request and response headers and content
          private.  IP address, target hostname, SNI, and port are always visible
          to an HTTPS proxy even when this flag is disabled.

      Example:

      .. code-block:: python

          import urllib3

          proxy = urllib3.ProxyManager("https://localhost:3128/")

          resp1 = proxy.request("GET", "https://google.com/")
          resp2 = proxy.request("GET", "https://httpbin.org/")

          print(len(proxy.pools))
          # 1

          resp3 = proxy.request("GET", "https://httpbin.org/")
          resp4 = proxy.request("GET", "https://twitter.com/")

          print(len(proxy.pools))
          # 3

      """

      def __init__(
          self,
          proxy_url: str,
          num_pools: int = 10,
          headers: Optional[Mapping[str, str]] = None,
          proxy_headers: Optional[Mapping[str, str]] = None,
          proxy_ssl_context: Optional["ssl.SSLContext"] = None,
          use_forwarding_for_https: bool = False,
          **connection_pool_kw: Any,
      ) -> None:
          """
          Parse and validate the proxy URL, then set up the pool manager.

          :raises ProxySchemeUnknown: If the proxy scheme is neither ``http``
              nor ``https``.
          """
          # A connection pool object is also accepted; its URL is rebuilt
          # from the pool's scheme, host and port.
          url_text = (
              f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
              if isinstance(proxy_url, HTTPConnectionPool)
              else proxy_url
          )
          parsed = parse_url(url_text)

          if parsed.scheme not in ("http", "https"):
              raise ProxySchemeUnknown(parsed.scheme)

          # Fill in the scheme's default port when the URL gave none.
          if not parsed.port:
              parsed = parsed._replace(port=port_by_scheme.get(parsed.scheme, 80))

          self.proxy = parsed
          self.proxy_headers = proxy_headers or {}
          self.proxy_ssl_context = proxy_ssl_context
          self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https)

          # Every pool created by this manager receives the proxy settings.
          connection_pool_kw.update(
              _proxy=self.proxy,
              _proxy_headers=self.proxy_headers,
              _proxy_config=self.proxy_config,
          )

          super().__init__(num_pools, headers, **connection_pool_kw)

      def connection_from_host(
          self,
          host: Optional[str],
          port: Optional[int] = None,
          scheme: Optional[str] = "http",
          pool_kwargs: Optional[Dict[str, Any]] = None,
      ) -> HTTPConnectionPool:
          """
          Return the pool for an ``https`` destination, or the pool for the
          proxy itself for any other scheme.
          """
          if scheme != "https":
              # Non-HTTPS requests are sent to the proxy, so the pool is
              # looked up by the proxy's own endpoint.
              proxy = self.proxy
              host, port, scheme = proxy.host, proxy.port, proxy.scheme  # type: ignore[union-attr]

          return super().connection_from_host(
              host, port, scheme, pool_kwargs=pool_kwargs
          )

      def _set_proxy_headers(
          self, url: str, headers: Optional[Mapping[str, str]] = None
      ) -> Mapping[str, str]:
          """
          Build the headers needed by proxies: specifically, the Accept and Host
          headers. Values supplied in ``headers`` take precedence.
          """
          merged = {"Accept": "*/*"}

          target = parse_url(url).netloc
          if target:
              merged["Host"] = target

          # User-supplied headers override the defaults set above.
          merged.update(headers or {})
          return merged

      def urlopen(  # type: ignore[override]
          self, method: str, url: str, redirect: bool = True, **kw: Any
      ) -> BaseHTTPResponse:
          "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
          target_scheme = parse_url(url).scheme
          tunnelled = connection_requires_http_tunnel(
              self.proxy, self.proxy_config, target_scheme
          )
          if not tunnelled:
              # For connections using HTTP CONNECT, httplib sets the necessary
              # headers on the CONNECT to the proxy. If we're not using CONNECT,
              # we'll definitely need to set 'Host' at the very least.
              kw["headers"] = self._set_proxy_headers(
                  url, kw.get("headers", self.headers)
              )

          return super().urlopen(method, url, redirect=redirect, **kw)
  def proxy_from_url(url: str, **kw: Any) -> ProxyManager:
      """
      Create a :class:`ProxyManager` for the proxy at ``url``.

      :param url: The URL of the proxy; passed on as ``proxy_url``.
      :param kw: Further keyword arguments passed on to :class:`ProxyManager`.
      :return: The newly created manager.
      """
      manager = ProxyManager(proxy_url=url, **kw)
      return manager