Update 2025-04-24_11:44:19

This commit is contained in:
oib
2025-04-24 11:44:23 +02:00
commit e748c737f4
3408 changed files with 717481 additions and 0 deletions

View File

@ -0,0 +1,211 @@
"""
Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more
"""
from __future__ import annotations
# Set default logging handler to avoid "No handler found" warnings.
import logging
import sys
import typing
import warnings
from logging import NullHandler
from . import exceptions
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from ._version import __version__
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
from .response import BaseHTTPResponse, HTTPResponse
from .util.request import make_headers
from .util.retry import Retry
from .util.timeout import Timeout
# Ensure that Python is compiled with OpenSSL 1.1.1+
# If the 'ssl' module isn't available at all that's
# fine, we only care if the module is available.
try:
import ssl
except ImportError:
pass
else:
if not ssl.OPENSSL_VERSION.startswith("OpenSSL "): # Defensive:
warnings.warn(
"urllib3 v2 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/3020",
exceptions.NotOpenSSLWarning,
)
elif ssl.OPENSSL_VERSION_INFO < (1, 1, 1): # Defensive:
raise ImportError(
"urllib3 v2 only supports OpenSSL 1.1.1+, currently "
f"the 'ssl' module is compiled with {ssl.OPENSSL_VERSION!r}. "
"See: https://github.com/urllib3/urllib3/issues/2168"
)
__author__ = "Andrey Petrov (andrey.petrov@shazow.net)"
__license__ = "MIT"
__version__ = __version__
__all__ = (
"HTTPConnectionPool",
"HTTPHeaderDict",
"HTTPSConnectionPool",
"PoolManager",
"ProxyManager",
"HTTPResponse",
"Retry",
"Timeout",
"add_stderr_logger",
"connection_from_url",
"disable_warnings",
"encode_multipart_formdata",
"make_headers",
"proxy_from_url",
"request",
"BaseHTTPResponse",
)
logging.getLogger(__name__).addHandler(NullHandler())
def add_stderr_logger(
level: int = logging.DEBUG,
) -> logging.StreamHandler[typing.TextIO]:
"""
Helper for quickly adding a StreamHandler to the logger. Useful for
debugging.
Returns the handler after adding it.
"""
# This method needs to be in this __init__.py to get the __name__ correct
# even if urllib3 is vendored within another package.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
logger.addHandler(handler)
logger.setLevel(level)
logger.debug("Added a stderr logging handler to logger: %s", __name__)
return handler
# ... Clean up.
del NullHandler
# All warning filters *must* be appended unless you're really certain that they
# shouldn't be: otherwise, it's very hard for users to use most Python
# mechanisms to silence them.
# SecurityWarning's always go off by default.
warnings.simplefilter("always", exceptions.SecurityWarning, append=True)
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True)
def disable_warnings(category: type[Warning] = exceptions.HTTPWarning) -> None:
"""
Helper for quickly disabling all urllib3 warnings.
"""
warnings.simplefilter("ignore", category)
_DEFAULT_POOL = PoolManager()
def request(
method: str,
url: str,
*,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
preload_content: bool | None = True,
decode_content: bool | None = True,
redirect: bool | None = True,
retries: Retry | bool | int | None = None,
timeout: Timeout | float | int | None = 3,
json: typing.Any | None = None,
) -> BaseHTTPResponse:
"""
A convenience, top-level request method. It uses a module-global ``PoolManager`` instance.
Therefore, its side effects could be shared across dependencies relying on it.
To avoid side effects create a new ``PoolManager`` instance and use it instead.
The method does not accept low-level ``**urlopen_kw`` keyword arguments.
:param method:
HTTP request method (such as GET, POST, PUT, etc.)
:param url:
The URL to perform the request on.
:param body:
Data to send in the request body, either :class:`str`, :class:`bytes`,
an iterable of :class:`str`/:class:`bytes`, or a file-like object.
:param fields:
Data to encode and send in the request body.
:param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc.
:param bool preload_content:
If True, the response's body will be preloaded into memory.
:param bool decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header.
:param redirect:
If True, automatically handle redirects (status codes 301, 302,
303, 307, 308). Each redirect counts as a retry. Disabling retries
will disable redirect, too.
:param retries:
Configure the number of retries to allow before raising a
:class:`~urllib3.exceptions.MaxRetryError` exception.
If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a
:class:`~urllib3.util.retry.Retry` object for fine-grained control
over different types of retries.
Pass an integer number to retry connection errors that many times,
but no other types of errors. Pass zero to never retry.
If ``False``, then retries are disabled and any exception is raised
immediately. Also, instead of raising a MaxRetryError on redirects,
the redirect response will be returned.
:type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
:param timeout:
If specified, overrides the default timeout for this one
request. It may be a float (in seconds) or an instance of
:class:`urllib3.util.Timeout`.
:param json:
Data to encode and send as JSON with UTF-encoded in the request body.
The ``"Content-Type"`` header will be set to ``"application/json"``
unless specified otherwise.
"""
return _DEFAULT_POOL.request(
method,
url,
body=body,
fields=fields,
headers=headers,
preload_content=preload_content,
decode_content=decode_content,
redirect=redirect,
retries=retries,
timeout=timeout,
json=json,
)
if sys.platform == "emscripten":
from .contrib.emscripten import inject_into_urllib3 # noqa: 401
inject_into_urllib3()

View File

@ -0,0 +1,165 @@
from __future__ import annotations
import typing
from .util.connection import _TYPE_SOCKET_OPTIONS
from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
from .util.url import Url
_TYPE_BODY = typing.Union[bytes, typing.IO[typing.Any], typing.Iterable[bytes], str]
class ProxyConfig(typing.NamedTuple):
ssl_context: ssl.SSLContext | None
use_forwarding_for_https: bool
assert_hostname: None | str | typing.Literal[False]
assert_fingerprint: str | None
class _ResponseOptions(typing.NamedTuple):
# TODO: Remove this in favor of a better
# HTTP request/response lifecycle tracking.
request_method: str
request_url: str
preload_content: bool
decode_content: bool
enforce_content_length: bool
if typing.TYPE_CHECKING:
import ssl
from typing import Protocol
from .response import BaseHTTPResponse
class BaseHTTPConnection(Protocol):
default_port: typing.ClassVar[int]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
host: str
port: int
timeout: None | (
float
) # Instance doesn't store _DEFAULT_TIMEOUT, must be resolved.
blocksize: int
source_address: tuple[str, int] | None
socket_options: _TYPE_SOCKET_OPTIONS | None
proxy: Url | None
proxy_config: ProxyConfig | None
is_verified: bool
proxy_is_verified: bool | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 8192,
socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
) -> None: ...
def set_tunnel(
self,
host: str,
port: int | None = None,
headers: typing.Mapping[str, str] | None = None,
scheme: str = "http",
) -> None: ...
def connect(self) -> None: ...
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
# We know *at least* botocore is depending on the order of the
# first 3 parameters so to be safe we only mark the later ones
# as keyword-only to ensure we have space to extend.
*,
chunked: bool = False,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> None: ...
def getresponse(self) -> BaseHTTPResponse: ...
def close(self) -> None: ...
@property
def is_closed(self) -> bool:
"""Whether the connection either is brand new or has been previously closed.
If this property is True then both ``is_connected`` and ``has_connected_to_proxy``
properties must be False.
"""
@property
def is_connected(self) -> bool:
"""Whether the connection is actively connected to any origin (proxy or target)"""
@property
def has_connected_to_proxy(self) -> bool:
"""Whether the connection has successfully connected to its proxy.
This returns False if no proxy is in use. Used to determine whether
errors are coming from the proxy layer or from tunnelling to the target origin.
"""
class BaseHTTPSConnection(BaseHTTPConnection, Protocol):
default_port: typing.ClassVar[int]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
# Certificate verification methods
cert_reqs: int | str | None
assert_hostname: None | str | typing.Literal[False]
assert_fingerprint: str | None
ssl_context: ssl.SSLContext | None
# Trusted CAs
ca_certs: str | None
ca_cert_dir: str | None
ca_cert_data: None | str | bytes
# TLS version
ssl_minimum_version: int | None
ssl_maximum_version: int | None
ssl_version: int | str | None # Deprecated
# Client certificates
cert_file: str | None
key_file: str | None
key_password: str | None
def __init__(
self,
host: str,
port: int | None = None,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 16384,
socket_options: _TYPE_SOCKET_OPTIONS | None = ...,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
cert_reqs: int | str | None = None,
assert_hostname: None | str | typing.Literal[False] = None,
assert_fingerprint: str | None = None,
server_hostname: str | None = None,
ssl_context: ssl.SSLContext | None = None,
ca_certs: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
ssl_minimum_version: int | None = None,
ssl_maximum_version: int | None = None,
ssl_version: int | str | None = None, # Deprecated
cert_file: str | None = None,
key_file: str | None = None,
key_password: str | None = None,
) -> None: ...

View File

@ -0,0 +1,479 @@
from __future__ import annotations
import typing
from collections import OrderedDict
from enum import Enum, auto
from threading import RLock
if typing.TYPE_CHECKING:
# We can only import Protocol if TYPE_CHECKING because it's a development
# dependency, and is not available at runtime.
from typing import Protocol
from typing_extensions import Self
class HasGettableStringKeys(Protocol):
def keys(self) -> typing.Iterator[str]: ...
def __getitem__(self, key: str) -> str: ...
__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
# Key type
_KT = typing.TypeVar("_KT")
# Value type
_VT = typing.TypeVar("_VT")
# Default type
_DT = typing.TypeVar("_DT")
ValidHTTPHeaderSource = typing.Union[
"HTTPHeaderDict",
typing.Mapping[str, str],
typing.Iterable[tuple[str, str]],
"HasGettableStringKeys",
]
class _Sentinel(Enum):
not_passed = auto()
def ensure_can_construct_http_header_dict(
potential: object,
) -> ValidHTTPHeaderSource | None:
if isinstance(potential, HTTPHeaderDict):
return potential
elif isinstance(potential, typing.Mapping):
# Full runtime checking of the contents of a Mapping is expensive, so for the
# purposes of typechecking, we assume that any Mapping is the right shape.
return typing.cast(typing.Mapping[str, str], potential)
elif isinstance(potential, typing.Iterable):
# Similarly to Mapping, full runtime checking of the contents of an Iterable is
# expensive, so for the purposes of typechecking, we assume that any Iterable
# is the right shape.
return typing.cast(typing.Iterable[tuple[str, str]], potential)
elif hasattr(potential, "keys") and hasattr(potential, "__getitem__"):
return typing.cast("HasGettableStringKeys", potential)
else:
return None
class RecentlyUsedContainer(typing.Generic[_KT, _VT], typing.MutableMapping[_KT, _VT]):
"""
Provides a thread-safe dict-like container which maintains up to
``maxsize`` keys while throwing away the least-recently-used keys beyond
``maxsize``.
:param maxsize:
Maximum number of recent elements to retain.
:param dispose_func:
Every time an item is evicted from the container,
``dispose_func(value)`` is called. Callback which will get called
"""
_container: typing.OrderedDict[_KT, _VT]
_maxsize: int
dispose_func: typing.Callable[[_VT], None] | None
lock: RLock
def __init__(
self,
maxsize: int = 10,
dispose_func: typing.Callable[[_VT], None] | None = None,
) -> None:
super().__init__()
self._maxsize = maxsize
self.dispose_func = dispose_func
self._container = OrderedDict()
self.lock = RLock()
def __getitem__(self, key: _KT) -> _VT:
# Re-insert the item, moving it to the end of the eviction line.
with self.lock:
item = self._container.pop(key)
self._container[key] = item
return item
def __setitem__(self, key: _KT, value: _VT) -> None:
evicted_item = None
with self.lock:
# Possibly evict the existing value of 'key'
try:
# If the key exists, we'll overwrite it, which won't change the
# size of the pool. Because accessing a key should move it to
# the end of the eviction line, we pop it out first.
evicted_item = key, self._container.pop(key)
self._container[key] = value
except KeyError:
# When the key does not exist, we insert the value first so that
# evicting works in all cases, including when self._maxsize is 0
self._container[key] = value
if len(self._container) > self._maxsize:
# If we didn't evict an existing value, and we've hit our maximum
# size, then we have to evict the least recently used item from
# the beginning of the container.
evicted_item = self._container.popitem(last=False)
# After releasing the lock on the pool, dispose of any evicted value.
if evicted_item is not None and self.dispose_func:
_, evicted_value = evicted_item
self.dispose_func(evicted_value)
def __delitem__(self, key: _KT) -> None:
with self.lock:
value = self._container.pop(key)
if self.dispose_func:
self.dispose_func(value)
def __len__(self) -> int:
with self.lock:
return len(self._container)
def __iter__(self) -> typing.NoReturn:
raise NotImplementedError(
"Iteration over this class is unlikely to be threadsafe."
)
def clear(self) -> None:
with self.lock:
# Copy pointers to all values, then wipe the mapping
values = list(self._container.values())
self._container.clear()
if self.dispose_func:
for value in values:
self.dispose_func(value)
def keys(self) -> set[_KT]: # type: ignore[override]
with self.lock:
return set(self._container.keys())
class HTTPHeaderDictItemView(set[tuple[str, str]]):
"""
HTTPHeaderDict is unusual for a Mapping[str, str] in that it has two modes of
address.
If we directly try to get an item with a particular name, we will get a string
back that is the concatenated version of all the values:
>>> d['X-Header-Name']
'Value1, Value2, Value3'
However, if we iterate over an HTTPHeaderDict's items, we will optionally combine
these values based on whether combine=True was called when building up the dictionary
>>> d = HTTPHeaderDict({"A": "1", "B": "foo"})
>>> d.add("A", "2", combine=True)
>>> d.add("B", "bar")
>>> list(d.items())
[
('A', '1, 2'),
('B', 'foo'),
('B', 'bar'),
]
This class conforms to the interface required by the MutableMapping ABC while
also giving us the nonstandard iteration behavior we want; items with duplicate
keys, ordered by time of first insertion.
"""
_headers: HTTPHeaderDict
def __init__(self, headers: HTTPHeaderDict) -> None:
self._headers = headers
def __len__(self) -> int:
return len(list(self._headers.iteritems()))
def __iter__(self) -> typing.Iterator[tuple[str, str]]:
return self._headers.iteritems()
def __contains__(self, item: object) -> bool:
if isinstance(item, tuple) and len(item) == 2:
passed_key, passed_val = item
if isinstance(passed_key, str) and isinstance(passed_val, str):
return self._headers._has_value_for_header(passed_key, passed_val)
return False
class HTTPHeaderDict(typing.MutableMapping[str, str]):
"""
:param headers:
An iterable of field-value pairs. Must not contain multiple field names
when compared case-insensitively.
:param kwargs:
Additional field-value pairs to pass in to ``dict.update``.
A ``dict`` like container for storing HTTP Headers.
Field names are stored and compared case-insensitively in compliance with
RFC 7230. Iteration provides the first case-sensitive key seen for each
case-insensitive pair.
Using ``__setitem__`` syntax overwrites fields that compare equal
case-insensitively in order to maintain ``dict``'s api. For fields that
compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
in a loop.
If multiple fields that are equal case-insensitively are passed to the
constructor or ``.update``, the behavior is undefined and some will be
lost.
>>> headers = HTTPHeaderDict()
>>> headers.add('Set-Cookie', 'foo=bar')
>>> headers.add('set-cookie', 'baz=quxx')
>>> headers['content-length'] = '7'
>>> headers['SET-cookie']
'foo=bar, baz=quxx'
>>> headers['Content-Length']
'7'
"""
_container: typing.MutableMapping[str, list[str]]
def __init__(self, headers: ValidHTTPHeaderSource | None = None, **kwargs: str):
super().__init__()
self._container = {} # 'dict' is insert-ordered
if headers is not None:
if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers)
else:
self.extend(headers)
if kwargs:
self.extend(kwargs)
def __setitem__(self, key: str, val: str) -> None:
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
self._container[key.lower()] = [key, val]
def __getitem__(self, key: str) -> str:
val = self._container[key.lower()]
return ", ".join(val[1:])
def __delitem__(self, key: str) -> None:
del self._container[key.lower()]
def __contains__(self, key: object) -> bool:
if isinstance(key, str):
return key.lower() in self._container
return False
def setdefault(self, key: str, default: str = "") -> str:
return super().setdefault(key, default)
def __eq__(self, other: object) -> bool:
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return False
else:
other_as_http_header_dict = type(self)(maybe_constructable)
return {k.lower(): v for k, v in self.itermerged()} == {
k.lower(): v for k, v in other_as_http_header_dict.itermerged()
}
def __ne__(self, other: object) -> bool:
return not self.__eq__(other)
def __len__(self) -> int:
return len(self._container)
def __iter__(self) -> typing.Iterator[str]:
# Only provide the originally cased names
for vals in self._container.values():
yield vals[0]
def discard(self, key: str) -> None:
try:
del self[key]
except KeyError:
pass
def add(self, key: str, val: str, *, combine: bool = False) -> None:
"""Adds a (name, value) pair, doesn't overwrite the value if it already
exists.
If this is called with combine=True, instead of adding a new header value
as a distinct item during iteration, this will instead append the value to
any existing header value with a comma. If no existing header value exists
for the key, then the value will simply be added, ignoring the combine parameter.
>>> headers = HTTPHeaderDict(foo='bar')
>>> headers.add('Foo', 'baz')
>>> headers['foo']
'bar, baz'
>>> list(headers.items())
[('foo', 'bar'), ('foo', 'baz')]
>>> headers.add('foo', 'quz', combine=True)
>>> list(headers.items())
[('foo', 'bar, baz, quz')]
"""
# avoid a bytes/str comparison by decoding before httplib
if isinstance(key, bytes):
key = key.decode("latin-1")
key_lower = key.lower()
new_vals = [key, val]
# Keep the common case aka no item present as fast as possible
vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals:
# if there are values here, then there is at least the initial
# key/value pair
assert len(vals) >= 2
if combine:
vals[-1] = vals[-1] + ", " + val
else:
vals.append(val)
def extend(self, *args: ValidHTTPHeaderSource, **kwargs: str) -> None:
"""Generic import function for any type of header-like object.
Adapted version of MutableMapping.update in order to insert items
with self.add instead of self.__setitem__
"""
if len(args) > 1:
raise TypeError(
f"extend() takes at most 1 positional arguments ({len(args)} given)"
)
other = args[0] if len(args) >= 1 else ()
if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems():
self.add(key, val)
elif isinstance(other, typing.Mapping):
for key, val in other.items():
self.add(key, val)
elif isinstance(other, typing.Iterable):
other = typing.cast(typing.Iterable[tuple[str, str]], other)
for key, value in other:
self.add(key, value)
elif hasattr(other, "keys") and hasattr(other, "__getitem__"):
# THIS IS NOT A TYPESAFE BRANCH
# In this branch, the object has a `keys` attr but is not a Mapping or any of
# the other types indicated in the method signature. We do some stuff with
# it as though it partially implements the Mapping interface, but we're not
# doing that stuff safely AT ALL.
for key in other.keys():
self.add(key, other[key])
for key, value in kwargs.items():
self.add(key, value)
@typing.overload
def getlist(self, key: str) -> list[str]: ...
@typing.overload
def getlist(self, key: str, default: _DT) -> list[str] | _DT: ...
def getlist(
self, key: str, default: _Sentinel | _DT = _Sentinel.not_passed
) -> list[str] | _DT:
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
try:
vals = self._container[key.lower()]
except KeyError:
if default is _Sentinel.not_passed:
# _DT is unbound; empty list is instance of List[str]
return []
# _DT is bound; default is instance of _DT
return default
else:
# _DT may or may not be bound; vals[1:] is instance of List[str], which
# meets our external interface requirement of `Union[List[str], _DT]`.
return vals[1:]
def _prepare_for_method_change(self) -> Self:
"""
Remove content-specific header fields before changing the request
method to GET or HEAD according to RFC 9110, Section 15.4.
"""
content_specific_headers = [
"Content-Encoding",
"Content-Language",
"Content-Location",
"Content-Type",
"Content-Length",
"Digest",
"Last-Modified",
]
for header in content_specific_headers:
self.discard(header)
return self
# Backwards compatibility for httplib
getheaders = getlist
getallmatchingheaders = getlist
iget = getlist
# Backwards compatibility for http.cookiejar
get_all = getlist
def __repr__(self) -> str:
return f"{type(self).__name__}({dict(self.itermerged())})"
def _copy_from(self, other: HTTPHeaderDict) -> None:
for key in other:
val = other.getlist(key)
self._container[key.lower()] = [key, *val]
def copy(self) -> Self:
clone = type(self)()
clone._copy_from(self)
return clone
def iteritems(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all header lines, including duplicate ones."""
for key in self:
vals = self._container[key.lower()]
for val in vals[1:]:
yield vals[0], val
def itermerged(self) -> typing.Iterator[tuple[str, str]]:
"""Iterate over all headers, merging duplicate ones together."""
for key in self:
val = self._container[key.lower()]
yield val[0], ", ".join(val[1:])
def items(self) -> HTTPHeaderDictItemView: # type: ignore[override]
return HTTPHeaderDictItemView(self)
def _has_value_for_header(self, header_name: str, potential_value: str) -> bool:
if header_name in self:
return potential_value in self._container[header_name.lower()][1:]
return False
def __ior__(self, other: object) -> HTTPHeaderDict:
# Supports extending a header dict in-place using operator |=
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
self.extend(maybe_constructable)
return self
def __or__(self, other: object) -> Self:
# Supports merging header dicts using operator |
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
result = self.copy()
result.extend(maybe_constructable)
return result
def __ror__(self, other: object) -> Self:
# Supports merging header dicts using operator | when other is on left side
# combining items with add instead of __setitem__
maybe_constructable = ensure_can_construct_http_header_dict(other)
if maybe_constructable is None:
return NotImplemented
result = type(self)(maybe_constructable)
result.extend(self)
return result

View File

@ -0,0 +1,278 @@
from __future__ import annotations
import json as _json
import typing
from urllib.parse import urlencode
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .filepost import _TYPE_FIELDS, encode_multipart_formdata
from .response import BaseHTTPResponse
__all__ = ["RequestMethods"]
_TYPE_ENCODE_URL_FIELDS = typing.Union[
typing.Sequence[tuple[str, typing.Union[str, bytes]]],
typing.Mapping[str, typing.Union[str, bytes]],
]
class RequestMethods:
"""
Convenience mixin for classes who implement a :meth:`urlopen` method, such
as :class:`urllib3.HTTPConnectionPool` and
:class:`urllib3.PoolManager`.
Provides behavior for making common types of HTTP request methods and
decides which type of request field encoding to use.
Specifically,
:meth:`.request_encode_url` is for sending requests whose fields are
encoded in the URL (such as GET, HEAD, DELETE).
:meth:`.request_encode_body` is for sending requests whose fields are
encoded in the *body* of the request using multipart or www-form-urlencoded
(such as for POST, PUT, PATCH).
:meth:`.request` is for making any kind of request, it will look up the
appropriate encoding format and use one of the above two methods to make
the request.
Initializer parameters:
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.
"""
_encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}
def __init__(self, headers: typing.Mapping[str, str] | None = None) -> None:
self.headers = headers or {}
def urlopen(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
encode_multipart: bool = True,
multipart_boundary: str | None = None,
**kw: typing.Any,
) -> BaseHTTPResponse: # Abstract
raise NotImplementedError(
"Classes extending RequestMethods must implement "
"their own ``urlopen`` method."
)
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
json: typing.Any | None = None,
**urlopen_kw: typing.Any,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the appropriate encoding of
``fields`` based on the ``method`` used.
This is a convenience method that requires the least amount of manual
effort. It can be used in most situations, while still having the
option to drop down to more specific methods when necessary, such as
:meth:`request_encode_url`, :meth:`request_encode_body`,
or even the lowest level :meth:`urlopen`.
:param method:
HTTP request method (such as GET, POST, PUT, etc.)
:param url:
The URL to perform the request on.
:param body:
Data to send in the request body, either :class:`str`, :class:`bytes`,
an iterable of :class:`str`/:class:`bytes`, or a file-like object.
:param fields:
Data to encode and send in the URL or request body, depending on ``method``.
:param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
:param json:
Data to encode and send as JSON with UTF-encoded in the request body.
The ``"Content-Type"`` header will be set to ``"application/json"``
unless specified otherwise.
"""
method = method.upper()
if json is not None and body is not None:
raise TypeError(
"request got values for both 'body' and 'json' parameters which are mutually exclusive"
)
if json is not None:
if headers is None:
headers = self.headers
if not ("content-type" in map(str.lower, headers.keys())):
headers = HTTPHeaderDict(headers)
headers["Content-Type"] = "application/json"
body = _json.dumps(json, separators=(",", ":"), ensure_ascii=False).encode(
"utf-8"
)
if body is not None:
urlopen_kw["body"] = body
if method in self._encode_url_methods:
return self.request_encode_url(
method,
url,
fields=fields, # type: ignore[arg-type]
headers=headers,
**urlopen_kw,
)
else:
return self.request_encode_body(
method, url, fields=fields, headers=headers, **urlopen_kw
)
def request_encode_url(
self,
method: str,
url: str,
fields: _TYPE_ENCODE_URL_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the url. This is useful for request methods like GET, HEAD, DELETE, etc.
:param method:
HTTP request method (such as GET, POST, PUT, etc.)
:param url:
The URL to perform the request on.
:param fields:
Data to encode and send in the URL.
:param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
"""
if headers is None:
headers = self.headers
extra_kw: dict[str, typing.Any] = {"headers": headers}
extra_kw.update(urlopen_kw)
if fields:
url += "?" + urlencode(fields)
return self.urlopen(method, url, **extra_kw)
def request_encode_body(
self,
method: str,
url: str,
fields: _TYPE_FIELDS | None = None,
headers: typing.Mapping[str, str] | None = None,
encode_multipart: bool = True,
multipart_boundary: str | None = None,
**urlopen_kw: str,
) -> BaseHTTPResponse:
"""
Make a request using :meth:`urlopen` with the ``fields`` encoded in
the body. This is useful for request methods like POST, PUT, PATCH, etc.
When ``encode_multipart=True`` (default), then
:func:`urllib3.encode_multipart_formdata` is used to encode
the payload with the appropriate content type. Otherwise
:func:`urllib.parse.urlencode` is used with the
'application/x-www-form-urlencoded' content type.
Multipart encoding must be used when posting files, and it's reasonably
safe to use it in other times too. However, it may break request
signing, such as with OAuth.
Supports an optional ``fields`` parameter of key/value strings AND
key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
the MIME type is optional. For example::
fields = {
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
'realfile': ('barfile.txt', open('realfile').read()),
'typedfile': ('bazfile.bin', open('bazfile').read(),
'image/jpeg'),
'nonamefile': 'contents of nonamefile field',
}
When uploading a file, providing a filename (the first parameter of the
tuple) is optional but recommended to best mimic behavior of browsers.
Note that if ``headers`` are supplied, the 'Content-Type' header will
be overwritten because it depends on the dynamic random boundary string
which is used to compose the body of the request. The random boundary
string can be explicitly set with the ``multipart_boundary`` parameter.
:param method:
HTTP request method (such as GET, POST, PUT, etc.)
:param url:
The URL to perform the request on.
:param fields:
Data to encode and send in the request body.
:param headers:
Dictionary of custom headers to send, such as User-Agent,
If-None-Match, etc. If None, pool headers are used. If provided,
these headers completely replace any pool-specific headers.
:param encode_multipart:
If True, encode the ``fields`` using the multipart/form-data MIME
format.
:param multipart_boundary:
If not specified, then a random boundary will be generated using
:func:`urllib3.filepost.choose_boundary`.
"""
if headers is None:
headers = self.headers
extra_kw: dict[str, typing.Any] = {"headers": HTTPHeaderDict(headers)}
body: bytes | str
if fields:
if "body" in urlopen_kw:
raise TypeError(
"request got values for both 'fields' and 'body', can only specify one."
)
if encode_multipart:
body, content_type = encode_multipart_formdata(
fields, boundary=multipart_boundary
)
else:
body, content_type = (
urlencode(fields), # type: ignore[arg-type]
"application/x-www-form-urlencoded",
)
extra_kw["body"] = body
extra_kw["headers"].setdefault("Content-Type", content_type)
extra_kw.update(urlopen_kw)
return self.urlopen(method, url, **extra_kw)

View File

@ -0,0 +1,21 @@
# file generated by setuptools-scm
# don't change, don't track in version control
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple
from typing import Union
VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
VERSION_TUPLE = object
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
__version__ = version = '2.4.0'
__version_tuple__ = version_tuple = (2, 4, 0)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
from __future__ import annotations
import urllib3.connection
from ...connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connection import EmscriptenHTTPConnection, EmscriptenHTTPSConnection
def inject_into_urllib3() -> None:
# override connection classes to use emscripten specific classes
# n.b. mypy complains about the overriding of classes below
# if it isn't ignored
HTTPConnectionPool.ConnectionCls = EmscriptenHTTPConnection
HTTPSConnectionPool.ConnectionCls = EmscriptenHTTPSConnection
urllib3.connection.HTTPConnection = EmscriptenHTTPConnection # type: ignore[misc,assignment]
urllib3.connection.HTTPSConnection = EmscriptenHTTPSConnection # type: ignore[misc,assignment]

View File

@ -0,0 +1,255 @@
from __future__ import annotations
import os
import typing
# use http.client.HTTPException for consistency with non-emscripten
from http.client import HTTPException as HTTPException # noqa: F401
from http.client import ResponseNotReady
from ..._base_connection import _TYPE_BODY
from ...connection import HTTPConnection, ProxyConfig, port_by_scheme
from ...exceptions import TimeoutError
from ...response import BaseHTTPResponse
from ...util.connection import _TYPE_SOCKET_OPTIONS
from ...util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
from ...util.url import Url
from .fetch import _RequestError, _TimeoutError, send_request, send_streaming_request
from .request import EmscriptenRequest
from .response import EmscriptenHttpResponseWrapper, EmscriptenResponse
if typing.TYPE_CHECKING:
from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection
class EmscriptenHTTPConnection:
default_port: typing.ClassVar[int] = port_by_scheme["http"]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
timeout: None | (float)
host: str
port: int
blocksize: int
source_address: tuple[str, int] | None
socket_options: _TYPE_SOCKET_OPTIONS | None
proxy: Url | None
proxy_config: ProxyConfig | None
is_verified: bool = False
proxy_is_verified: bool | None = None
_response: EmscriptenResponse | None
def __init__(
self,
host: str,
port: int = 0,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 8192,
socket_options: _TYPE_SOCKET_OPTIONS | None = None,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
) -> None:
self.host = host
self.port = port
self.timeout = timeout if isinstance(timeout, float) else 0.0
self.scheme = "http"
self._closed = True
self._response = None
# ignore these things because we don't
# have control over that stuff
self.proxy = None
self.proxy_config = None
self.blocksize = blocksize
self.source_address = None
self.socket_options = None
self.is_verified = False
def set_tunnel(
self,
host: str,
port: int | None = 0,
headers: typing.Mapping[str, str] | None = None,
scheme: str = "http",
) -> None:
pass
def connect(self) -> None:
pass
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
# We know *at least* botocore is depending on the order of the
# first 3 parameters so to be safe we only mark the later ones
# as keyword-only to ensure we have space to extend.
*,
chunked: bool = False,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> None:
self._closed = False
if url.startswith("/"):
# no scheme / host / port included, make a full url
url = f"{self.scheme}://{self.host}:{self.port}" + url
request = EmscriptenRequest(
url=url,
method=method,
timeout=self.timeout if self.timeout else 0,
decode_content=decode_content,
)
request.set_body(body)
if headers:
for k, v in headers.items():
request.set_header(k, v)
self._response = None
try:
if not preload_content:
self._response = send_streaming_request(request)
if self._response is None:
self._response = send_request(request)
except _TimeoutError as e:
raise TimeoutError(e.message) from e
except _RequestError as e:
raise HTTPException(e.message) from e
def getresponse(self) -> BaseHTTPResponse:
if self._response is not None:
return EmscriptenHttpResponseWrapper(
internal_response=self._response,
url=self._response.request.url,
connection=self,
)
else:
raise ResponseNotReady()
def close(self) -> None:
self._closed = True
self._response = None
@property
def is_closed(self) -> bool:
"""Whether the connection either is brand new or has been previously closed.
If this property is True then both ``is_connected`` and ``has_connected_to_proxy``
properties must be False.
"""
return self._closed
@property
def is_connected(self) -> bool:
"""Whether the connection is actively connected to any origin (proxy or target)"""
return True
@property
def has_connected_to_proxy(self) -> bool:
"""Whether the connection has successfully connected to its proxy.
This returns False if no proxy is in use. Used to determine whether
errors are coming from the proxy layer or from tunnelling to the target origin.
"""
return False
class EmscriptenHTTPSConnection(EmscriptenHTTPConnection):
default_port = port_by_scheme["https"]
# all this is basically ignored, as browser handles https
cert_reqs: int | str | None = None
ca_certs: str | None = None
ca_cert_dir: str | None = None
ca_cert_data: None | str | bytes = None
cert_file: str | None
key_file: str | None
key_password: str | None
ssl_context: typing.Any | None
ssl_version: int | str | None = None
ssl_minimum_version: int | None = None
ssl_maximum_version: int | None = None
assert_hostname: None | str | typing.Literal[False]
assert_fingerprint: str | None = None
def __init__(
self,
host: str,
port: int = 0,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 16384,
socket_options: (
None | _TYPE_SOCKET_OPTIONS
) = HTTPConnection.default_socket_options,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
cert_reqs: int | str | None = None,
assert_hostname: None | str | typing.Literal[False] = None,
assert_fingerprint: str | None = None,
server_hostname: str | None = None,
ssl_context: typing.Any | None = None,
ca_certs: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
ssl_minimum_version: int | None = None,
ssl_maximum_version: int | None = None,
ssl_version: int | str | None = None, # Deprecated
cert_file: str | None = None,
key_file: str | None = None,
key_password: str | None = None,
) -> None:
super().__init__(
host,
port=port,
timeout=timeout,
source_address=source_address,
blocksize=blocksize,
socket_options=socket_options,
proxy=proxy,
proxy_config=proxy_config,
)
self.scheme = "https"
self.key_file = key_file
self.cert_file = cert_file
self.key_password = key_password
self.ssl_context = ssl_context
self.server_hostname = server_hostname
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
self.ssl_version = ssl_version
self.ssl_minimum_version = ssl_minimum_version
self.ssl_maximum_version = ssl_maximum_version
self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
self.ca_cert_data = ca_cert_data
self.cert_reqs = None
# The browser will automatically verify all requests.
# We have no control over that setting.
self.is_verified = True
def set_cert(
self,
key_file: str | None = None,
cert_file: str | None = None,
cert_reqs: int | str | None = None,
key_password: str | None = None,
ca_certs: str | None = None,
assert_hostname: None | str | typing.Literal[False] = None,
assert_fingerprint: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
) -> None:
pass
# verify that this class implements BaseHTTP(s) connection correctly
if typing.TYPE_CHECKING:
_supports_http_protocol: BaseHTTPConnection = EmscriptenHTTPConnection("", 0)
_supports_https_protocol: BaseHTTPSConnection = EmscriptenHTTPSConnection("", 0)

View File

@ -0,0 +1,110 @@
let Status = {
SUCCESS_HEADER: -1,
SUCCESS_EOF: -2,
ERROR_TIMEOUT: -3,
ERROR_EXCEPTION: -4,
};
let connections = {};
let nextConnectionID = 1;
const encoder = new TextEncoder();
self.addEventListener("message", async function (event) {
if (event.data.close) {
let connectionID = event.data.close;
delete connections[connectionID];
return;
} else if (event.data.getMore) {
let connectionID = event.data.getMore;
let { curOffset, value, reader, intBuffer, byteBuffer } =
connections[connectionID];
// if we still have some in buffer, then just send it back straight away
if (!value || curOffset >= value.length) {
// read another buffer if required
try {
let readResponse = await reader.read();
if (readResponse.done) {
// read everything - clear connection and return
delete connections[connectionID];
Atomics.store(intBuffer, 0, Status.SUCCESS_EOF);
Atomics.notify(intBuffer, 0);
// finished reading successfully
// return from event handler
return;
}
curOffset = 0;
connections[connectionID].value = readResponse.value;
value = readResponse.value;
} catch (error) {
console.log("Request exception:", error);
let errorBytes = encoder.encode(error.message);
let written = errorBytes.length;
byteBuffer.set(errorBytes);
intBuffer[1] = written;
Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION);
Atomics.notify(intBuffer, 0);
}
}
// send as much buffer as we can
let curLen = value.length - curOffset;
if (curLen > byteBuffer.length) {
curLen = byteBuffer.length;
}
byteBuffer.set(value.subarray(curOffset, curOffset + curLen), 0);
Atomics.store(intBuffer, 0, curLen); // store current length in bytes
Atomics.notify(intBuffer, 0);
curOffset += curLen;
connections[connectionID].curOffset = curOffset;
return;
} else {
// start fetch
let connectionID = nextConnectionID;
nextConnectionID += 1;
const intBuffer = new Int32Array(event.data.buffer);
const byteBuffer = new Uint8Array(event.data.buffer, 8);
try {
const response = await fetch(event.data.url, event.data.fetchParams);
// return the headers first via textencoder
var headers = [];
for (const pair of response.headers.entries()) {
headers.push([pair[0], pair[1]]);
}
let headerObj = {
headers: headers,
status: response.status,
connectionID,
};
const headerText = JSON.stringify(headerObj);
let headerBytes = encoder.encode(headerText);
let written = headerBytes.length;
byteBuffer.set(headerBytes);
intBuffer[1] = written;
// make a connection
connections[connectionID] = {
reader: response.body.getReader(),
intBuffer: intBuffer,
byteBuffer: byteBuffer,
value: undefined,
curOffset: 0,
};
// set header ready
Atomics.store(intBuffer, 0, Status.SUCCESS_HEADER);
Atomics.notify(intBuffer, 0);
// all fetching after this goes through a new postmessage call with getMore
// this allows for parallel requests
} catch (error) {
console.log("Request exception:", error);
let errorBytes = encoder.encode(error.message);
let written = errorBytes.length;
byteBuffer.set(errorBytes);
intBuffer[1] = written;
Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION);
Atomics.notify(intBuffer, 0);
}
}
});
self.postMessage({ inited: true });

View File

@ -0,0 +1,708 @@
"""
Support for streaming http requests in emscripten.
A few caveats -
If your browser (or Node.js) has WebAssembly JavaScript Promise Integration enabled
https://github.com/WebAssembly/js-promise-integration/blob/main/proposals/js-promise-integration/Overview.md
*and* you launch pyodide using `pyodide.runPythonAsync`, this will fetch data using the
JavaScript asynchronous fetch api (wrapped via `pyodide.ffi.call_sync`). In this case
timeouts and streaming should just work.
Otherwise, it uses a combination of XMLHttpRequest and a web-worker for streaming.
This approach has several caveats:
Firstly, you can't do streaming http in the main UI thread, because atomics.wait isn't allowed.
Streaming only works if you're running pyodide in a web worker.
Secondly, this uses an extra web worker and SharedArrayBuffer to do the asynchronous fetch
operation, so it requires that you have crossOriginIsolation enabled, by serving over https
(or from localhost) with the two headers below set:
Cross-Origin-Opener-Policy: same-origin
Cross-Origin-Embedder-Policy: require-corp
You can tell if cross origin isolation is successfully enabled by looking at the global crossOriginIsolated variable in
JavaScript console. If it isn't, streaming requests will fallback to XMLHttpRequest, i.e. getting the whole
request into a buffer and then returning it. it shows a warning in the JavaScript console in this case.
Finally, the webworker which does the streaming fetch is created on initial import, but will only be started once
control is returned to javascript. Call `await wait_for_streaming_ready()` to wait for streaming fetch.
NB: in this code, there are a lot of JavaScript objects. They are named js_*
to make it clear what type of object they are.
"""
from __future__ import annotations
import io
import json
from email.parser import Parser
from importlib.resources import files
from typing import TYPE_CHECKING, Any
import js # type: ignore[import-not-found]
from pyodide.ffi import ( # type: ignore[import-not-found]
JsArray,
JsException,
JsProxy,
to_js,
)
if TYPE_CHECKING:
from typing_extensions import Buffer
from .request import EmscriptenRequest
from .response import EmscriptenResponse
"""
There are some headers that trigger unintended CORS preflight requests.
See also https://github.com/koenvo/pyodide-http/issues/22
"""
HEADERS_TO_IGNORE = ("user-agent",)
SUCCESS_HEADER = -1
SUCCESS_EOF = -2
ERROR_TIMEOUT = -3
ERROR_EXCEPTION = -4
_STREAMING_WORKER_CODE = (
files(__package__)
.joinpath("emscripten_fetch_worker.js")
.read_text(encoding="utf-8")
)
class _RequestError(Exception):
def __init__(
self,
message: str | None = None,
*,
request: EmscriptenRequest | None = None,
response: EmscriptenResponse | None = None,
):
self.request = request
self.response = response
self.message = message
super().__init__(self.message)
class _StreamingError(_RequestError):
pass
class _TimeoutError(_RequestError):
pass
def _obj_from_dict(dict_val: dict[str, Any]) -> JsProxy:
return to_js(dict_val, dict_converter=js.Object.fromEntries)
class _ReadStream(io.RawIOBase):
def __init__(
self,
int_buffer: JsArray,
byte_buffer: JsArray,
timeout: float,
worker: JsProxy,
connection_id: int,
request: EmscriptenRequest,
):
self.int_buffer = int_buffer
self.byte_buffer = byte_buffer
self.read_pos = 0
self.read_len = 0
self.connection_id = connection_id
self.worker = worker
self.timeout = int(1000 * timeout) if timeout > 0 else None
self.is_live = True
self._is_closed = False
self.request: EmscriptenRequest | None = request
def __del__(self) -> None:
self.close()
# this is compatible with _base_connection
def is_closed(self) -> bool:
return self._is_closed
# for compatibility with RawIOBase
@property
def closed(self) -> bool:
return self.is_closed()
def close(self) -> None:
if self.is_closed():
return
self.read_len = 0
self.read_pos = 0
self.int_buffer = None
self.byte_buffer = None
self._is_closed = True
self.request = None
if self.is_live:
self.worker.postMessage(_obj_from_dict({"close": self.connection_id}))
self.is_live = False
super().close()
def readable(self) -> bool:
return True
def writable(self) -> bool:
return False
def seekable(self) -> bool:
return False
def readinto(self, byte_obj: Buffer) -> int:
if not self.int_buffer:
raise _StreamingError(
"No buffer for stream in _ReadStream.readinto",
request=self.request,
response=None,
)
if self.read_len == 0:
# wait for the worker to send something
js.Atomics.store(self.int_buffer, 0, ERROR_TIMEOUT)
self.worker.postMessage(_obj_from_dict({"getMore": self.connection_id}))
if (
js.Atomics.wait(self.int_buffer, 0, ERROR_TIMEOUT, self.timeout)
== "timed-out"
):
raise _TimeoutError
data_len = self.int_buffer[0]
if data_len > 0:
self.read_len = data_len
self.read_pos = 0
elif data_len == ERROR_EXCEPTION:
string_len = self.int_buffer[1]
# decode the error string
js_decoder = js.TextDecoder.new()
json_str = js_decoder.decode(self.byte_buffer.slice(0, string_len))
raise _StreamingError(
f"Exception thrown in fetch: {json_str}",
request=self.request,
response=None,
)
else:
# EOF, free the buffers and return zero
# and free the request
self.is_live = False
self.close()
return 0
# copy from int32array to python bytes
ret_length = min(self.read_len, len(memoryview(byte_obj)))
subarray = self.byte_buffer.subarray(
self.read_pos, self.read_pos + ret_length
).to_py()
memoryview(byte_obj)[0:ret_length] = subarray
self.read_len -= ret_length
self.read_pos += ret_length
return ret_length
class _StreamingFetcher:
def __init__(self) -> None:
# make web-worker and data buffer on startup
self.streaming_ready = False
js_data_blob = js.Blob.new(
to_js([_STREAMING_WORKER_CODE], create_pyproxies=False),
_obj_from_dict({"type": "application/javascript"}),
)
def promise_resolver(js_resolve_fn: JsProxy, js_reject_fn: JsProxy) -> None:
def onMsg(e: JsProxy) -> None:
self.streaming_ready = True
js_resolve_fn(e)
def onErr(e: JsProxy) -> None:
js_reject_fn(e) # Defensive: never happens in ci
self.js_worker.onmessage = onMsg
self.js_worker.onerror = onErr
js_data_url = js.URL.createObjectURL(js_data_blob)
self.js_worker = js.globalThis.Worker.new(js_data_url)
self.js_worker_ready_promise = js.globalThis.Promise.new(promise_resolver)
def send(self, request: EmscriptenRequest) -> EmscriptenResponse:
headers = {
k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE
}
body = request.body
fetch_data = {"headers": headers, "body": to_js(body), "method": request.method}
# start the request off in the worker
timeout = int(1000 * request.timeout) if request.timeout > 0 else None
js_shared_buffer = js.SharedArrayBuffer.new(1048576)
js_int_buffer = js.Int32Array.new(js_shared_buffer)
js_byte_buffer = js.Uint8Array.new(js_shared_buffer, 8)
js.Atomics.store(js_int_buffer, 0, ERROR_TIMEOUT)
js.Atomics.notify(js_int_buffer, 0)
js_absolute_url = js.URL.new(request.url, js.location).href
self.js_worker.postMessage(
_obj_from_dict(
{
"buffer": js_shared_buffer,
"url": js_absolute_url,
"fetchParams": fetch_data,
}
)
)
# wait for the worker to send something
js.Atomics.wait(js_int_buffer, 0, ERROR_TIMEOUT, timeout)
if js_int_buffer[0] == ERROR_TIMEOUT:
raise _TimeoutError(
"Timeout connecting to streaming request",
request=request,
response=None,
)
elif js_int_buffer[0] == SUCCESS_HEADER:
# got response
# header length is in second int of intBuffer
string_len = js_int_buffer[1]
# decode the rest to a JSON string
js_decoder = js.TextDecoder.new()
# this does a copy (the slice) because decode can't work on shared array
# for some silly reason
json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len))
# get it as an object
response_obj = json.loads(json_str)
return EmscriptenResponse(
request=request,
status_code=response_obj["status"],
headers=response_obj["headers"],
body=_ReadStream(
js_int_buffer,
js_byte_buffer,
request.timeout,
self.js_worker,
response_obj["connectionID"],
request,
),
)
elif js_int_buffer[0] == ERROR_EXCEPTION:
string_len = js_int_buffer[1]
# decode the error string
js_decoder = js.TextDecoder.new()
json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len))
raise _StreamingError(
f"Exception thrown in fetch: {json_str}", request=request, response=None
)
else:
raise _StreamingError(
f"Unknown status from worker in fetch: {js_int_buffer[0]}",
request=request,
response=None,
)
class _JSPIReadStream(io.RawIOBase):
"""
A read stream that uses pyodide.ffi.run_sync to read from a JavaScript fetch
response. This requires support for WebAssembly JavaScript Promise Integration
in the containing browser, and for pyodide to be launched via runPythonAsync.
:param js_read_stream:
The JavaScript stream reader
:param timeout:
Timeout in seconds
:param request:
The request we're handling
:param response:
The response this stream relates to
:param js_abort_controller:
A JavaScript AbortController object, used for timeouts
"""
def __init__(
self,
js_read_stream: Any,
timeout: float,
request: EmscriptenRequest,
response: EmscriptenResponse,
js_abort_controller: Any, # JavaScript AbortController for timeouts
):
self.js_read_stream = js_read_stream
self.timeout = timeout
self._is_closed = False
self._is_done = False
self.request: EmscriptenRequest | None = request
self.response: EmscriptenResponse | None = response
self.current_buffer = None
self.current_buffer_pos = 0
self.js_abort_controller = js_abort_controller
def __del__(self) -> None:
self.close()
# this is compatible with _base_connection
def is_closed(self) -> bool:
return self._is_closed
# for compatibility with RawIOBase
@property
def closed(self) -> bool:
return self.is_closed()
def close(self) -> None:
if self.is_closed():
return
self.read_len = 0
self.read_pos = 0
self.js_read_stream.cancel()
self.js_read_stream = None
self._is_closed = True
self._is_done = True
self.request = None
self.response = None
super().close()
def readable(self) -> bool:
return True
def writable(self) -> bool:
return False
def seekable(self) -> bool:
return False
def _get_next_buffer(self) -> bool:
result_js = _run_sync_with_timeout(
self.js_read_stream.read(),
self.timeout,
self.js_abort_controller,
request=self.request,
response=self.response,
)
if result_js.done:
self._is_done = True
return False
else:
self.current_buffer = result_js.value.to_py()
self.current_buffer_pos = 0
return True
def readinto(self, byte_obj: Buffer) -> int:
if self.current_buffer is None:
if not self._get_next_buffer() or self.current_buffer is None:
self.close()
return 0
ret_length = min(
len(byte_obj), len(self.current_buffer) - self.current_buffer_pos
)
byte_obj[0:ret_length] = self.current_buffer[
self.current_buffer_pos : self.current_buffer_pos + ret_length
]
self.current_buffer_pos += ret_length
if self.current_buffer_pos == len(self.current_buffer):
self.current_buffer = None
return ret_length
# check if we are in a worker or not
def is_in_browser_main_thread() -> bool:
return hasattr(js, "window") and hasattr(js, "self") and js.self == js.window
def is_cross_origin_isolated() -> bool:
return hasattr(js, "crossOriginIsolated") and js.crossOriginIsolated
def is_in_node() -> bool:
return (
hasattr(js, "process")
and hasattr(js.process, "release")
and hasattr(js.process.release, "name")
and js.process.release.name == "node"
)
def is_worker_available() -> bool:
return hasattr(js, "Worker") and hasattr(js, "Blob")
_fetcher: _StreamingFetcher | None = None
if is_worker_available() and (
(is_cross_origin_isolated() and not is_in_browser_main_thread())
and (not is_in_node())
):
_fetcher = _StreamingFetcher()
else:
_fetcher = None
NODE_JSPI_ERROR = (
"urllib3 only works in Node.js with pyodide.runPythonAsync"
" and requires the flag --experimental-wasm-stack-switching in "
" versions of node <24."
)
def send_streaming_request(request: EmscriptenRequest) -> EmscriptenResponse | None:
if has_jspi():
return send_jspi_request(request, True)
elif is_in_node():
raise _RequestError(
message=NODE_JSPI_ERROR,
request=request,
response=None,
)
if _fetcher and streaming_ready():
return _fetcher.send(request)
else:
_show_streaming_warning()
return None
_SHOWN_TIMEOUT_WARNING = False
def _show_timeout_warning() -> None:
global _SHOWN_TIMEOUT_WARNING
if not _SHOWN_TIMEOUT_WARNING:
_SHOWN_TIMEOUT_WARNING = True
message = "Warning: Timeout is not available on main browser thread"
js.console.warn(message)
_SHOWN_STREAMING_WARNING = False
def _show_streaming_warning() -> None:
global _SHOWN_STREAMING_WARNING
if not _SHOWN_STREAMING_WARNING:
_SHOWN_STREAMING_WARNING = True
message = "Can't stream HTTP requests because: \n"
if not is_cross_origin_isolated():
message += " Page is not cross-origin isolated\n"
if is_in_browser_main_thread():
message += " Python is running in main browser thread\n"
if not is_worker_available():
message += " Worker or Blob classes are not available in this environment." # Defensive: this is always False in browsers that we test in
if streaming_ready() is False:
message += """ Streaming fetch worker isn't ready. If you want to be sure that streaming fetch
is working, you need to call: 'await urllib3.contrib.emscripten.fetch.wait_for_streaming_ready()`"""
from js import console
console.warn(message)
def send_request(request: EmscriptenRequest) -> EmscriptenResponse:
if has_jspi():
return send_jspi_request(request, False)
elif is_in_node():
raise _RequestError(
message=NODE_JSPI_ERROR,
request=request,
response=None,
)
try:
js_xhr = js.XMLHttpRequest.new()
if not is_in_browser_main_thread():
js_xhr.responseType = "arraybuffer"
if request.timeout:
js_xhr.timeout = int(request.timeout * 1000)
else:
js_xhr.overrideMimeType("text/plain; charset=ISO-8859-15")
if request.timeout:
# timeout isn't available on the main thread - show a warning in console
# if it is set
_show_timeout_warning()
js_xhr.open(request.method, request.url, False)
for name, value in request.headers.items():
if name.lower() not in HEADERS_TO_IGNORE:
js_xhr.setRequestHeader(name, value)
js_xhr.send(to_js(request.body))
headers = dict(Parser().parsestr(js_xhr.getAllResponseHeaders()))
if not is_in_browser_main_thread():
body = js_xhr.response.to_py().tobytes()
else:
body = js_xhr.response.encode("ISO-8859-15")
return EmscriptenResponse(
status_code=js_xhr.status, headers=headers, body=body, request=request
)
except JsException as err:
if err.name == "TimeoutError":
raise _TimeoutError(err.message, request=request)
elif err.name == "NetworkError":
raise _RequestError(err.message, request=request)
else:
# general http error
raise _RequestError(err.message, request=request)
def send_jspi_request(
request: EmscriptenRequest, streaming: bool
) -> EmscriptenResponse:
"""
Send a request using WebAssembly JavaScript Promise Integration
to wrap the asynchronous JavaScript fetch api (experimental).
:param request:
Request to send
:param streaming:
Whether to stream the response
:return: The response object
:rtype: EmscriptenResponse
"""
timeout = request.timeout
js_abort_controller = js.AbortController.new()
headers = {k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE}
req_body = request.body
fetch_data = {
"headers": headers,
"body": to_js(req_body),
"method": request.method,
"signal": js_abort_controller.signal,
}
# Call JavaScript fetch (async api, returns a promise)
fetcher_promise_js = js.fetch(request.url, _obj_from_dict(fetch_data))
# Now suspend WebAssembly until we resolve that promise
# or time out.
response_js = _run_sync_with_timeout(
fetcher_promise_js,
timeout,
js_abort_controller,
request=request,
response=None,
)
headers = {}
header_iter = response_js.headers.entries()
while True:
iter_value_js = header_iter.next()
if getattr(iter_value_js, "done", False):
break
else:
headers[str(iter_value_js.value[0])] = str(iter_value_js.value[1])
status_code = response_js.status
body: bytes | io.RawIOBase = b""
response = EmscriptenResponse(
status_code=status_code, headers=headers, body=b"", request=request
)
if streaming:
# get via inputstream
if response_js.body is not None:
# get a reader from the fetch response
body_stream_js = response_js.body.getReader()
body = _JSPIReadStream(
body_stream_js, timeout, request, response, js_abort_controller
)
else:
# get directly via arraybuffer
# n.b. this is another async JavaScript call.
body = _run_sync_with_timeout(
response_js.arrayBuffer(),
timeout,
js_abort_controller,
request=request,
response=response,
).to_py()
response.body = body
return response
def _run_sync_with_timeout(
promise: Any,
timeout: float,
js_abort_controller: Any,
request: EmscriptenRequest | None,
response: EmscriptenResponse | None,
) -> Any:
"""
Await a JavaScript promise synchronously with a timeout which is implemented
via the AbortController
:param promise:
Javascript promise to await
:param timeout:
Timeout in seconds
:param js_abort_controller:
A JavaScript AbortController object, used on timeout
:param request:
The request being handled
:param response:
The response being handled (if it exists yet)
:raises _TimeoutError: If the request times out
:raises _RequestError: If the request raises a JavaScript exception
:return: The result of awaiting the promise.
"""
timer_id = None
if timeout > 0:
timer_id = js.setTimeout(
js_abort_controller.abort.bind(js_abort_controller), int(timeout * 1000)
)
try:
from pyodide.ffi import run_sync
# run_sync here uses WebAssembly JavaScript Promise Integration to
# suspend python until the JavaScript promise resolves.
return run_sync(promise)
except JsException as err:
if err.name == "AbortError":
raise _TimeoutError(
message="Request timed out", request=request, response=response
)
else:
raise _RequestError(message=err.message, request=request, response=response)
finally:
if timer_id is not None:
js.clearTimeout(timer_id)
def has_jspi() -> bool:
"""
Return true if jspi can be used.
This requires both browser support and also WebAssembly
to be in the correct state - i.e. that the javascript
call into python was async not sync.
:return: True if jspi can be used.
:rtype: bool
"""
try:
from pyodide.ffi import can_run_sync, run_sync # noqa: F401
return bool(can_run_sync())
except ImportError:
return False
def streaming_ready() -> bool | None:
if _fetcher:
return _fetcher.streaming_ready
else:
return None # no fetcher, return None to signify that
async def wait_for_streaming_ready() -> bool:
if _fetcher:
await _fetcher.js_worker_ready_promise
return True
else:
return False

View File

@ -0,0 +1,22 @@
from __future__ import annotations
from dataclasses import dataclass, field
from ..._base_connection import _TYPE_BODY
@dataclass
class EmscriptenRequest:
method: str
url: str
params: dict[str, str] | None = None
body: _TYPE_BODY | None = None
headers: dict[str, str] = field(default_factory=dict)
timeout: float = 0
decode_content: bool = True
def set_header(self, name: str, value: str) -> None:
self.headers[name.capitalize()] = value
def set_body(self, body: _TYPE_BODY | None) -> None:
self.body = body

View File

@ -0,0 +1,277 @@
from __future__ import annotations
import json as _json
import logging
import typing
from contextlib import contextmanager
from dataclasses import dataclass
from http.client import HTTPException as HTTPException
from io import BytesIO, IOBase
from ...exceptions import InvalidHeader, TimeoutError
from ...response import BaseHTTPResponse
from ...util.retry import Retry
from .request import EmscriptenRequest
if typing.TYPE_CHECKING:
from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection
log = logging.getLogger(__name__)
@dataclass
class EmscriptenResponse:
status_code: int
headers: dict[str, str]
body: IOBase | bytes
request: EmscriptenRequest
class EmscriptenHttpResponseWrapper(BaseHTTPResponse):
def __init__(
self,
internal_response: EmscriptenResponse,
url: str | None = None,
connection: BaseHTTPConnection | BaseHTTPSConnection | None = None,
):
self._pool = None # set by pool class
self._body = None
self._response = internal_response
self._url = url
self._connection = connection
self._closed = False
super().__init__(
headers=internal_response.headers,
status=internal_response.status_code,
request_url=url,
version=0,
version_string="HTTP/?",
reason="",
decode_content=True,
)
self.length_remaining = self._init_length(self._response.request.method)
self.length_is_certain = False
@property
def url(self) -> str | None:
return self._url
@url.setter
def url(self, url: str | None) -> None:
self._url = url
@property
def connection(self) -> BaseHTTPConnection | BaseHTTPSConnection | None:
return self._connection
@property
def retries(self) -> Retry | None:
return self._retries
@retries.setter
def retries(self, retries: Retry | None) -> None:
# Override the request_url if retries has a redirect location.
self._retries = retries
def stream(
self, amt: int | None = 2**16, decode_content: bool | None = None
) -> typing.Generator[bytes]:
"""
A generator wrapper for the read() method. A call will block until
``amt`` bytes have been read from the connection or until the
connection is closed.
:param amt:
How much of the content to read. The generator will return up to
much data per iteration, but may return less. This is particularly
likely when using compressed data. However, the empty string will
never be returned.
:param decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header.
"""
while True:
data = self.read(amt=amt, decode_content=decode_content)
if data:
yield data
else:
break
def _init_length(self, request_method: str | None) -> int | None:
length: int | None
content_length: str | None = self.headers.get("content-length")
if content_length is not None:
try:
# RFC 7230 section 3.3.2 specifies multiple content lengths can
# be sent in a single Content-Length header
# (e.g. Content-Length: 42, 42). This line ensures the values
# are all valid ints and that as long as the `set` length is 1,
# all values are the same. Otherwise, the header is invalid.
lengths = {int(val) for val in content_length.split(",")}
if len(lengths) > 1:
raise InvalidHeader(
"Content-Length contained multiple "
"unmatching values (%s)" % content_length
)
length = lengths.pop()
except ValueError:
length = None
else:
if length < 0:
length = None
else: # if content_length is None
length = None
# Check for responses that shouldn't include a body
if (
self.status in (204, 304)
or 100 <= self.status < 200
or request_method == "HEAD"
):
length = 0
return length
def read(
self,
amt: int | None = None,
decode_content: bool | None = None, # ignored because browser decodes always
cache_content: bool = False,
) -> bytes:
if (
self._closed
or self._response is None
or (isinstance(self._response.body, IOBase) and self._response.body.closed)
):
return b""
with self._error_catcher():
# body has been preloaded as a string by XmlHttpRequest
if not isinstance(self._response.body, IOBase):
self.length_remaining = len(self._response.body)
self.length_is_certain = True
# wrap body in IOStream
self._response.body = BytesIO(self._response.body)
if amt is not None and amt >= 0:
# don't cache partial content
cache_content = False
data = self._response.body.read(amt)
else: # read all we can (and cache it)
data = self._response.body.read()
if cache_content:
self._body = data
if self.length_remaining is not None:
self.length_remaining = max(self.length_remaining - len(data), 0)
if len(data) == 0 or (
self.length_is_certain and self.length_remaining == 0
):
# definitely finished reading, close response stream
self._response.body.close()
return typing.cast(bytes, data)
def read_chunked(
self,
amt: int | None = None,
decode_content: bool | None = None,
) -> typing.Generator[bytes]:
# chunked is handled by browser
while True:
bytes = self.read(amt, decode_content)
if not bytes:
break
yield bytes
def release_conn(self) -> None:
if not self._pool or not self._connection:
return None
self._pool._put_conn(self._connection)
self._connection = None
def drain_conn(self) -> None:
self.close()
@property
def data(self) -> bytes:
if self._body:
return self._body
else:
return self.read(cache_content=True)
def json(self) -> typing.Any:
"""
Deserializes the body of the HTTP response as a Python object.
The body of the HTTP response must be encoded using UTF-8, as per
`RFC 8529 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.
To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
your custom decoder instead.
If the body of the HTTP response is not decodable to UTF-8, a
`UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
valid JSON document, a `json.JSONDecodeError` will be raised.
Read more :ref:`here <json_content>`.
:returns: The body of the HTTP response as a Python object.
"""
data = self.data.decode("utf-8")
return _json.loads(data)
def close(self) -> None:
if not self._closed:
if isinstance(self._response.body, IOBase):
self._response.body.close()
if self._connection:
self._connection.close()
self._connection = None
self._closed = True
@contextmanager
def _error_catcher(self) -> typing.Generator[None]:
"""
Catch Emscripten specific exceptions thrown by fetch.py,
instead re-raising urllib3 variants, so that low-level exceptions
are not leaked in the high-level api.
On exit, release the connection back to the pool.
"""
from .fetch import _RequestError, _TimeoutError # avoid circular import
clean_exit = False
try:
yield
# If no exception is thrown, we should avoid cleaning up
# unnecessarily.
clean_exit = True
except _TimeoutError as e:
raise TimeoutError(str(e))
except _RequestError as e:
raise HTTPException(str(e))
finally:
# If we didn't terminate cleanly, we need to throw away our
# connection.
if not clean_exit:
# The response may not be closed but we're not going to use it
# anymore so close it now
if (
isinstance(self._response.body, IOBase)
and not self._response.body.closed
):
self._response.body.close()
# release the connection back to the pool
self.release_conn()
else:
# If we have read everything from the response stream,
# return the connection back to the pool.
if (
isinstance(self._response.body, IOBase)
and self._response.body.closed
):
self.release_conn()

View File

@ -0,0 +1,564 @@
"""
Module for using pyOpenSSL as a TLS backend. This module was relevant before
the standard library ``ssl`` module supported SNI, but now that we've dropped
support for Python 2.7 all relevant Python versions support SNI so
**this module is no longer recommended**.
This needs the following packages installed:
* `pyOpenSSL`_ (tested with 16.0.0)
* `cryptography`_ (minimum 1.3.4, from pyopenssl)
* `idna`_ (minimum 2.0)
However, pyOpenSSL depends on cryptography, so while we use all three directly here we
end up having relatively few packages required.
You can install them with the following command:
.. code-block:: bash
$ python -m pip install pyopenssl cryptography idna
To activate certificate checking, call
:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code
before you begin making HTTP requests. This can be done in a ``sitecustomize``
module, or at any other time before your application begins using ``urllib3``,
like this:
.. code-block:: python
try:
import urllib3.contrib.pyopenssl
urllib3.contrib.pyopenssl.inject_into_urllib3()
except ImportError:
pass
.. _pyopenssl: https://www.pyopenssl.org
.. _cryptography: https://cryptography.io
.. _idna: https://github.com/kjd/idna
"""
from __future__ import annotations
import OpenSSL.SSL # type: ignore[import-untyped]
from cryptography import x509
try:
from cryptography.x509 import UnsupportedExtension # type: ignore[attr-defined]
except ImportError:
# UnsupportedExtension is gone in cryptography >= 2.1.0
class UnsupportedExtension(Exception): # type: ignore[no-redef]
pass
import logging
import ssl
import typing
from io import BytesIO
from socket import socket as socket_cls
from socket import timeout
from .. import util
if typing.TYPE_CHECKING:
from OpenSSL.crypto import X509 # type: ignore[import-untyped]
__all__ = ["inject_into_urllib3", "extract_from_urllib3"]
# Map from urllib3 to PyOpenSSL compatible parameter-values.
_openssl_versions: dict[int, int] = {
util.ssl_.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
util.ssl_.PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD,
}
if hasattr(ssl, "PROTOCOL_TLSv1_1") and hasattr(OpenSSL.SSL, "TLSv1_1_METHOD"):
_openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD
if hasattr(ssl, "PROTOCOL_TLSv1_2") and hasattr(OpenSSL.SSL, "TLSv1_2_METHOD"):
_openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD
_stdlib_to_openssl_verify = {
ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE,
ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER,
ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
_openssl_to_stdlib_verify = {v: k for k, v in _stdlib_to_openssl_verify.items()}
# The SSLvX values are the most likely to be missing in the future
# but we check them all just to be sure.
_OP_NO_SSLv2_OR_SSLv3: int = getattr(OpenSSL.SSL, "OP_NO_SSLv2", 0) | getattr(
OpenSSL.SSL, "OP_NO_SSLv3", 0
)
_OP_NO_TLSv1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1", 0)
_OP_NO_TLSv1_1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_1", 0)
_OP_NO_TLSv1_2: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_2", 0)
_OP_NO_TLSv1_3: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_3", 0)
_openssl_to_ssl_minimum_version: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.TLSv1: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1,
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1,
ssl.TLSVersion.TLSv1_3: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
),
ssl.TLSVersion.MAXIMUM_SUPPORTED: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
),
}
_openssl_to_ssl_maximum_version: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: (
_OP_NO_SSLv2_OR_SSLv3
| _OP_NO_TLSv1
| _OP_NO_TLSv1_1
| _OP_NO_TLSv1_2
| _OP_NO_TLSv1_3
),
ssl.TLSVersion.TLSv1: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3
),
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3,
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_3,
ssl.TLSVersion.TLSv1_3: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.MAXIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
}
# OpenSSL will only write 16K at a time
SSL_WRITE_BLOCKSIZE = 16384
orig_util_SSLContext = util.ssl_.SSLContext
log = logging.getLogger(__name__)
def inject_into_urllib3() -> None:
"Monkey-patch urllib3 with PyOpenSSL-backed SSL-support."
_validate_dependencies_met()
util.SSLContext = PyOpenSSLContext # type: ignore[assignment]
util.ssl_.SSLContext = PyOpenSSLContext # type: ignore[assignment]
util.IS_PYOPENSSL = True
util.ssl_.IS_PYOPENSSL = True
def extract_from_urllib3() -> None:
"Undo monkey-patching by :func:`inject_into_urllib3`."
util.SSLContext = orig_util_SSLContext
util.ssl_.SSLContext = orig_util_SSLContext
util.IS_PYOPENSSL = False
util.ssl_.IS_PYOPENSSL = False
def _validate_dependencies_met() -> None:
"""
Verifies that PyOpenSSL's package-level dependencies have been met.
Throws `ImportError` if they are not met.
"""
# Method added in `cryptography==1.1`; not available in older versions
from cryptography.x509.extensions import Extensions
if getattr(Extensions, "get_extension_for_class", None) is None:
raise ImportError(
"'cryptography' module missing required functionality. "
"Try upgrading to v1.3.4 or newer."
)
# pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509
# attribute is only present on those versions.
from OpenSSL.crypto import X509
x509 = X509()
if getattr(x509, "_x509", None) is None:
raise ImportError(
"'pyOpenSSL' module missing required functionality. "
"Try upgrading to v0.14 or newer."
)
def _dnsname_to_stdlib(name: str) -> str | None:
"""
Converts a dNSName SubjectAlternativeName field to the form used by the
standard library on the given Python version.
Cryptography produces a dNSName as a unicode string that was idna-decoded
from ASCII bytes. We need to idna-encode that string to get it back, and
then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib
uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8).
If the name cannot be idna-encoded then we return None signalling that
the name given should be skipped.
"""
def idna_encode(name: str) -> bytes | None:
"""
Borrowed wholesale from the Python Cryptography Project. It turns out
that we can't just safely call `idna.encode`: it can explode for
wildcard names. This avoids that problem.
"""
import idna
try:
for prefix in ["*.", "."]:
if name.startswith(prefix):
name = name[len(prefix) :]
return prefix.encode("ascii") + idna.encode(name)
return idna.encode(name)
except idna.core.IDNAError:
return None
# Don't send IPv6 addresses through the IDNA encoder.
if ":" in name:
return name
encoded_name = idna_encode(name)
if encoded_name is None:
return None
return encoded_name.decode("utf-8")
def get_subj_alt_name(peer_cert: X509) -> list[tuple[str, str]]:
"""
Given an PyOpenSSL certificate, provides all the subject alternative names.
"""
cert = peer_cert.to_cryptography()
# We want to find the SAN extension. Ask Cryptography to locate it (it's
# faster than looping in Python)
try:
ext = cert.extensions.get_extension_for_class(x509.SubjectAlternativeName).value
except x509.ExtensionNotFound:
# No such extension, return the empty list.
return []
except (
x509.DuplicateExtension,
UnsupportedExtension,
x509.UnsupportedGeneralNameType,
UnicodeError,
) as e:
# A problem has been found with the quality of the certificate. Assume
# no SAN field is present.
log.warning(
"A problem was encountered with the certificate that prevented "
"urllib3 from finding the SubjectAlternativeName field. This can "
"affect certificate validation. The error was %s",
e,
)
return []
# We want to return dNSName and iPAddress fields. We need to cast the IPs
# back to strings because the match_hostname function wants them as
# strings.
# Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8
# decoded. This is pretty frustrating, but that's what the standard library
# does with certificates, and so we need to attempt to do the same.
# We also want to skip over names which cannot be idna encoded.
names = [
("DNS", name)
for name in map(_dnsname_to_stdlib, ext.get_values_for_type(x509.DNSName))
if name is not None
]
names.extend(
("IP Address", str(name)) for name in ext.get_values_for_type(x509.IPAddress)
)
return names
class WrappedSocket:
"""API-compatibility wrapper for Python OpenSSL's Connection-class."""
def __init__(
self,
connection: OpenSSL.SSL.Connection,
socket: socket_cls,
suppress_ragged_eofs: bool = True,
) -> None:
self.connection = connection
self.socket = socket
self.suppress_ragged_eofs = suppress_ragged_eofs
self._io_refs = 0
self._closed = False
def fileno(self) -> int:
return self.socket.fileno()
# Copy-pasted from Python 3.5 source code
def _decref_socketios(self) -> None:
if self._io_refs > 0:
self._io_refs -= 1
if self._closed:
self.close()
def recv(self, *args: typing.Any, **kwargs: typing.Any) -> bytes:
try:
data = self.connection.recv(*args, **kwargs)
except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
return b""
else:
raise OSError(e.args[0], str(e)) from e
except OpenSSL.SSL.ZeroReturnError:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return b""
else:
raise
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(self.socket, self.socket.gettimeout()):
raise timeout("The read operation timed out") from e
else:
return self.recv(*args, **kwargs)
# TLS 1.3 post-handshake authentication
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"read error: {e!r}") from e
else:
return data # type: ignore[no-any-return]
def recv_into(self, *args: typing.Any, **kwargs: typing.Any) -> int:
try:
return self.connection.recv_into(*args, **kwargs) # type: ignore[no-any-return]
except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
return 0
else:
raise OSError(e.args[0], str(e)) from e
except OpenSSL.SSL.ZeroReturnError:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return 0
else:
raise
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(self.socket, self.socket.gettimeout()):
raise timeout("The read operation timed out") from e
else:
return self.recv_into(*args, **kwargs)
# TLS 1.3 post-handshake authentication
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"read error: {e!r}") from e
def settimeout(self, timeout: float) -> None:
return self.socket.settimeout(timeout)
def _send_until_done(self, data: bytes) -> int:
while True:
try:
return self.connection.send(data) # type: ignore[no-any-return]
except OpenSSL.SSL.WantWriteError as e:
if not util.wait_for_write(self.socket, self.socket.gettimeout()):
raise timeout() from e
continue
except OpenSSL.SSL.SysCallError as e:
raise OSError(e.args[0], str(e)) from e
def sendall(self, data: bytes) -> None:
total_sent = 0
while total_sent < len(data):
sent = self._send_until_done(
data[total_sent : total_sent + SSL_WRITE_BLOCKSIZE]
)
total_sent += sent
def shutdown(self, how: int) -> None:
try:
self.connection.shutdown()
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"shutdown error: {e!r}") from e
def close(self) -> None:
self._closed = True
if self._io_refs <= 0:
self._real_close()
def _real_close(self) -> None:
try:
return self.connection.close() # type: ignore[no-any-return]
except OpenSSL.SSL.Error:
return
def getpeercert(
self, binary_form: bool = False
) -> dict[str, list[typing.Any]] | None:
x509 = self.connection.get_peer_certificate()
if not x509:
return x509 # type: ignore[no-any-return]
if binary_form:
return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509) # type: ignore[no-any-return]
return {
"subject": ((("commonName", x509.get_subject().CN),),), # type: ignore[dict-item]
"subjectAltName": get_subj_alt_name(x509),
}
def version(self) -> str:
return self.connection.get_protocol_version_name() # type: ignore[no-any-return]
def selected_alpn_protocol(self) -> str | None:
alpn_proto = self.connection.get_alpn_proto_negotiated()
return alpn_proto.decode() if alpn_proto else None
WrappedSocket.makefile = socket_cls.makefile # type: ignore[attr-defined]
class PyOpenSSLContext:
"""
I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible
for translating the interface of the standard library ``SSLContext`` object
to calls into PyOpenSSL.
"""
def __init__(self, protocol: int) -> None:
self.protocol = _openssl_versions[protocol]
self._ctx = OpenSSL.SSL.Context(self.protocol)
self._options = 0
self.check_hostname = False
self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED
self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED
self._verify_flags: int = ssl.VERIFY_X509_TRUSTED_FIRST
@property
def options(self) -> int:
return self._options
@options.setter
def options(self, value: int) -> None:
self._options = value
self._set_ctx_options()
@property
def verify_flags(self) -> int:
return self._verify_flags
@verify_flags.setter
def verify_flags(self, value: int) -> None:
self._verify_flags = value
self._ctx.get_cert_store().set_flags(self._verify_flags)
@property
def verify_mode(self) -> int:
return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()]
@verify_mode.setter
def verify_mode(self, value: ssl.VerifyMode) -> None:
self._ctx.set_verify(_stdlib_to_openssl_verify[value], _verify_callback)
def set_default_verify_paths(self) -> None:
self._ctx.set_default_verify_paths()
def set_ciphers(self, ciphers: bytes | str) -> None:
if isinstance(ciphers, str):
ciphers = ciphers.encode("utf-8")
self._ctx.set_cipher_list(ciphers)
def load_verify_locations(
self,
cafile: str | None = None,
capath: str | None = None,
cadata: bytes | None = None,
) -> None:
if cafile is not None:
cafile = cafile.encode("utf-8") # type: ignore[assignment]
if capath is not None:
capath = capath.encode("utf-8") # type: ignore[assignment]
try:
self._ctx.load_verify_locations(cafile, capath)
if cadata is not None:
self._ctx.load_verify_locations(BytesIO(cadata))
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"unable to load trusted certificates: {e!r}") from e
def load_cert_chain(
self,
certfile: str,
keyfile: str | None = None,
password: str | None = None,
) -> None:
try:
self._ctx.use_certificate_chain_file(certfile)
if password is not None:
if not isinstance(password, bytes):
password = password.encode("utf-8") # type: ignore[assignment]
self._ctx.set_passwd_cb(lambda *_: password)
self._ctx.use_privatekey_file(keyfile or certfile)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"Unable to load certificate chain: {e!r}") from e
def set_alpn_protocols(self, protocols: list[bytes | str]) -> None:
protocols = [util.util.to_bytes(p, "ascii") for p in protocols]
return self._ctx.set_alpn_protos(protocols) # type: ignore[no-any-return]
def wrap_socket(
self,
sock: socket_cls,
server_side: bool = False,
do_handshake_on_connect: bool = True,
suppress_ragged_eofs: bool = True,
server_hostname: bytes | str | None = None,
) -> WrappedSocket:
cnx = OpenSSL.SSL.Connection(self._ctx, sock)
# If server_hostname is an IP, don't use it for SNI, per RFC6066 Section 3
if server_hostname and not util.ssl_.is_ipaddress(server_hostname):
if isinstance(server_hostname, str):
server_hostname = server_hostname.encode("utf-8")
cnx.set_tlsext_host_name(server_hostname)
cnx.set_connect_state()
while True:
try:
cnx.do_handshake()
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(sock, sock.gettimeout()):
raise timeout("select timed out") from e
continue
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"bad handshake: {e!r}") from e
break
return WrappedSocket(cnx, sock)
def _set_ctx_options(self) -> None:
self._ctx.set_options(
self._options
| _openssl_to_ssl_minimum_version[self._minimum_version]
| _openssl_to_ssl_maximum_version[self._maximum_version]
)
@property
def minimum_version(self) -> int:
return self._minimum_version
@minimum_version.setter
def minimum_version(self, minimum_version: int) -> None:
self._minimum_version = minimum_version
self._set_ctx_options()
@property
def maximum_version(self) -> int:
return self._maximum_version
@maximum_version.setter
def maximum_version(self, maximum_version: int) -> None:
self._maximum_version = maximum_version
self._set_ctx_options()
def _verify_callback(
cnx: OpenSSL.SSL.Connection,
x509: X509,
err_no: int,
err_depth: int,
return_code: int,
) -> bool:
return err_no == 0

View File

@ -0,0 +1,228 @@
"""
This module contains provisional support for SOCKS proxies from within
urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and
SOCKS5. To enable its functionality, either install PySocks or install this
module with the ``socks`` extra.
The SOCKS implementation supports the full range of urllib3 features. It also
supports the following SOCKS features:
- SOCKS4A (``proxy_url='socks4a://...``)
- SOCKS4 (``proxy_url='socks4://...``)
- SOCKS5 with remote DNS (``proxy_url='socks5h://...``)
- SOCKS5 with local DNS (``proxy_url='socks5://...``)
- Usernames and passwords for the SOCKS proxy
.. note::
It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in
your ``proxy_url`` to ensure that DNS resolution is done from the remote
server instead of client-side when connecting to a domain name.
SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5
supports IPv4, IPv6, and domain names.
When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url``
will be sent as the ``userid`` section of the SOCKS request:
.. code-block:: python
proxy_url="socks4a://<userid>@proxy-host"
When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion
of the ``proxy_url`` will be sent as the username/password to authenticate
with the proxy:
.. code-block:: python
proxy_url="socks5h://<username>:<password>@proxy-host"
"""
from __future__ import annotations
try:
import socks # type: ignore[import-not-found]
except ImportError:
import warnings
from ..exceptions import DependencyWarning
warnings.warn(
(
"SOCKS support in urllib3 requires the installation of optional "
"dependencies: specifically, PySocks. For more information, see "
"https://urllib3.readthedocs.io/en/latest/advanced-usage.html#socks-proxies"
),
DependencyWarning,
)
raise
import typing
from socket import timeout as SocketTimeout
from ..connection import HTTPConnection, HTTPSConnection
from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from ..exceptions import ConnectTimeoutError, NewConnectionError
from ..poolmanager import PoolManager
from ..util.url import parse_url
try:
import ssl
except ImportError:
ssl = None # type: ignore[assignment]
class _TYPE_SOCKS_OPTIONS(typing.TypedDict):
socks_version: int
proxy_host: str | None
proxy_port: str | None
username: str | None
password: str | None
rdns: bool
class SOCKSConnection(HTTPConnection):
"""
A plain-text HTTP connection that connects via a SOCKS proxy.
"""
def __init__(
self,
_socks_options: _TYPE_SOCKS_OPTIONS,
*args: typing.Any,
**kwargs: typing.Any,
) -> None:
self._socks_options = _socks_options
super().__init__(*args, **kwargs)
def _new_conn(self) -> socks.socksocket:
"""
Establish a new connection via the SOCKS proxy.
"""
extra_kw: dict[str, typing.Any] = {}
if self.source_address:
extra_kw["source_address"] = self.source_address
if self.socket_options:
extra_kw["socket_options"] = self.socket_options
try:
conn = socks.create_connection(
(self.host, self.port),
proxy_type=self._socks_options["socks_version"],
proxy_addr=self._socks_options["proxy_host"],
proxy_port=self._socks_options["proxy_port"],
proxy_username=self._socks_options["username"],
proxy_password=self._socks_options["password"],
proxy_rdns=self._socks_options["rdns"],
timeout=self.timeout,
**extra_kw,
)
except SocketTimeout as e:
raise ConnectTimeoutError(
self,
f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
) from e
except socks.ProxyError as e:
# This is fragile as hell, but it seems to be the only way to raise
# useful errors here.
if e.socket_err:
error = e.socket_err
if isinstance(error, SocketTimeout):
raise ConnectTimeoutError(
self,
f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
) from e
else:
# Adding `from e` messes with coverage somehow, so it's omitted.
# See #2386.
raise NewConnectionError(
self, f"Failed to establish a new connection: {error}"
)
else:
raise NewConnectionError(
self, f"Failed to establish a new connection: {e}"
) from e
except OSError as e: # Defensive: PySocks should catch all these.
raise NewConnectionError(
self, f"Failed to establish a new connection: {e}"
) from e
return conn
# We don't need to duplicate the Verified/Unverified distinction from
# urllib3/connection.py here because the HTTPSConnection will already have been
# correctly set to either the Verified or Unverified form by that module. This
# means the SOCKSHTTPSConnection will automatically be the correct type.
class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection):
pass
class SOCKSHTTPConnectionPool(HTTPConnectionPool):
ConnectionCls = SOCKSConnection
class SOCKSHTTPSConnectionPool(HTTPSConnectionPool):
ConnectionCls = SOCKSHTTPSConnection
class SOCKSProxyManager(PoolManager):
"""
A version of the urllib3 ProxyManager that routes connections via the
defined SOCKS proxy.
"""
pool_classes_by_scheme = {
"http": SOCKSHTTPConnectionPool,
"https": SOCKSHTTPSConnectionPool,
}
def __init__(
self,
proxy_url: str,
username: str | None = None,
password: str | None = None,
num_pools: int = 10,
headers: typing.Mapping[str, str] | None = None,
**connection_pool_kw: typing.Any,
):
parsed = parse_url(proxy_url)
if username is None and password is None and parsed.auth is not None:
split = parsed.auth.split(":")
if len(split) == 2:
username, password = split
if parsed.scheme == "socks5":
socks_version = socks.PROXY_TYPE_SOCKS5
rdns = False
elif parsed.scheme == "socks5h":
socks_version = socks.PROXY_TYPE_SOCKS5
rdns = True
elif parsed.scheme == "socks4":
socks_version = socks.PROXY_TYPE_SOCKS4
rdns = False
elif parsed.scheme == "socks4a":
socks_version = socks.PROXY_TYPE_SOCKS4
rdns = True
else:
raise ValueError(f"Unable to determine SOCKS version from {proxy_url}")
self.proxy_url = proxy_url
socks_options = {
"socks_version": socks_version,
"proxy_host": parsed.host,
"proxy_port": parsed.port,
"username": username,
"password": password,
"rdns": rdns,
}
connection_pool_kw["_socks_options"] = socks_options
super().__init__(num_pools, headers, **connection_pool_kw)
self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme

View File

@ -0,0 +1,335 @@
from __future__ import annotations
import socket
import typing
import warnings
from email.errors import MessageDefect
from http.client import IncompleteRead as httplib_IncompleteRead
if typing.TYPE_CHECKING:
from .connection import HTTPConnection
from .connectionpool import ConnectionPool
from .response import HTTPResponse
from .util.retry import Retry
# Base Exceptions
class HTTPError(Exception):
"""Base exception used by this module."""
class HTTPWarning(Warning):
"""Base warning used by this module."""
_TYPE_REDUCE_RESULT = tuple[typing.Callable[..., object], tuple[object, ...]]
class PoolError(HTTPError):
"""Base exception for errors caused within a pool."""
def __init__(self, pool: ConnectionPool, message: str) -> None:
self.pool = pool
self._message = message
super().__init__(f"{pool}: {message}")
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (None, self._message)
class RequestError(PoolError):
"""Base exception for PoolErrors that have associated URLs."""
def __init__(self, pool: ConnectionPool, url: str, message: str) -> None:
self.url = url
super().__init__(pool, message)
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (None, self.url, self._message)
class SSLError(HTTPError):
"""Raised when SSL certificate fails in an HTTPS connection."""
class ProxyError(HTTPError):
"""Raised when the connection to a proxy fails."""
# The original error is also available as __cause__.
original_error: Exception
def __init__(self, message: str, error: Exception) -> None:
super().__init__(message, error)
self.original_error = error
class DecodeError(HTTPError):
"""Raised when automatic decoding based on Content-Type fails."""
class ProtocolError(HTTPError):
"""Raised when something unexpected happens mid-request/response."""
#: Renamed to ProtocolError but aliased for backwards compatibility.
ConnectionError = ProtocolError
# Leaf Exceptions
class MaxRetryError(RequestError):
"""Raised when the maximum number of retries is exceeded.
:param pool: The connection pool
:type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
:param str url: The requested Url
:param reason: The underlying error
:type reason: :class:`Exception`
"""
def __init__(
self, pool: ConnectionPool, url: str, reason: Exception | None = None
) -> None:
self.reason = reason
message = f"Max retries exceeded with url: {url} (Caused by {reason!r})"
super().__init__(pool, url, message)
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (None, self.url, self.reason)
class HostChangedError(RequestError):
"""Raised when an existing pool gets a request for a foreign host."""
def __init__(
self, pool: ConnectionPool, url: str, retries: Retry | int = 3
) -> None:
message = f"Tried to open a foreign host with url: {url}"
super().__init__(pool, url, message)
self.retries = retries
class TimeoutStateError(HTTPError):
"""Raised when passing an invalid state to a timeout"""
class TimeoutError(HTTPError):
"""Raised when a socket timeout error occurs.
Catching this error will catch both :exc:`ReadTimeoutErrors
<ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
"""
class ReadTimeoutError(TimeoutError, RequestError):
"""Raised when a socket timeout occurs while receiving data from a server"""
# This timeout error does not have a URL attached and needs to inherit from the
# base HTTPError
class ConnectTimeoutError(TimeoutError):
"""Raised when a socket timeout occurs while connecting to a server"""
class NewConnectionError(ConnectTimeoutError, HTTPError):
"""Raised when we fail to establish a new connection. Usually ECONNREFUSED."""
def __init__(self, conn: HTTPConnection, message: str) -> None:
self.conn = conn
self._message = message
super().__init__(f"{conn}: {message}")
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (None, self._message)
@property
def pool(self) -> HTTPConnection:
warnings.warn(
"The 'pool' property is deprecated and will be removed "
"in urllib3 v2.1.0. Use 'conn' instead.",
DeprecationWarning,
stacklevel=2,
)
return self.conn
class NameResolutionError(NewConnectionError):
"""Raised when host name resolution fails."""
def __init__(self, host: str, conn: HTTPConnection, reason: socket.gaierror):
message = f"Failed to resolve '{host}' ({reason})"
self._host = host
self._reason = reason
super().__init__(conn, message)
def __reduce__(self) -> _TYPE_REDUCE_RESULT:
# For pickling purposes.
return self.__class__, (self._host, None, self._reason)
class EmptyPoolError(PoolError):
"""Raised when a pool runs out of connections and no more are allowed."""
class FullPoolError(PoolError):
"""Raised when we try to add a connection to a full pool in blocking mode."""
class ClosedPoolError(PoolError):
"""Raised when a request enters a pool after the pool has been closed."""
class LocationValueError(ValueError, HTTPError):
"""Raised when there is something wrong with a given URL input."""
class LocationParseError(LocationValueError):
"""Raised when get_host or similar fails to parse the URL input."""
def __init__(self, location: str) -> None:
message = f"Failed to parse: {location}"
super().__init__(message)
self.location = location
class URLSchemeUnknown(LocationValueError):
"""Raised when a URL input has an unsupported scheme."""
def __init__(self, scheme: str):
message = f"Not supported URL scheme {scheme}"
super().__init__(message)
self.scheme = scheme
class ResponseError(HTTPError):
"""Used as a container for an error reason supplied in a MaxRetryError."""
GENERIC_ERROR = "too many error responses"
SPECIFIC_ERROR = "too many {status_code} error responses"
class SecurityWarning(HTTPWarning):
"""Warned when performing security reducing actions"""
class InsecureRequestWarning(SecurityWarning):
"""Warned when making an unverified HTTPS request."""
class NotOpenSSLWarning(SecurityWarning):
"""Warned when using unsupported SSL library"""
class SystemTimeWarning(SecurityWarning):
"""Warned when system time is suspected to be wrong"""
class InsecurePlatformWarning(SecurityWarning):
"""Warned when certain TLS/SSL configuration is not available on a platform."""
class DependencyWarning(HTTPWarning):
"""
Warned when an attempt is made to import a module with missing optional
dependencies.
"""
class ResponseNotChunked(ProtocolError, ValueError):
"""Response needs to be chunked in order to read it as chunks."""
class BodyNotHttplibCompatible(HTTPError):
"""
Body should be :class:`http.client.HTTPResponse` like
(have an fp attribute which returns raw chunks) for read_chunked().
"""
class IncompleteRead(HTTPError, httplib_IncompleteRead):
"""
Response length doesn't match expected Content-Length
Subclass of :class:`http.client.IncompleteRead` to allow int value
for ``partial`` to avoid creating large objects on streamed reads.
"""
partial: int # type: ignore[assignment]
expected: int
def __init__(self, partial: int, expected: int) -> None:
self.partial = partial
self.expected = expected
def __repr__(self) -> str:
return "IncompleteRead(%i bytes read, %i more expected)" % (
self.partial,
self.expected,
)
class InvalidChunkLength(HTTPError, httplib_IncompleteRead):
"""Invalid chunk length in a chunked response."""
def __init__(self, response: HTTPResponse, length: bytes) -> None:
self.partial: int = response.tell() # type: ignore[assignment]
self.expected: int | None = response.length_remaining
self.response = response
self.length = length
def __repr__(self) -> str:
return "InvalidChunkLength(got length %r, %i bytes read)" % (
self.length,
self.partial,
)
class InvalidHeader(HTTPError):
"""The header provided was somehow invalid."""
class ProxySchemeUnknown(AssertionError, URLSchemeUnknown):
"""ProxyManager does not support the supplied scheme"""
# TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
def __init__(self, scheme: str | None) -> None:
# 'localhost' is here because our URL parser parses
# localhost:8080 -> scheme=localhost, remove if we fix this.
if scheme == "localhost":
scheme = None
if scheme is None:
message = "Proxy URL had no scheme, should start with http:// or https://"
else:
message = f"Proxy URL had unsupported scheme {scheme}, should use http:// or https://"
super().__init__(message)
class ProxySchemeUnsupported(ValueError):
"""Fetching HTTPS resources through HTTPS proxies is unsupported"""
class HeaderParsingError(HTTPError):
"""Raised by assert_header_parsing, but we convert it to a log.warning statement."""
def __init__(
self, defects: list[MessageDefect], unparsed_data: bytes | str | None
) -> None:
message = f"{defects or 'Unknown'}, unparsed data: {unparsed_data!r}"
super().__init__(message)
class UnrewindableBodyError(HTTPError):
"""urllib3 encountered an error when trying to rewind a body"""

View File

@ -0,0 +1,341 @@
from __future__ import annotations
import email.utils
import mimetypes
import typing
_TYPE_FIELD_VALUE = typing.Union[str, bytes]
_TYPE_FIELD_VALUE_TUPLE = typing.Union[
_TYPE_FIELD_VALUE,
tuple[str, _TYPE_FIELD_VALUE],
tuple[str, _TYPE_FIELD_VALUE, str],
]
def guess_content_type(
filename: str | None, default: str = "application/octet-stream"
) -> str:
"""
Guess the "Content-Type" of a file.
:param filename:
The filename to guess the "Content-Type" of using :mod:`mimetypes`.
:param default:
If no "Content-Type" can be guessed, default to `default`.
"""
if filename:
return mimetypes.guess_type(filename)[0] or default
return default
def format_header_param_rfc2231(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Helper function to format and quote a single header parameter using the
strategy defined in RFC 2231.
Particularly useful for header parameters which might contain
non-ASCII values, like file names. This follows
`RFC 2388 Section 4.4 <https://tools.ietf.org/html/rfc2388#section-4.4>`_.
:param name:
The name of the parameter, a string expected to be ASCII only.
:param value:
The value of the parameter, provided as ``bytes`` or `str``.
:returns:
An RFC-2231-formatted unicode string.
.. deprecated:: 2.0.0
Will be removed in urllib3 v2.1.0. This is not valid for
``multipart/form-data`` header parameters.
"""
import warnings
warnings.warn(
"'format_header_param_rfc2231' is deprecated and will be "
"removed in urllib3 v2.1.0. This is not valid for "
"multipart/form-data header parameters.",
DeprecationWarning,
stacklevel=2,
)
if isinstance(value, bytes):
value = value.decode("utf-8")
if not any(ch in value for ch in '"\\\r\n'):
result = f'{name}="{value}"'
try:
result.encode("ascii")
except (UnicodeEncodeError, UnicodeDecodeError):
pass
else:
return result
value = email.utils.encode_rfc2231(value, "utf-8")
value = f"{name}*={value}"
return value
def format_multipart_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Format and quote a single multipart header parameter.
This follows the `WHATWG HTML Standard`_ as of 2021/06/10, matching
the behavior of current browser and curl versions. Values are
assumed to be UTF-8. The ``\\n``, ``\\r``, and ``"`` characters are
percent encoded.
.. _WHATWG HTML Standard:
https://html.spec.whatwg.org/multipage/
form-control-infrastructure.html#multipart-form-data
:param name:
The name of the parameter, an ASCII-only ``str``.
:param value:
The value of the parameter, a ``str`` or UTF-8 encoded
``bytes``.
:returns:
A string ``name="value"`` with the escaped value.
.. versionchanged:: 2.0.0
Matches the WHATWG HTML Standard as of 2021/06/10. Control
characters are no longer percent encoded.
.. versionchanged:: 2.0.0
Renamed from ``format_header_param_html5`` and
``format_header_param``. The old names will be removed in
urllib3 v2.1.0.
"""
if isinstance(value, bytes):
value = value.decode("utf-8")
# percent encode \n \r "
value = value.translate({10: "%0A", 13: "%0D", 34: "%22"})
return f'{name}="{value}"'
def format_header_param_html5(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
.. deprecated:: 2.0.0
Renamed to :func:`format_multipart_header_param`. Will be
removed in urllib3 v2.1.0.
"""
import warnings
warnings.warn(
"'format_header_param_html5' has been renamed to "
"'format_multipart_header_param'. The old name will be "
"removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
return format_multipart_header_param(name, value)
def format_header_param(name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
.. deprecated:: 2.0.0
Renamed to :func:`format_multipart_header_param`. Will be
removed in urllib3 v2.1.0.
"""
import warnings
warnings.warn(
"'format_header_param' has been renamed to "
"'format_multipart_header_param'. The old name will be "
"removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
return format_multipart_header_param(name, value)
class RequestField:
"""
A data container for request body parameters.
:param name:
The name of this request field. Must be unicode.
:param data:
The data/value body.
:param filename:
An optional filename of the request field. Must be unicode.
:param headers:
An optional dict-like object of headers to initially use for the field.
.. versionchanged:: 2.0.0
The ``header_formatter`` parameter is deprecated and will
be removed in urllib3 v2.1.0.
"""
def __init__(
self,
name: str,
data: _TYPE_FIELD_VALUE,
filename: str | None = None,
headers: typing.Mapping[str, str] | None = None,
header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None,
):
self._name = name
self._filename = filename
self.data = data
self.headers: dict[str, str | None] = {}
if headers:
self.headers = dict(headers)
if header_formatter is not None:
import warnings
warnings.warn(
"The 'header_formatter' parameter is deprecated and "
"will be removed in urllib3 v2.1.0.",
DeprecationWarning,
stacklevel=2,
)
self.header_formatter = header_formatter
else:
self.header_formatter = format_multipart_header_param
@classmethod
def from_tuples(
cls,
fieldname: str,
value: _TYPE_FIELD_VALUE_TUPLE,
header_formatter: typing.Callable[[str, _TYPE_FIELD_VALUE], str] | None = None,
) -> RequestField:
"""
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
Supports constructing :class:`~urllib3.fields.RequestField` from
parameter of key/value strings AND key/filetuple. A filetuple is a
(filename, data, MIME type) tuple where the MIME type is optional.
For example::
'foo': 'bar',
'fakefile': ('foofile.txt', 'contents of foofile'),
'realfile': ('barfile.txt', open('realfile').read()),
'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
'nonamefile': 'contents of nonamefile field',
Field names and filenames must be unicode.
"""
filename: str | None
content_type: str | None
data: _TYPE_FIELD_VALUE
if isinstance(value, tuple):
if len(value) == 3:
filename, data, content_type = value
else:
filename, data = value
content_type = guess_content_type(filename)
else:
filename = None
content_type = None
data = value
request_param = cls(
fieldname, data, filename=filename, header_formatter=header_formatter
)
request_param.make_multipart(content_type=content_type)
return request_param
def _render_part(self, name: str, value: _TYPE_FIELD_VALUE) -> str:
"""
Override this method to change how each multipart header
parameter is formatted. By default, this calls
:func:`format_multipart_header_param`.
:param name:
The name of the parameter, an ASCII-only ``str``.
:param value:
The value of the parameter, a ``str`` or UTF-8 encoded
``bytes``.
:meta public:
"""
return self.header_formatter(name, value)
def _render_parts(
self,
header_parts: (
dict[str, _TYPE_FIELD_VALUE | None]
| typing.Sequence[tuple[str, _TYPE_FIELD_VALUE | None]]
),
) -> str:
"""
Helper function to format and quote a single header.
Useful for single headers that are composed of multiple items. E.g.,
'Content-Disposition' fields.
:param header_parts:
A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
as `k1="v1"; k2="v2"; ...`.
"""
iterable: typing.Iterable[tuple[str, _TYPE_FIELD_VALUE | None]]
parts = []
if isinstance(header_parts, dict):
iterable = header_parts.items()
else:
iterable = header_parts
for name, value in iterable:
if value is not None:
parts.append(self._render_part(name, value))
return "; ".join(parts)
def render_headers(self) -> str:
"""
Renders the headers for this request field.
"""
lines = []
sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"]
for sort_key in sort_keys:
if self.headers.get(sort_key, False):
lines.append(f"{sort_key}: {self.headers[sort_key]}")
for header_name, header_value in self.headers.items():
if header_name not in sort_keys:
if header_value:
lines.append(f"{header_name}: {header_value}")
lines.append("\r\n")
return "\r\n".join(lines)
def make_multipart(
self,
content_disposition: str | None = None,
content_type: str | None = None,
content_location: str | None = None,
) -> None:
"""
Makes this request field into a multipart request field.
This method overrides "Content-Disposition", "Content-Type" and
"Content-Location" headers to the request parameter.
:param content_disposition:
The 'Content-Disposition' of the request body. Defaults to 'form-data'
:param content_type:
The 'Content-Type' of the request body.
:param content_location:
The 'Content-Location' of the request body.
"""
content_disposition = (content_disposition or "form-data") + "; ".join(
[
"",
self._render_parts(
(("name", self._name), ("filename", self._filename))
),
]
)
self.headers["Content-Disposition"] = content_disposition
self.headers["Content-Type"] = content_type
self.headers["Content-Location"] = content_location

View File

@ -0,0 +1,89 @@
from __future__ import annotations
import binascii
import codecs
import os
import typing
from io import BytesIO
from .fields import _TYPE_FIELD_VALUE_TUPLE, RequestField
writer = codecs.lookup("utf-8")[3]
_TYPE_FIELDS_SEQUENCE = typing.Sequence[
typing.Union[tuple[str, _TYPE_FIELD_VALUE_TUPLE], RequestField]
]
_TYPE_FIELDS = typing.Union[
_TYPE_FIELDS_SEQUENCE,
typing.Mapping[str, _TYPE_FIELD_VALUE_TUPLE],
]
def choose_boundary() -> str:
"""
Our embarrassingly-simple replacement for mimetools.choose_boundary.
"""
return binascii.hexlify(os.urandom(16)).decode()
def iter_field_objects(fields: _TYPE_FIELDS) -> typing.Iterable[RequestField]:
"""
Iterate over fields.
Supports list of (k, v) tuples and dicts, and lists of
:class:`~urllib3.fields.RequestField`.
"""
iterable: typing.Iterable[RequestField | tuple[str, _TYPE_FIELD_VALUE_TUPLE]]
if isinstance(fields, typing.Mapping):
iterable = fields.items()
else:
iterable = fields
for field in iterable:
if isinstance(field, RequestField):
yield field
else:
yield RequestField.from_tuples(*field)
def encode_multipart_formdata(
fields: _TYPE_FIELDS, boundary: str | None = None
) -> tuple[bytes, str]:
"""
Encode a dictionary of ``fields`` using the multipart/form-data MIME format.
:param fields:
Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).
Values are processed by :func:`urllib3.fields.RequestField.from_tuples`.
:param boundary:
If not specified, then a random boundary will be generated using
:func:`urllib3.filepost.choose_boundary`.
"""
body = BytesIO()
if boundary is None:
boundary = choose_boundary()
for field in iter_field_objects(fields):
body.write(f"--{boundary}\r\n".encode("latin-1"))
writer(body).write(field.render_headers())
data = field.data
if isinstance(data, int):
data = str(data) # Backwards compatibility
if isinstance(data, str):
writer(body).write(data)
else:
body.write(data)
body.write(b"\r\n")
body.write(f"--{boundary}--\r\n".encode("latin-1"))
content_type = f"multipart/form-data; boundary={boundary}"
return body.getvalue(), content_type

View File

@ -0,0 +1,53 @@
from __future__ import annotations
from importlib.metadata import version
__all__ = [
"inject_into_urllib3",
"extract_from_urllib3",
]
import typing
orig_HTTPSConnection: typing.Any = None
def inject_into_urllib3() -> None:
# First check if h2 version is valid
h2_version = version("h2")
if not h2_version.startswith("4."):
raise ImportError(
"urllib3 v2 supports h2 version 4.x.x, currently "
f"the 'h2' module is compiled with {h2_version!r}. "
"See: https://github.com/urllib3/urllib3/issues/3290"
)
# Import here to avoid circular dependencies.
from .. import connection as urllib3_connection
from .. import util as urllib3_util
from ..connectionpool import HTTPSConnectionPool
from ..util import ssl_ as urllib3_util_ssl
from .connection import HTTP2Connection
global orig_HTTPSConnection
orig_HTTPSConnection = urllib3_connection.HTTPSConnection
HTTPSConnectionPool.ConnectionCls = HTTP2Connection
urllib3_connection.HTTPSConnection = HTTP2Connection # type: ignore[misc]
# TODO: Offer 'http/1.1' as well, but for testing purposes this is handy.
urllib3_util.ALPN_PROTOCOLS = ["h2"]
urllib3_util_ssl.ALPN_PROTOCOLS = ["h2"]
def extract_from_urllib3() -> None:
from .. import connection as urllib3_connection
from .. import util as urllib3_util
from ..connectionpool import HTTPSConnectionPool
from ..util import ssl_ as urllib3_util_ssl
HTTPSConnectionPool.ConnectionCls = orig_HTTPSConnection
urllib3_connection.HTTPSConnection = orig_HTTPSConnection # type: ignore[misc]
urllib3_util.ALPN_PROTOCOLS = ["http/1.1"]
urllib3_util_ssl.ALPN_PROTOCOLS = ["http/1.1"]

View File

@ -0,0 +1,356 @@
from __future__ import annotations
import logging
import re
import threading
import types
import typing
import h2.config # type: ignore[import-untyped]
import h2.connection # type: ignore[import-untyped]
import h2.events # type: ignore[import-untyped]
from .._base_connection import _TYPE_BODY
from .._collections import HTTPHeaderDict
from ..connection import HTTPSConnection, _get_default_user_agent
from ..exceptions import ConnectionError
from ..response import BaseHTTPResponse
orig_HTTPSConnection = HTTPSConnection
T = typing.TypeVar("T")
log = logging.getLogger(__name__)
RE_IS_LEGAL_HEADER_NAME = re.compile(rb"^[!#$%&'*+\-.^_`|~0-9a-z]+$")
RE_IS_ILLEGAL_HEADER_VALUE = re.compile(rb"[\0\x00\x0a\x0d\r\n]|^[ \r\n\t]|[ \r\n\t]$")
def _is_legal_header_name(name: bytes) -> bool:
"""
"An implementation that validates fields according to the definitions in Sections
5.1 and 5.5 of [HTTP] only needs an additional check that field names do not
include uppercase characters." (https://httpwg.org/specs/rfc9113.html#n-field-validity)
`http.client._is_legal_header_name` does not validate the field name according to the
HTTP 1.1 spec, so we do that here, in addition to checking for uppercase characters.
This does not allow for the `:` character in the header name, so should not
be used to validate pseudo-headers.
"""
return bool(RE_IS_LEGAL_HEADER_NAME.match(name))
def _is_illegal_header_value(value: bytes) -> bool:
"""
"A field value MUST NOT contain the zero value (ASCII NUL, 0x00), line feed
(ASCII LF, 0x0a), or carriage return (ASCII CR, 0x0d) at any position. A field
value MUST NOT start or end with an ASCII whitespace character (ASCII SP or HTAB,
0x20 or 0x09)." (https://httpwg.org/specs/rfc9113.html#n-field-validity)
"""
return bool(RE_IS_ILLEGAL_HEADER_VALUE.search(value))
class _LockedObject(typing.Generic[T]):
"""
A wrapper class that hides a specific object behind a lock.
The goal here is to provide a simple way to protect access to an object
that cannot safely be simultaneously accessed from multiple threads. The
intended use of this class is simple: take hold of it with a context
manager, which returns the protected object.
"""
__slots__ = (
"lock",
"_obj",
)
def __init__(self, obj: T):
self.lock = threading.RLock()
self._obj = obj
def __enter__(self) -> T:
self.lock.acquire()
return self._obj
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: types.TracebackType | None,
) -> None:
self.lock.release()
class HTTP2Connection(HTTPSConnection):
def __init__(
self, host: str, port: int | None = None, **kwargs: typing.Any
) -> None:
self._h2_conn = self._new_h2_conn()
self._h2_stream: int | None = None
self._headers: list[tuple[bytes, bytes]] = []
if "proxy" in kwargs or "proxy_config" in kwargs: # Defensive:
raise NotImplementedError("Proxies aren't supported with HTTP/2")
super().__init__(host, port, **kwargs)
if self._tunnel_host is not None:
raise NotImplementedError("Tunneling isn't supported with HTTP/2")
def _new_h2_conn(self) -> _LockedObject[h2.connection.H2Connection]:
config = h2.config.H2Configuration(client_side=True)
return _LockedObject(h2.connection.H2Connection(config=config))
def connect(self) -> None:
super().connect()
with self._h2_conn as conn:
conn.initiate_connection()
if data_to_send := conn.data_to_send():
self.sock.sendall(data_to_send)
def putrequest( # type: ignore[override]
self,
method: str,
url: str,
**kwargs: typing.Any,
) -> None:
"""putrequest
This deviates from the HTTPConnection method signature since we never need to override
sending accept-encoding headers or the host header.
"""
if "skip_host" in kwargs:
raise NotImplementedError("`skip_host` isn't supported")
if "skip_accept_encoding" in kwargs:
raise NotImplementedError("`skip_accept_encoding` isn't supported")
self._request_url = url or "/"
self._validate_path(url) # type: ignore[attr-defined]
if ":" in self.host:
authority = f"[{self.host}]:{self.port or 443}"
else:
authority = f"{self.host}:{self.port or 443}"
self._headers.append((b":scheme", b"https"))
self._headers.append((b":method", method.encode()))
self._headers.append((b":authority", authority.encode()))
self._headers.append((b":path", url.encode()))
with self._h2_conn as conn:
self._h2_stream = conn.get_next_available_stream_id()
def putheader(self, header: str | bytes, *values: str | bytes) -> None: # type: ignore[override]
# TODO SKIPPABLE_HEADERS from urllib3 are ignored.
header = header.encode() if isinstance(header, str) else header
header = header.lower() # A lot of upstream code uses capitalized headers.
if not _is_legal_header_name(header):
raise ValueError(f"Illegal header name {str(header)}")
for value in values:
value = value.encode() if isinstance(value, str) else value
if _is_illegal_header_value(value):
raise ValueError(f"Illegal header value {str(value)}")
self._headers.append((header, value))
def endheaders(self, message_body: typing.Any = None) -> None: # type: ignore[override]
if self._h2_stream is None:
raise ConnectionError("Must call `putrequest` first.")
with self._h2_conn as conn:
conn.send_headers(
stream_id=self._h2_stream,
headers=self._headers,
end_stream=(message_body is None),
)
if data_to_send := conn.data_to_send():
self.sock.sendall(data_to_send)
self._headers = [] # Reset headers for the next request.
def send(self, data: typing.Any) -> None:
"""Send data to the server.
`data` can be: `str`, `bytes`, an iterable, or file-like objects
that support a .read() method.
"""
if self._h2_stream is None:
raise ConnectionError("Must call `putrequest` first.")
with self._h2_conn as conn:
if data_to_send := conn.data_to_send():
self.sock.sendall(data_to_send)
if hasattr(data, "read"): # file-like objects
while True:
chunk = data.read(self.blocksize)
if not chunk:
break
if isinstance(chunk, str):
chunk = chunk.encode() # pragma: no cover
conn.send_data(self._h2_stream, chunk, end_stream=False)
if data_to_send := conn.data_to_send():
self.sock.sendall(data_to_send)
conn.end_stream(self._h2_stream)
return
if isinstance(data, str): # str -> bytes
data = data.encode()
try:
if isinstance(data, bytes):
conn.send_data(self._h2_stream, data, end_stream=True)
if data_to_send := conn.data_to_send():
self.sock.sendall(data_to_send)
else:
for chunk in data:
conn.send_data(self._h2_stream, chunk, end_stream=False)
if data_to_send := conn.data_to_send():
self.sock.sendall(data_to_send)
conn.end_stream(self._h2_stream)
except TypeError:
raise TypeError(
"`data` should be str, bytes, iterable, or file. got %r"
% type(data)
)
def set_tunnel(
self,
host: str,
port: int | None = None,
headers: typing.Mapping[str, str] | None = None,
scheme: str = "http",
) -> None:
raise NotImplementedError(
"HTTP/2 does not support setting up a tunnel through a proxy"
)
def getresponse( # type: ignore[override]
self,
) -> HTTP2Response:
status = None
data = bytearray()
with self._h2_conn as conn:
end_stream = False
while not end_stream:
# TODO: Arbitrary read value.
if received_data := self.sock.recv(65535):
events = conn.receive_data(received_data)
for event in events:
if isinstance(event, h2.events.ResponseReceived):
headers = HTTPHeaderDict()
for header, value in event.headers:
if header == b":status":
status = int(value.decode())
else:
headers.add(
header.decode("ascii"), value.decode("ascii")
)
elif isinstance(event, h2.events.DataReceived):
data += event.data
conn.acknowledge_received_data(
event.flow_controlled_length, event.stream_id
)
elif isinstance(event, h2.events.StreamEnded):
end_stream = True
if data_to_send := conn.data_to_send():
self.sock.sendall(data_to_send)
assert status is not None
return HTTP2Response(
status=status,
headers=headers,
request_url=self._request_url,
data=bytes(data),
)
def request( # type: ignore[override]
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
*,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
**kwargs: typing.Any,
) -> None:
"""Send an HTTP/2 request"""
if "chunked" in kwargs:
# TODO this is often present from upstream.
# raise NotImplementedError("`chunked` isn't supported with HTTP/2")
pass
if self.sock is not None:
self.sock.settimeout(self.timeout)
self.putrequest(method, url)
headers = headers or {}
for k, v in headers.items():
if k.lower() == "transfer-encoding" and v == "chunked":
continue
else:
self.putheader(k, v)
if b"user-agent" not in dict(self._headers):
self.putheader(b"user-agent", _get_default_user_agent())
if body:
self.endheaders(message_body=body)
self.send(body)
else:
self.endheaders()
def close(self) -> None:
with self._h2_conn as conn:
try:
conn.close_connection()
if data := conn.data_to_send():
self.sock.sendall(data)
except Exception:
pass
# Reset all our HTTP/2 connection state.
self._h2_conn = self._new_h2_conn()
self._h2_stream = None
self._headers = []
super().close()
class HTTP2Response(BaseHTTPResponse):
# TODO: This is a woefully incomplete response object, but works for non-streaming.
def __init__(
self,
status: int,
headers: HTTPHeaderDict,
request_url: str,
data: bytes,
decode_content: bool = False, # TODO: support decoding
) -> None:
super().__init__(
status=status,
headers=headers,
# Following CPython, we map HTTP versions to major * 10 + minor integers
version=20,
version_string="HTTP/2",
# No reason phrase in HTTP/2
reason=None,
decode_content=decode_content,
request_url=request_url,
)
self._data = data
self.length_remaining = 0
@property
def data(self) -> bytes:
return self._data
def get_redirect_location(self) -> None:
return None
def close(self) -> None:
pass

View File

@ -0,0 +1,87 @@
from __future__ import annotations
import threading
class _HTTP2ProbeCache:
__slots__ = (
"_lock",
"_cache_locks",
"_cache_values",
)
def __init__(self) -> None:
self._lock = threading.Lock()
self._cache_locks: dict[tuple[str, int], threading.RLock] = {}
self._cache_values: dict[tuple[str, int], bool | None] = {}
def acquire_and_get(self, host: str, port: int) -> bool | None:
# By the end of this block we know that
# _cache_[values,locks] is available.
value = None
with self._lock:
key = (host, port)
try:
value = self._cache_values[key]
# If it's a known value we return right away.
if value is not None:
return value
except KeyError:
self._cache_locks[key] = threading.RLock()
self._cache_values[key] = None
# If the value is unknown, we acquire the lock to signal
# to the requesting thread that the probe is in progress
# or that the current thread needs to return their findings.
key_lock = self._cache_locks[key]
key_lock.acquire()
try:
# If the by the time we get the lock the value has been
# updated we want to return the updated value.
value = self._cache_values[key]
# In case an exception like KeyboardInterrupt is raised here.
except BaseException as e: # Defensive:
assert not isinstance(e, KeyError) # KeyError shouldn't be possible.
key_lock.release()
raise
return value
def set_and_release(
self, host: str, port: int, supports_http2: bool | None
) -> None:
key = (host, port)
key_lock = self._cache_locks[key]
with key_lock: # Uses an RLock, so can be locked again from same thread.
if supports_http2 is None and self._cache_values[key] is not None:
raise ValueError(
"Cannot reset HTTP/2 support for origin after value has been set."
) # Defensive: not expected in normal usage
self._cache_values[key] = supports_http2
key_lock.release()
def _values(self) -> dict[tuple[str, int], bool | None]:
"""This function is for testing purposes only. Gets the current state of the probe cache"""
with self._lock:
return {k: v for k, v in self._cache_values.items()}
def _reset(self) -> None:
"""This function is for testing purposes only. Reset the cache values"""
with self._lock:
self._cache_locks = {}
self._cache_values = {}
_HTTP2_PROBE_CACHE = _HTTP2ProbeCache()
set_and_release = _HTTP2_PROBE_CACHE.set_and_release
acquire_and_get = _HTTP2_PROBE_CACHE.acquire_and_get
_values = _HTTP2_PROBE_CACHE._values
_reset = _HTTP2_PROBE_CACHE._reset
__all__ = [
"set_and_release",
"acquire_and_get",
]

View File

@ -0,0 +1,637 @@
from __future__ import annotations
import functools
import logging
import typing
import warnings
from types import TracebackType
from urllib.parse import urljoin
from ._collections import HTTPHeaderDict, RecentlyUsedContainer
from ._request_methods import RequestMethods
from .connection import ProxyConfig
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
from .exceptions import (
LocationValueError,
MaxRetryError,
ProxySchemeUnknown,
URLSchemeUnknown,
)
from .response import BaseHTTPResponse
from .util.connection import _TYPE_SOCKET_OPTIONS
from .util.proxy import connection_requires_http_tunnel
from .util.retry import Retry
from .util.timeout import Timeout
from .util.url import Url, parse_url
if typing.TYPE_CHECKING:
import ssl
from typing_extensions import Self
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]
log = logging.getLogger(__name__)
SSL_KEYWORDS = (
"key_file",
"cert_file",
"cert_reqs",
"ca_certs",
"ca_cert_data",
"ssl_version",
"ssl_minimum_version",
"ssl_maximum_version",
"ca_cert_dir",
"ssl_context",
"key_password",
"server_hostname",
)
# Default value for `blocksize` - a new parameter introduced to
# http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7
_DEFAULT_BLOCKSIZE = 16384
class PoolKey(typing.NamedTuple):
"""
All known keyword arguments that could be provided to the pool manager, its
pools, or the underlying connections.
All custom key schemes should include the fields in this key at a minimum.
"""
key_scheme: str
key_host: str
key_port: int | None
key_timeout: Timeout | float | int | None
key_retries: Retry | bool | int | None
key_block: bool | None
key_source_address: tuple[str, int] | None
key_key_file: str | None
key_key_password: str | None
key_cert_file: str | None
key_cert_reqs: str | None
key_ca_certs: str | None
key_ca_cert_data: str | bytes | None
key_ssl_version: int | str | None
key_ssl_minimum_version: ssl.TLSVersion | None
key_ssl_maximum_version: ssl.TLSVersion | None
key_ca_cert_dir: str | None
key_ssl_context: ssl.SSLContext | None
key_maxsize: int | None
key_headers: frozenset[tuple[str, str]] | None
key__proxy: Url | None
key__proxy_headers: frozenset[tuple[str, str]] | None
key__proxy_config: ProxyConfig | None
key_socket_options: _TYPE_SOCKET_OPTIONS | None
key__socks_options: frozenset[tuple[str, str]] | None
key_assert_hostname: bool | str | None
key_assert_fingerprint: str | None
key_server_hostname: str | None
key_blocksize: int | None
def _default_key_normalizer(
key_class: type[PoolKey], request_context: dict[str, typing.Any]
) -> PoolKey:
"""
Create a pool key out of a request context dictionary.
According to RFC 3986, both the scheme and host are case-insensitive.
Therefore, this function normalizes both before constructing the pool
key for an HTTPS request. If you wish to change this behaviour, provide
alternate callables to ``key_fn_by_scheme``.
:param key_class:
The class to use when constructing the key. This should be a namedtuple
with the ``scheme`` and ``host`` keys at a minimum.
:type key_class: namedtuple
:param request_context:
A dictionary-like object that contain the context for a request.
:type request_context: dict
:return: A namedtuple that can be used as a connection pool key.
:rtype: PoolKey
"""
# Since we mutate the dictionary, make a copy first
context = request_context.copy()
context["scheme"] = context["scheme"].lower()
context["host"] = context["host"].lower()
# These are both dictionaries and need to be transformed into frozensets
for key in ("headers", "_proxy_headers", "_socks_options"):
if key in context and context[key] is not None:
context[key] = frozenset(context[key].items())
# The socket_options key may be a list and needs to be transformed into a
# tuple.
socket_opts = context.get("socket_options")
if socket_opts is not None:
context["socket_options"] = tuple(socket_opts)
# Map the kwargs to the names in the namedtuple - this is necessary since
# namedtuples can't have fields starting with '_'.
for key in list(context.keys()):
context["key_" + key] = context.pop(key)
# Default to ``None`` for keys missing from the context
for field in key_class._fields:
if field not in context:
context[field] = None
# Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context
if context.get("key_blocksize") is None:
context["key_blocksize"] = _DEFAULT_BLOCKSIZE
return key_class(**context)
#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
"http": functools.partial(_default_key_normalizer, PoolKey),
"https": functools.partial(_default_key_normalizer, PoolKey),
}
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}
class PoolManager(RequestMethods):
"""
Allows for arbitrary requests while transparently keeping track of
necessary connection pools for you.
:param num_pools:
Number of connection pools to cache before discarding the least
recently used pool.
:param headers:
Headers to include with all requests, unless other headers are given
explicitly.
:param \\**connection_pool_kw:
Additional parameters are used to create fresh
:class:`urllib3.connectionpool.ConnectionPool` instances.
Example:
.. code-block:: python
import urllib3
http = urllib3.PoolManager(num_pools=2)
resp1 = http.request("GET", "https://google.com/")
resp2 = http.request("GET", "https://google.com/mail")
resp3 = http.request("GET", "https://yahoo.com/")
print(len(http.pools))
# 2
"""
proxy: Url | None = None
proxy_config: ProxyConfig | None = None
def __init__(
self,
num_pools: int = 10,
headers: typing.Mapping[str, str] | None = None,
**connection_pool_kw: typing.Any,
) -> None:
super().__init__(headers)
self.connection_pool_kw = connection_pool_kw
self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
self.pools = RecentlyUsedContainer(num_pools)
# Locally set the pool classes and keys so other PoolManagers can
# override them.
self.pool_classes_by_scheme = pool_classes_by_scheme
self.key_fn_by_scheme = key_fn_by_scheme.copy()
def __enter__(self) -> Self:
return self
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: TracebackType | None,
) -> typing.Literal[False]:
self.clear()
# Return False to re-raise any potential exceptions
return False
def _new_pool(
self,
scheme: str,
host: str,
port: int,
request_context: dict[str, typing.Any] | None = None,
) -> HTTPConnectionPool:
"""
Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
any additional pool keyword arguments.
If ``request_context`` is provided, it is provided as keyword arguments
to the pool class used. This method is used to actually create the
connection pools handed out by :meth:`connection_from_url` and
companion methods. It is intended to be overridden for customization.
"""
pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]
if request_context is None:
request_context = self.connection_pool_kw.copy()
# Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
# set to 'None' in the request_context.
if request_context.get("blocksize") is None:
request_context["blocksize"] = _DEFAULT_BLOCKSIZE
# Although the context has everything necessary to create the pool,
# this function has historically only used the scheme, host, and port
# in the positional args. When an API change is acceptable these can
# be removed.
for key in ("scheme", "host", "port"):
request_context.pop(key, None)
if scheme == "http":
for kw in SSL_KEYWORDS:
request_context.pop(kw, None)
return pool_cls(host, port, **request_context)
def clear(self) -> None:
"""
Empty our store of pools and direct them all to close.
This will not affect in-flight connections, but they will not be
re-used after completion.
"""
self.pools.clear()
def connection_from_host(
self,
host: str | None,
port: int | None = None,
scheme: str | None = "http",
pool_kwargs: dict[str, typing.Any] | None = None,
) -> HTTPConnectionPool:
"""
Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.
If ``port`` isn't given, it will be derived from the ``scheme`` using
``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
provided, it is merged with the instance's ``connection_pool_kw``
variable and used to create the new connection pool, if one is
needed.
"""
if not host:
raise LocationValueError("No host specified.")
request_context = self._merge_pool_kwargs(pool_kwargs)
request_context["scheme"] = scheme or "http"
if not port:
port = port_by_scheme.get(request_context["scheme"].lower(), 80)
request_context["port"] = port
request_context["host"] = host
return self.connection_from_context(request_context)
def connection_from_context(
self, request_context: dict[str, typing.Any]
) -> HTTPConnectionPool:
"""
Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.
``request_context`` must at least contain the ``scheme`` key and its
value must be a key in ``key_fn_by_scheme`` instance variable.
"""
if "strict" in request_context:
warnings.warn(
"The 'strict' parameter is no longer needed on Python 3+. "
"This will raise an error in urllib3 v2.1.0.",
DeprecationWarning,
)
request_context.pop("strict")
scheme = request_context["scheme"].lower()
pool_key_constructor = self.key_fn_by_scheme.get(scheme)
if not pool_key_constructor:
raise URLSchemeUnknown(scheme)
pool_key = pool_key_constructor(request_context)
return self.connection_from_pool_key(pool_key, request_context=request_context)
def connection_from_pool_key(
self, pool_key: PoolKey, request_context: dict[str, typing.Any]
) -> HTTPConnectionPool:
"""
Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.
``pool_key`` should be a namedtuple that only contains immutable
objects. At a minimum it must have the ``scheme``, ``host``, and
``port`` fields.
"""
with self.pools.lock:
# If the scheme, host, or port doesn't match existing open
# connections, open a new ConnectionPool.
pool = self.pools.get(pool_key)
if pool:
return pool
# Make a fresh ConnectionPool of the desired type
scheme = request_context["scheme"]
host = request_context["host"]
port = request_context["port"]
pool = self._new_pool(scheme, host, port, request_context=request_context)
self.pools[pool_key] = pool
return pool
def connection_from_url(
self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
) -> HTTPConnectionPool:
"""
Similar to :func:`urllib3.connectionpool.connection_from_url`.
If ``pool_kwargs`` is not provided and a new pool needs to be
constructed, ``self.connection_pool_kw`` is used to initialize
the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
is provided, it is used instead. Note that if a new pool does not
need to be created for the request, the provided ``pool_kwargs`` are
not used.
"""
u = parse_url(url)
return self.connection_from_host(
u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
)
def _merge_pool_kwargs(
self, override: dict[str, typing.Any] | None
) -> dict[str, typing.Any]:
"""
Merge a dictionary of override values for self.connection_pool_kw.
This does not modify self.connection_pool_kw and returns a new dict.
Any keys in the override dictionary with a value of ``None`` are
removed from the merged dictionary.
"""
base_pool_kwargs = self.connection_pool_kw.copy()
if override:
for key, value in override.items():
if value is None:
try:
del base_pool_kwargs[key]
except KeyError:
pass
else:
base_pool_kwargs[key] = value
return base_pool_kwargs
def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
"""
Indicates if the proxy requires the complete destination URL in the
request. Normally this is only needed when not using an HTTP CONNECT
tunnel.
"""
if self.proxy is None:
return False
return not connection_requires_http_tunnel(
self.proxy, self.proxy_config, parsed_url.scheme
)
def urlopen( # type: ignore[override]
self, method: str, url: str, redirect: bool = True, **kw: typing.Any
) -> BaseHTTPResponse:
"""
Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
with custom cross-host redirect logic and only sends the request-uri
portion of the ``url``.
The given ``url`` parameter must be absolute, such that an appropriate
:class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
"""
u = parse_url(url)
if u.scheme is None:
warnings.warn(
"URLs without a scheme (ie 'https://') are deprecated and will raise an error "
"in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
"start with 'https://' or 'http://'. Read more in this issue: "
"https://github.com/urllib3/urllib3/issues/2920",
category=DeprecationWarning,
stacklevel=2,
)
conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
kw["assert_same_host"] = False
kw["redirect"] = False
if "headers" not in kw:
kw["headers"] = self.headers
if self._proxy_requires_url_absolute_form(u):
response = conn.urlopen(method, url, **kw)
else:
response = conn.urlopen(method, u.request_uri, **kw)
redirect_location = redirect and response.get_redirect_location()
if not redirect_location:
return response
# Support relative URLs for redirecting.
redirect_location = urljoin(url, redirect_location)
if response.status == 303:
# Change the method according to RFC 9110, Section 15.4.4.
method = "GET"
# And lose the body not to transfer anything sensitive.
kw["body"] = None
kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()
retries = kw.get("retries")
if not isinstance(retries, Retry):
retries = Retry.from_int(retries, redirect=redirect)
# Strip headers marked as unsafe to forward to the redirected location.
# Check remove_headers_on_redirect to avoid a potential network call within
# conn.is_same_host() which may use socket.gethostbyname() in the future.
if retries.remove_headers_on_redirect and not conn.is_same_host(
redirect_location
):
new_headers = kw["headers"].copy()
for header in kw["headers"]:
if header.lower() in retries.remove_headers_on_redirect:
new_headers.pop(header, None)
kw["headers"] = new_headers
try:
retries = retries.increment(method, url, response=response, _pool=conn)
except MaxRetryError:
if retries.raise_on_redirect:
response.drain_conn()
raise
return response
kw["retries"] = retries
kw["redirect"] = redirect
log.info("Redirecting %s -> %s", url, redirect_location)
response.drain_conn()
return self.urlopen(method, redirect_location, **kw)
class ProxyManager(PoolManager):
"""
Behaves just like :class:`PoolManager`, but sends all requests through
the defined proxy, using the CONNECT method for HTTPS URLs.
:param proxy_url:
The URL of the proxy to be used.
:param proxy_headers:
A dictionary containing headers that will be sent to the proxy. In case
of HTTP they are being sent with each request, while in the
HTTPS/CONNECT case they are sent only once. Could be used for proxy
authentication.
:param proxy_ssl_context:
The proxy SSL context is used to establish the TLS connection to the
proxy when using HTTPS proxies.
:param use_forwarding_for_https:
(Defaults to False) If set to True will forward requests to the HTTPS
proxy to be made on behalf of the client instead of creating a TLS
tunnel via the CONNECT method. **Enabling this flag means that request
and response headers and content will be visible from the HTTPS proxy**
whereas tunneling keeps request and response headers and content
private. IP address, target hostname, SNI, and port are always visible
to an HTTPS proxy even when this flag is disabled.
:param proxy_assert_hostname:
The hostname of the certificate to verify against.
:param proxy_assert_fingerprint:
The fingerprint of the certificate to verify against.
Example:
.. code-block:: python
import urllib3
proxy = urllib3.ProxyManager("https://localhost:3128/")
resp1 = proxy.request("GET", "https://google.com/")
resp2 = proxy.request("GET", "https://httpbin.org/")
print(len(proxy.pools))
# 1
resp3 = proxy.request("GET", "https://httpbin.org/")
resp4 = proxy.request("GET", "https://twitter.com/")
print(len(proxy.pools))
# 3
"""
def __init__(
self,
proxy_url: str,
num_pools: int = 10,
headers: typing.Mapping[str, str] | None = None,
proxy_headers: typing.Mapping[str, str] | None = None,
proxy_ssl_context: ssl.SSLContext | None = None,
use_forwarding_for_https: bool = False,
proxy_assert_hostname: None | str | typing.Literal[False] = None,
proxy_assert_fingerprint: str | None = None,
**connection_pool_kw: typing.Any,
) -> None:
if isinstance(proxy_url, HTTPConnectionPool):
str_proxy_url = f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
else:
str_proxy_url = proxy_url
proxy = parse_url(str_proxy_url)
if proxy.scheme not in ("http", "https"):
raise ProxySchemeUnknown(proxy.scheme)
if not proxy.port:
port = port_by_scheme.get(proxy.scheme, 80)
proxy = proxy._replace(port=port)
self.proxy = proxy
self.proxy_headers = proxy_headers or {}
self.proxy_ssl_context = proxy_ssl_context
self.proxy_config = ProxyConfig(
proxy_ssl_context,
use_forwarding_for_https,
proxy_assert_hostname,
proxy_assert_fingerprint,
)
connection_pool_kw["_proxy"] = self.proxy
connection_pool_kw["_proxy_headers"] = self.proxy_headers
connection_pool_kw["_proxy_config"] = self.proxy_config
super().__init__(num_pools, headers, **connection_pool_kw)
def connection_from_host(
self,
host: str | None,
port: int | None = None,
scheme: str | None = "http",
pool_kwargs: dict[str, typing.Any] | None = None,
) -> HTTPConnectionPool:
if scheme == "https":
return super().connection_from_host(
host, port, scheme, pool_kwargs=pool_kwargs
)
return super().connection_from_host(
self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs # type: ignore[union-attr]
)
def _set_proxy_headers(
self, url: str, headers: typing.Mapping[str, str] | None = None
) -> typing.Mapping[str, str]:
"""
Sets headers needed by proxies: specifically, the Accept and Host
headers. Only sets headers not provided by the user.
"""
headers_ = {"Accept": "*/*"}
netloc = parse_url(url).netloc
if netloc:
headers_["Host"] = netloc
if headers:
headers_.update(headers)
return headers_
def urlopen( # type: ignore[override]
self, method: str, url: str, redirect: bool = True, **kw: typing.Any
) -> BaseHTTPResponse:
"Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
u = parse_url(url)
if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
# For connections using HTTP CONNECT, httplib sets the necessary
# headers on the CONNECT to the proxy. If we're not using CONNECT,
# we'll definitely need to set 'Host' at the very least.
headers = kw.get("headers", self.headers)
kw["headers"] = self._set_proxy_headers(url, headers)
return super().urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
return ProxyManager(proxy_url=url, **kw)

View File

@ -0,0 +1,2 @@
# Instruct type checkers to look for inline type annotations in this package.
# See PEP 561.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,42 @@
# For backwards compatibility, provide imports that used to be here.
from __future__ import annotations
from .connection import is_connection_dropped
from .request import SKIP_HEADER, SKIPPABLE_HEADERS, make_headers
from .response import is_fp_closed
from .retry import Retry
from .ssl_ import (
ALPN_PROTOCOLS,
IS_PYOPENSSL,
SSLContext,
assert_fingerprint,
create_urllib3_context,
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
)
from .timeout import Timeout
from .url import Url, parse_url
from .wait import wait_for_read, wait_for_write
__all__ = (
"IS_PYOPENSSL",
"SSLContext",
"ALPN_PROTOCOLS",
"Retry",
"Timeout",
"Url",
"assert_fingerprint",
"create_urllib3_context",
"is_connection_dropped",
"is_fp_closed",
"parse_url",
"make_headers",
"resolve_cert_reqs",
"resolve_ssl_version",
"ssl_wrap_socket",
"wait_for_read",
"wait_for_write",
"SKIP_HEADER",
"SKIPPABLE_HEADERS",
)

View File

@ -0,0 +1,137 @@
from __future__ import annotations
import socket
import typing
from ..exceptions import LocationParseError
from .timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
_TYPE_SOCKET_OPTIONS = list[tuple[int, int, typing.Union[int, bytes]]]
if typing.TYPE_CHECKING:
from .._base_connection import BaseHTTPConnection
def is_connection_dropped(conn: BaseHTTPConnection) -> bool: # Platform-specific
"""
Returns True if the connection is dropped and should be closed.
:param conn: :class:`urllib3.connection.HTTPConnection` object.
"""
return not conn.is_connected
# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
# One additional modification is that we avoid binding to IPv6 servers
# discovered in DNS if the system doesn't have IPv6 functionality.
def create_connection(
address: tuple[str, int],
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
socket_options: _TYPE_SOCKET_OPTIONS | None = None,
) -> socket.socket:
"""Connect to *address* and return the socket object.
Convenience function. Connect to *address* (a 2-tuple ``(host,
port)``) and return the socket object. Passing the optional
*timeout* parameter will set the timeout on the socket instance
before attempting to connect. If no *timeout* is supplied, the
global default timeout setting returned by :func:`socket.getdefaulttimeout`
is used. If *source_address* is set it must be a tuple of (host, port)
for the socket to bind as a source address before making the connection.
An host of '' or port 0 tells the OS to use the default.
"""
host, port = address
if host.startswith("["):
host = host.strip("[]")
err = None
# Using the value from allowed_gai_family() in the context of getaddrinfo lets
# us select whether to work with IPv4 DNS records, IPv6 records, or both.
# The original create_connection function always returns all records.
family = allowed_gai_family()
try:
host.encode("idna")
except UnicodeError:
raise LocationParseError(f"'{host}', label empty or too long") from None
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
_set_socket_options(sock, socket_options)
if timeout is not _DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
sock.connect(sa)
# Break explicitly a reference cycle
err = None
return sock
except OSError as _:
err = _
if sock is not None:
sock.close()
if err is not None:
try:
raise err
finally:
# Break explicitly a reference cycle
err = None
else:
raise OSError("getaddrinfo returns an empty list")
def _set_socket_options(
sock: socket.socket, options: _TYPE_SOCKET_OPTIONS | None
) -> None:
if options is None:
return
for opt in options:
sock.setsockopt(*opt)
def allowed_gai_family() -> socket.AddressFamily:
"""This function is designed to work in the context of
getaddrinfo, where family=socket.AF_UNSPEC is the default and
will perform a DNS search for both IPv6 and IPv4 records."""
family = socket.AF_INET
if HAS_IPV6:
family = socket.AF_UNSPEC
return family
def _has_ipv6(host: str) -> bool:
"""Returns True if the system can bind an IPv6 address."""
sock = None
has_ipv6 = False
if socket.has_ipv6:
# has_ipv6 returns true if cPython was compiled with IPv6 support.
# It does not tell us if the system has IPv6 support enabled. To
# determine that we must bind to an IPv6 address.
# https://github.com/urllib3/urllib3/pull/611
# https://bugs.python.org/issue658327
try:
sock = socket.socket(socket.AF_INET6)
sock.bind((host, 0))
has_ipv6 = True
except Exception:
pass
if sock:
sock.close()
return has_ipv6
HAS_IPV6 = _has_ipv6("::1")

View File

@ -0,0 +1,43 @@
from __future__ import annotations
import typing
from .url import Url
if typing.TYPE_CHECKING:
from ..connection import ProxyConfig
def connection_requires_http_tunnel(
proxy_url: Url | None = None,
proxy_config: ProxyConfig | None = None,
destination_scheme: str | None = None,
) -> bool:
"""
Returns True if the connection requires an HTTP CONNECT through the proxy.
:param URL proxy_url:
URL of the proxy.
:param ProxyConfig proxy_config:
Proxy configuration from poolmanager.py
:param str destination_scheme:
The scheme of the destination. (i.e https, http, etc)
"""
# If we're not using a proxy, no way to use a tunnel.
if proxy_url is None:
return False
# HTTP destinations never require tunneling, we always forward.
if destination_scheme == "http":
return False
# Support for forwarding with HTTPS proxies and HTTPS destinations.
if (
proxy_url.scheme == "https"
and proxy_config
and proxy_config.use_forwarding_for_https
):
return False
# Otherwise always use a tunnel.
return True

View File

@ -0,0 +1,258 @@
from __future__ import annotations
import io
import typing
from base64 import b64encode
from enum import Enum
from ..exceptions import UnrewindableBodyError
from .util import to_bytes
if typing.TYPE_CHECKING:
from typing import Final
# Pass as a value within ``headers`` to skip
# emitting some HTTP headers that are added automatically.
# The only headers that are supported are ``Accept-Encoding``,
# ``Host``, and ``User-Agent``.
SKIP_HEADER = "@@@SKIP_HEADER@@@"
SKIPPABLE_HEADERS = frozenset(["accept-encoding", "host", "user-agent"])
ACCEPT_ENCODING = "gzip,deflate"
try:
try:
import brotlicffi as _unused_module_brotli # type: ignore[import-not-found] # noqa: F401
except ImportError:
import brotli as _unused_module_brotli # type: ignore[import-not-found] # noqa: F401
except ImportError:
pass
else:
ACCEPT_ENCODING += ",br"
try:
import zstandard as _unused_module_zstd # noqa: F401
except ImportError:
pass
else:
ACCEPT_ENCODING += ",zstd"
class _TYPE_FAILEDTELL(Enum):
token = 0
_FAILEDTELL: Final[_TYPE_FAILEDTELL] = _TYPE_FAILEDTELL.token
_TYPE_BODY_POSITION = typing.Union[int, _TYPE_FAILEDTELL]
# When sending a request with these methods we aren't expecting
# a body so don't need to set an explicit 'Content-Length: 0'
# The reason we do this in the negative instead of tracking methods
# which 'should' have a body is because unknown methods should be
# treated as if they were 'POST' which *does* expect a body.
_METHODS_NOT_EXPECTING_BODY = {"GET", "HEAD", "DELETE", "TRACE", "OPTIONS", "CONNECT"}
def make_headers(
keep_alive: bool | None = None,
accept_encoding: bool | list[str] | str | None = None,
user_agent: str | None = None,
basic_auth: str | None = None,
proxy_basic_auth: str | None = None,
disable_cache: bool | None = None,
) -> dict[str, str]:
"""
Shortcuts for generating request headers.
:param keep_alive:
If ``True``, adds 'connection: keep-alive' header.
:param accept_encoding:
Can be a boolean, list, or string.
``True`` translates to 'gzip,deflate'. If the dependencies for
Brotli (either the ``brotli`` or ``brotlicffi`` package) and/or Zstandard
(the ``zstandard`` package) algorithms are installed, then their encodings are
included in the string ('br' and 'zstd', respectively).
List will get joined by comma.
String will be used as provided.
:param user_agent:
String representing the user-agent you want, such as
"python-urllib3/0.6"
:param basic_auth:
Colon-separated username:password string for 'authorization: basic ...'
auth header.
:param proxy_basic_auth:
Colon-separated username:password string for 'proxy-authorization: basic ...'
auth header.
:param disable_cache:
If ``True``, adds 'cache-control: no-cache' header.
Example:
.. code-block:: python
import urllib3
print(urllib3.util.make_headers(keep_alive=True, user_agent="Batman/1.0"))
# {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
print(urllib3.util.make_headers(accept_encoding=True))
# {'accept-encoding': 'gzip,deflate'}
"""
headers: dict[str, str] = {}
if accept_encoding:
if isinstance(accept_encoding, str):
pass
elif isinstance(accept_encoding, list):
accept_encoding = ",".join(accept_encoding)
else:
accept_encoding = ACCEPT_ENCODING
headers["accept-encoding"] = accept_encoding
if user_agent:
headers["user-agent"] = user_agent
if keep_alive:
headers["connection"] = "keep-alive"
if basic_auth:
headers["authorization"] = (
f"Basic {b64encode(basic_auth.encode('latin-1')).decode()}"
)
if proxy_basic_auth:
headers["proxy-authorization"] = (
f"Basic {b64encode(proxy_basic_auth.encode('latin-1')).decode()}"
)
if disable_cache:
headers["cache-control"] = "no-cache"
return headers
def set_file_position(
body: typing.Any, pos: _TYPE_BODY_POSITION | None
) -> _TYPE_BODY_POSITION | None:
"""
If a position is provided, move file to that point.
Otherwise, we'll attempt to record a position for future use.
"""
if pos is not None:
rewind_body(body, pos)
elif getattr(body, "tell", None) is not None:
try:
pos = body.tell()
except OSError:
# This differentiates from None, allowing us to catch
# a failed `tell()` later when trying to rewind the body.
pos = _FAILEDTELL
return pos
def rewind_body(body: typing.IO[typing.AnyStr], body_pos: _TYPE_BODY_POSITION) -> None:
"""
Attempt to rewind body to a certain position.
Primarily used for request redirects and retries.
:param body:
File-like object that supports seek.
:param int pos:
Position to seek to in file.
"""
body_seek = getattr(body, "seek", None)
if body_seek is not None and isinstance(body_pos, int):
try:
body_seek(body_pos)
except OSError as e:
raise UnrewindableBodyError(
"An error occurred when rewinding request body for redirect/retry."
) from e
elif body_pos is _FAILEDTELL:
raise UnrewindableBodyError(
"Unable to record file position for rewinding "
"request body during a redirect/retry."
)
else:
raise ValueError(
f"body_pos must be of type integer, instead it was {type(body_pos)}."
)
class ChunksAndContentLength(typing.NamedTuple):
chunks: typing.Iterable[bytes] | None
content_length: int | None
def body_to_chunks(
body: typing.Any | None, method: str, blocksize: int
) -> ChunksAndContentLength:
"""Takes the HTTP request method, body, and blocksize and
transforms them into an iterable of chunks to pass to
socket.sendall() and an optional 'Content-Length' header.
A 'Content-Length' of 'None' indicates the length of the body
can't be determined so should use 'Transfer-Encoding: chunked'
for framing instead.
"""
chunks: typing.Iterable[bytes] | None
content_length: int | None
# No body, we need to make a recommendation on 'Content-Length'
# based on whether that request method is expected to have
# a body or not.
if body is None:
chunks = None
if method.upper() not in _METHODS_NOT_EXPECTING_BODY:
content_length = 0
else:
content_length = None
# Bytes or strings become bytes
elif isinstance(body, (str, bytes)):
chunks = (to_bytes(body),)
content_length = len(chunks[0])
# File-like object, TODO: use seek() and tell() for length?
elif hasattr(body, "read"):
def chunk_readable() -> typing.Iterable[bytes]:
nonlocal body, blocksize
encode = isinstance(body, io.TextIOBase)
while True:
datablock = body.read(blocksize)
if not datablock:
break
if encode:
datablock = datablock.encode("utf-8")
yield datablock
chunks = chunk_readable()
content_length = None
# Otherwise we need to start checking via duck-typing.
else:
try:
# Check if the body implements the buffer API.
mv = memoryview(body)
except TypeError:
try:
# Check if the body is an iterable
chunks = iter(body)
content_length = None
except TypeError:
raise TypeError(
f"'body' must be a bytes-like object, file-like "
f"object, or iterable. Instead was {body!r}"
) from None
else:
# Since it implements the buffer API can be passed directly to socket.sendall()
chunks = (body,)
content_length = mv.nbytes
return ChunksAndContentLength(chunks=chunks, content_length=content_length)

View File

@ -0,0 +1,101 @@
from __future__ import annotations
import http.client as httplib
from email.errors import MultipartInvariantViolationDefect, StartBoundaryNotFoundDefect
from ..exceptions import HeaderParsingError
def is_fp_closed(obj: object) -> bool:
"""
Checks whether a given file-like object is closed.
:param obj:
The file-like object to check.
"""
try:
# Check `isclosed()` first, in case Python3 doesn't set `closed`.
# GH Issue #928
return obj.isclosed() # type: ignore[no-any-return, attr-defined]
except AttributeError:
pass
try:
# Check via the official file-like-object way.
return obj.closed # type: ignore[no-any-return, attr-defined]
except AttributeError:
pass
try:
# Check if the object is a container for another file-like object that
# gets released on exhaustion (e.g. HTTPResponse).
return obj.fp is None # type: ignore[attr-defined]
except AttributeError:
pass
raise ValueError("Unable to determine whether fp is closed.")
def assert_header_parsing(headers: httplib.HTTPMessage) -> None:
"""
Asserts whether all headers have been successfully parsed.
Extracts encountered errors from the result of parsing headers.
Only works on Python 3.
:param http.client.HTTPMessage headers: Headers to verify.
:raises urllib3.exceptions.HeaderParsingError:
If parsing errors are found.
"""
# This will fail silently if we pass in the wrong kind of parameter.
# To make debugging easier add an explicit check.
if not isinstance(headers, httplib.HTTPMessage):
raise TypeError(f"expected httplib.Message, got {type(headers)}.")
unparsed_data = None
# get_payload is actually email.message.Message.get_payload;
# we're only interested in the result if it's not a multipart message
if not headers.is_multipart():
payload = headers.get_payload()
if isinstance(payload, (bytes, str)):
unparsed_data = payload
# httplib is assuming a response body is available
# when parsing headers even when httplib only sends
# header data to parse_headers() This results in
# defects on multipart responses in particular.
# See: https://github.com/urllib3/urllib3/issues/800
# So we ignore the following defects:
# - StartBoundaryNotFoundDefect:
# The claimed start boundary was never found.
# - MultipartInvariantViolationDefect:
# A message claimed to be a multipart but no subparts were found.
defects = [
defect
for defect in headers.defects
if not isinstance(
defect, (StartBoundaryNotFoundDefect, MultipartInvariantViolationDefect)
)
]
if defects or unparsed_data:
raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
def is_response_to_head(response: httplib.HTTPResponse) -> bool:
"""
Checks whether the request of a response has been a HEAD-request.
:param http.client.HTTPResponse response:
Response to check if the originating request
used 'HEAD' as a method.
"""
# FIXME: Can we do this somehow without accessing private httplib _method?
method_str = response._method # type: str # type: ignore[attr-defined]
return method_str.upper() == "HEAD"

View File

@ -0,0 +1,533 @@
from __future__ import annotations
import email
import logging
import random
import re
import time
import typing
from itertools import takewhile
from types import TracebackType
from ..exceptions import (
ConnectTimeoutError,
InvalidHeader,
MaxRetryError,
ProtocolError,
ProxyError,
ReadTimeoutError,
ResponseError,
)
from .util import reraise
if typing.TYPE_CHECKING:
from typing_extensions import Self
from ..connectionpool import ConnectionPool
from ..response import BaseHTTPResponse
log = logging.getLogger(__name__)
# Data structure for representing the metadata of requests that result in a retry.
class RequestHistory(typing.NamedTuple):
method: str | None
url: str | None
error: Exception | None
status: int | None
redirect_location: str | None
class Retry:
"""Retry configuration.
Each retry attempt will create a new Retry object with updated values, so
they can be safely reused.
Retries can be defined as a default for a pool:
.. code-block:: python
retries = Retry(connect=5, read=2, redirect=5)
http = PoolManager(retries=retries)
response = http.request("GET", "https://example.com/")
Or per-request (which overrides the default for the pool):
.. code-block:: python
response = http.request("GET", "https://example.com/", retries=Retry(10))
Retries can be disabled by passing ``False``:
.. code-block:: python
response = http.request("GET", "https://example.com/", retries=False)
Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
retries are disabled, in which case the causing exception will be raised.
:param int total:
Total number of retries to allow. Takes precedence over other counts.
Set to ``None`` to remove this constraint and fall back on other
counts.
Set to ``0`` to fail on the first retry.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param int connect:
How many connection-related errors to retry on.
These are errors raised before the request is sent to the remote server,
which we assume has not triggered the server to process the request.
Set to ``0`` to fail on the first retry of this type.
:param int read:
How many times to retry on read errors.
These errors are raised after the request was sent to the server, so the
request may have side-effects.
Set to ``0`` to fail on the first retry of this type.
:param int redirect:
How many redirects to perform. Limit this to avoid infinite redirect
loops.
A redirect is a HTTP response with a status code 301, 302, 303, 307 or
308.
Set to ``0`` to fail on the first retry of this type.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param int status:
How many times to retry on bad status codes.
These are retries made on responses, where status code matches
``status_forcelist``.
Set to ``0`` to fail on the first retry of this type.
:param int other:
How many times to retry on other errors.
Other errors are errors that are not connect, read, redirect or status errors.
These errors might be raised after the request was sent to the server, so the
request might have side-effects.
Set to ``0`` to fail on the first retry of this type.
If ``total`` is not set, it's a good idea to set this to 0 to account
for unexpected edge cases and avoid infinite retry loops.
:param Collection allowed_methods:
Set of uppercased HTTP method verbs that we should retry on.
By default, we only retry on methods which are considered to be
idempotent (multiple requests with the same parameters end with the
same state). See :attr:`Retry.DEFAULT_ALLOWED_METHODS`.
Set to a ``None`` value to retry on any verb.
:param Collection status_forcelist:
A set of integer HTTP status codes that we should force a retry on.
A retry is initiated if the request method is in ``allowed_methods``
and the response status code is in ``status_forcelist``.
By default, this is disabled with ``None``.
:param float backoff_factor:
A backoff factor to apply between attempts after the second try
(most errors are resolved immediately by a second try without a
delay). urllib3 will sleep for::
{backoff factor} * (2 ** ({number of previous retries}))
seconds. If `backoff_jitter` is non-zero, this sleep is extended by::
random.uniform(0, {backoff jitter})
seconds. For example, if the backoff_factor is 0.1, then :func:`Retry.sleep` will
sleep for [0.0s, 0.2s, 0.4s, 0.8s, ...] between retries. No backoff will ever
be longer than `backoff_max`.
By default, backoff is disabled (factor set to 0).
:param bool raise_on_redirect: Whether, if the number of redirects is
exhausted, to raise a MaxRetryError, or to return a response with a
response code in the 3xx range.
:param bool raise_on_status: Similar meaning to ``raise_on_redirect``:
whether we should raise an exception, or return a response,
if status falls in ``status_forcelist`` range and retries have
been exhausted.
:param tuple history: The history of the request encountered during
each call to :meth:`~Retry.increment`. The list is in the order
the requests occurred. Each list item is of class :class:`RequestHistory`.
:param bool respect_retry_after_header:
Whether to respect Retry-After header on status codes defined as
:attr:`Retry.RETRY_AFTER_STATUS_CODES` or not.
:param Collection remove_headers_on_redirect:
Sequence of headers to remove from the request when a response
indicating a redirect is returned before firing off the redirected
request.
"""
#: Default methods to be used for ``allowed_methods``
DEFAULT_ALLOWED_METHODS = frozenset(
["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"]
)
#: Default status codes to be used for ``status_forcelist``
RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])
#: Default headers to be used for ``remove_headers_on_redirect``
DEFAULT_REMOVE_HEADERS_ON_REDIRECT = frozenset(
["Cookie", "Authorization", "Proxy-Authorization"]
)
#: Default maximum backoff time.
DEFAULT_BACKOFF_MAX = 120
# Backward compatibility; assigned outside of the class.
DEFAULT: typing.ClassVar[Retry]
def __init__(
self,
total: bool | int | None = 10,
connect: int | None = None,
read: int | None = None,
redirect: bool | int | None = None,
status: int | None = None,
other: int | None = None,
allowed_methods: typing.Collection[str] | None = DEFAULT_ALLOWED_METHODS,
status_forcelist: typing.Collection[int] | None = None,
backoff_factor: float = 0,
backoff_max: float = DEFAULT_BACKOFF_MAX,
raise_on_redirect: bool = True,
raise_on_status: bool = True,
history: tuple[RequestHistory, ...] | None = None,
respect_retry_after_header: bool = True,
remove_headers_on_redirect: typing.Collection[
str
] = DEFAULT_REMOVE_HEADERS_ON_REDIRECT,
backoff_jitter: float = 0.0,
) -> None:
self.total = total
self.connect = connect
self.read = read
self.status = status
self.other = other
if redirect is False or total is False:
redirect = 0
raise_on_redirect = False
self.redirect = redirect
self.status_forcelist = status_forcelist or set()
self.allowed_methods = allowed_methods
self.backoff_factor = backoff_factor
self.backoff_max = backoff_max
self.raise_on_redirect = raise_on_redirect
self.raise_on_status = raise_on_status
self.history = history or ()
self.respect_retry_after_header = respect_retry_after_header
self.remove_headers_on_redirect = frozenset(
h.lower() for h in remove_headers_on_redirect
)
self.backoff_jitter = backoff_jitter
def new(self, **kw: typing.Any) -> Self:
params = dict(
total=self.total,
connect=self.connect,
read=self.read,
redirect=self.redirect,
status=self.status,
other=self.other,
allowed_methods=self.allowed_methods,
status_forcelist=self.status_forcelist,
backoff_factor=self.backoff_factor,
backoff_max=self.backoff_max,
raise_on_redirect=self.raise_on_redirect,
raise_on_status=self.raise_on_status,
history=self.history,
remove_headers_on_redirect=self.remove_headers_on_redirect,
respect_retry_after_header=self.respect_retry_after_header,
backoff_jitter=self.backoff_jitter,
)
params.update(kw)
return type(self)(**params) # type: ignore[arg-type]
@classmethod
def from_int(
cls,
retries: Retry | bool | int | None,
redirect: bool | int | None = True,
default: Retry | bool | int | None = None,
) -> Retry:
"""Backwards-compatibility for the old retries format."""
if retries is None:
retries = default if default is not None else cls.DEFAULT
if isinstance(retries, Retry):
return retries
redirect = bool(redirect) and None
new_retries = cls(retries, redirect=redirect)
log.debug("Converted retries value: %r -> %r", retries, new_retries)
return new_retries
def get_backoff_time(self) -> float:
"""Formula for computing the current backoff
:rtype: float
"""
# We want to consider only the last consecutive errors sequence (Ignore redirects).
consecutive_errors_len = len(
list(
takewhile(lambda x: x.redirect_location is None, reversed(self.history))
)
)
if consecutive_errors_len <= 1:
return 0
backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1))
if self.backoff_jitter != 0.0:
backoff_value += random.random() * self.backoff_jitter
return float(max(0, min(self.backoff_max, backoff_value)))
def parse_retry_after(self, retry_after: str) -> float:
seconds: float
# Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
if re.match(r"^\s*[0-9]+\s*$", retry_after):
seconds = int(retry_after)
else:
retry_date_tuple = email.utils.parsedate_tz(retry_after)
if retry_date_tuple is None:
raise InvalidHeader(f"Invalid Retry-After header: {retry_after}")
retry_date = email.utils.mktime_tz(retry_date_tuple)
seconds = retry_date - time.time()
seconds = max(seconds, 0)
return seconds
def get_retry_after(self, response: BaseHTTPResponse) -> float | None:
"""Get the value of Retry-After in seconds."""
retry_after = response.headers.get("Retry-After")
if retry_after is None:
return None
return self.parse_retry_after(retry_after)
def sleep_for_retry(self, response: BaseHTTPResponse) -> bool:
retry_after = self.get_retry_after(response)
if retry_after:
time.sleep(retry_after)
return True
return False
def _sleep_backoff(self) -> None:
backoff = self.get_backoff_time()
if backoff <= 0:
return
time.sleep(backoff)
def sleep(self, response: BaseHTTPResponse | None = None) -> None:
"""Sleep between retry attempts.
This method will respect a server's ``Retry-After`` response header
and sleep the duration of the time requested. If that is not present, it
will use an exponential backoff. By default, the backoff factor is 0 and
this method will return immediately.
"""
if self.respect_retry_after_header and response:
slept = self.sleep_for_retry(response)
if slept:
return
self._sleep_backoff()
def _is_connection_error(self, err: Exception) -> bool:
"""Errors when we're fairly sure that the server did not receive the
request, so it should be safe to retry.
"""
if isinstance(err, ProxyError):
err = err.original_error
return isinstance(err, ConnectTimeoutError)
def _is_read_error(self, err: Exception) -> bool:
"""Errors that occur after the request has been started, so we should
assume that the server began processing it.
"""
return isinstance(err, (ReadTimeoutError, ProtocolError))
def _is_method_retryable(self, method: str) -> bool:
"""Checks if a given HTTP method should be retried upon, depending if
it is included in the allowed_methods
"""
if self.allowed_methods and method.upper() not in self.allowed_methods:
return False
return True
def is_retry(
self, method: str, status_code: int, has_retry_after: bool = False
) -> bool:
"""Is this method/status code retryable? (Based on allowlists and control
variables such as the number of total retries to allow, whether to
respect the Retry-After header, whether this header is present, and
whether the returned status code is on the list of status codes to
be retried upon on the presence of the aforementioned header)
"""
if not self._is_method_retryable(method):
return False
if self.status_forcelist and status_code in self.status_forcelist:
return True
return bool(
self.total
and self.respect_retry_after_header
and has_retry_after
and (status_code in self.RETRY_AFTER_STATUS_CODES)
)
def is_exhausted(self) -> bool:
"""Are we out of retries?"""
retry_counts = [
x
for x in (
self.total,
self.connect,
self.read,
self.redirect,
self.status,
self.other,
)
if x
]
if not retry_counts:
return False
return min(retry_counts) < 0
def increment(
self,
method: str | None = None,
url: str | None = None,
response: BaseHTTPResponse | None = None,
error: Exception | None = None,
_pool: ConnectionPool | None = None,
_stacktrace: TracebackType | None = None,
) -> Self:
"""Return a new Retry object with incremented retry counters.
:param response: A response object, or None, if the server did not
return a response.
:type response: :class:`~urllib3.response.BaseHTTPResponse`
:param Exception error: An error encountered during the request, or
None if the response was received successfully.
:return: A new ``Retry`` object.
"""
if self.total is False and error:
# Disabled, indicate to re-raise the error.
raise reraise(type(error), error, _stacktrace)
total = self.total
if total is not None:
total -= 1
connect = self.connect
read = self.read
redirect = self.redirect
status_count = self.status
other = self.other
cause = "unknown"
status = None
redirect_location = None
if error and self._is_connection_error(error):
# Connect retry?
if connect is False:
raise reraise(type(error), error, _stacktrace)
elif connect is not None:
connect -= 1
elif error and self._is_read_error(error):
# Read retry?
if read is False or method is None or not self._is_method_retryable(method):
raise reraise(type(error), error, _stacktrace)
elif read is not None:
read -= 1
elif error:
# Other retry?
if other is not None:
other -= 1
elif response and response.get_redirect_location():
# Redirect retry?
if redirect is not None:
redirect -= 1
cause = "too many redirects"
response_redirect_location = response.get_redirect_location()
if response_redirect_location:
redirect_location = response_redirect_location
status = response.status
else:
# Incrementing because of a server error like a 500 in
# status_forcelist and the given method is in the allowed_methods
cause = ResponseError.GENERIC_ERROR
if response and response.status:
if status_count is not None:
status_count -= 1
cause = ResponseError.SPECIFIC_ERROR.format(status_code=response.status)
status = response.status
history = self.history + (
RequestHistory(method, url, error, status, redirect_location),
)
new_retry = self.new(
total=total,
connect=connect,
read=read,
redirect=redirect,
status=status_count,
other=other,
history=history,
)
if new_retry.is_exhausted():
reason = error or ResponseError(cause)
raise MaxRetryError(_pool, url, reason) from reason # type: ignore[arg-type]
log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
return new_retry
def __repr__(self) -> str:
return (
f"{type(self).__name__}(total={self.total}, connect={self.connect}, "
f"read={self.read}, redirect={self.redirect}, status={self.status})"
)
# For backwards compatibility (equivalent to pre-v1.9):
Retry.DEFAULT = Retry(3)

View File

@ -0,0 +1,524 @@
from __future__ import annotations
import hashlib
import hmac
import os
import socket
import sys
import typing
import warnings
from binascii import unhexlify
from ..exceptions import ProxySchemeUnsupported, SSLError
from .url import _BRACELESS_IPV6_ADDRZ_RE, _IPV4_RE
SSLContext = None
SSLTransport = None
HAS_NEVER_CHECK_COMMON_NAME = False
IS_PYOPENSSL = False
ALPN_PROTOCOLS = ["http/1.1"]
_TYPE_VERSION_INFO = tuple[int, int, int, str, int]
# Maps the length of a digest to a possible hash function producing this digest
HASHFUNC_MAP = {
length: getattr(hashlib, algorithm, None)
for length, algorithm in ((32, "md5"), (40, "sha1"), (64, "sha256"))
}
def _is_bpo_43522_fixed(
implementation_name: str,
version_info: _TYPE_VERSION_INFO,
pypy_version_info: _TYPE_VERSION_INFO | None,
) -> bool:
"""Return True for CPython 3.9.3+ or 3.10+ and PyPy 7.3.8+ where
setting SSLContext.hostname_checks_common_name to False works.
Outside of CPython and PyPy we don't know which implementations work
or not so we conservatively use our hostname matching as we know that works
on all implementations.
https://github.com/urllib3/urllib3/issues/2192#issuecomment-821832963
https://foss.heptapod.net/pypy/pypy/-/issues/3539
"""
if implementation_name == "pypy":
# https://foss.heptapod.net/pypy/pypy/-/issues/3129
return pypy_version_info >= (7, 3, 8) # type: ignore[operator]
elif implementation_name == "cpython":
major_minor = version_info[:2]
micro = version_info[2]
return (major_minor == (3, 9) and micro >= 3) or major_minor >= (3, 10)
else: # Defensive:
return False
def _is_has_never_check_common_name_reliable(
openssl_version: str,
openssl_version_number: int,
implementation_name: str,
version_info: _TYPE_VERSION_INFO,
pypy_version_info: _TYPE_VERSION_INFO | None,
) -> bool:
# As of May 2023, all released versions of LibreSSL fail to reject certificates with
# only common names, see https://github.com/urllib3/urllib3/pull/3024
is_openssl = openssl_version.startswith("OpenSSL ")
# Before fixing OpenSSL issue #14579, the SSL_new() API was not copying hostflags
# like X509_CHECK_FLAG_NEVER_CHECK_SUBJECT, which tripped up CPython.
# https://github.com/openssl/openssl/issues/14579
# This was released in OpenSSL 1.1.1l+ (>=0x101010cf)
is_openssl_issue_14579_fixed = openssl_version_number >= 0x101010CF
return is_openssl and (
is_openssl_issue_14579_fixed
or _is_bpo_43522_fixed(implementation_name, version_info, pypy_version_info)
)
if typing.TYPE_CHECKING:
from ssl import VerifyMode
from typing import TypedDict
from .ssltransport import SSLTransport as SSLTransportType
class _TYPE_PEER_CERT_RET_DICT(TypedDict, total=False):
subjectAltName: tuple[tuple[str, str], ...]
subject: tuple[tuple[tuple[str, str], ...], ...]
serialNumber: str
# Mapping from 'ssl.PROTOCOL_TLSX' to 'TLSVersion.X'
_SSL_VERSION_TO_TLS_VERSION: dict[int, int] = {}
try: # Do we have ssl at all?
import ssl
from ssl import ( # type: ignore[assignment]
CERT_REQUIRED,
HAS_NEVER_CHECK_COMMON_NAME,
OP_NO_COMPRESSION,
OP_NO_TICKET,
OPENSSL_VERSION,
OPENSSL_VERSION_NUMBER,
PROTOCOL_TLS,
PROTOCOL_TLS_CLIENT,
VERIFY_X509_STRICT,
OP_NO_SSLv2,
OP_NO_SSLv3,
SSLContext,
TLSVersion,
)
PROTOCOL_SSLv23 = PROTOCOL_TLS
# Needed for Python 3.9 which does not define this
VERIFY_X509_PARTIAL_CHAIN = getattr(ssl, "VERIFY_X509_PARTIAL_CHAIN", 0x80000)
# Setting SSLContext.hostname_checks_common_name = False didn't work before CPython
# 3.9.3, and 3.10 (but OK on PyPy) or OpenSSL 1.1.1l+
if HAS_NEVER_CHECK_COMMON_NAME and not _is_has_never_check_common_name_reliable(
OPENSSL_VERSION,
OPENSSL_VERSION_NUMBER,
sys.implementation.name,
sys.version_info,
sys.pypy_version_info if sys.implementation.name == "pypy" else None, # type: ignore[attr-defined]
): # Defensive: for Python < 3.9.3
HAS_NEVER_CHECK_COMMON_NAME = False
# Need to be careful here in case old TLS versions get
# removed in future 'ssl' module implementations.
for attr in ("TLSv1", "TLSv1_1", "TLSv1_2"):
try:
_SSL_VERSION_TO_TLS_VERSION[getattr(ssl, f"PROTOCOL_{attr}")] = getattr(
TLSVersion, attr
)
except AttributeError: # Defensive:
continue
from .ssltransport import SSLTransport # type: ignore[assignment]
except ImportError:
OP_NO_COMPRESSION = 0x20000 # type: ignore[assignment]
OP_NO_TICKET = 0x4000 # type: ignore[assignment]
OP_NO_SSLv2 = 0x1000000 # type: ignore[assignment]
OP_NO_SSLv3 = 0x2000000 # type: ignore[assignment]
PROTOCOL_SSLv23 = PROTOCOL_TLS = 2 # type: ignore[assignment]
PROTOCOL_TLS_CLIENT = 16 # type: ignore[assignment]
VERIFY_X509_PARTIAL_CHAIN = 0x80000
VERIFY_X509_STRICT = 0x20 # type: ignore[assignment]
_TYPE_PEER_CERT_RET = typing.Union["_TYPE_PEER_CERT_RET_DICT", bytes, None]
def assert_fingerprint(cert: bytes | None, fingerprint: str) -> None:
"""
Checks if given fingerprint matches the supplied certificate.
:param cert:
Certificate as bytes object.
:param fingerprint:
Fingerprint as string of hexdigits, can be interspersed by colons.
"""
if cert is None:
raise SSLError("No certificate for the peer.")
fingerprint = fingerprint.replace(":", "").lower()
digest_length = len(fingerprint)
if digest_length not in HASHFUNC_MAP:
raise SSLError(f"Fingerprint of invalid length: {fingerprint}")
hashfunc = HASHFUNC_MAP.get(digest_length)
if hashfunc is None:
raise SSLError(
f"Hash function implementation unavailable for fingerprint length: {digest_length}"
)
# We need encode() here for py32; works on py2 and p33.
fingerprint_bytes = unhexlify(fingerprint.encode())
cert_digest = hashfunc(cert).digest()
if not hmac.compare_digest(cert_digest, fingerprint_bytes):
raise SSLError(
f'Fingerprints did not match. Expected "{fingerprint}", got "{cert_digest.hex()}"'
)
def resolve_cert_reqs(candidate: None | int | str) -> VerifyMode:
"""
Resolves the argument to a numeric constant, which can be passed to
the wrap_socket function/method from the ssl module.
Defaults to :data:`ssl.CERT_REQUIRED`.
If given a string it is assumed to be the name of the constant in the
:mod:`ssl` module or its abbreviation.
(So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
If it's neither `None` nor a string we assume it is already the numeric
constant which can directly be passed to wrap_socket.
"""
if candidate is None:
return CERT_REQUIRED
if isinstance(candidate, str):
res = getattr(ssl, candidate, None)
if res is None:
res = getattr(ssl, "CERT_" + candidate)
return res # type: ignore[no-any-return]
return candidate # type: ignore[return-value]
def resolve_ssl_version(candidate: None | int | str) -> int:
"""
like resolve_cert_reqs
"""
if candidate is None:
return PROTOCOL_TLS
if isinstance(candidate, str):
res = getattr(ssl, candidate, None)
if res is None:
res = getattr(ssl, "PROTOCOL_" + candidate)
return typing.cast(int, res)
return candidate
def create_urllib3_context(
ssl_version: int | None = None,
cert_reqs: int | None = None,
options: int | None = None,
ciphers: str | None = None,
ssl_minimum_version: int | None = None,
ssl_maximum_version: int | None = None,
verify_flags: int | None = None,
) -> ssl.SSLContext:
"""Creates and configures an :class:`ssl.SSLContext` instance for use with urllib3.
:param ssl_version:
The desired protocol version to use. This will default to
PROTOCOL_SSLv23 which will negotiate the highest protocol that both
the server and your installation of OpenSSL support.
This parameter is deprecated instead use 'ssl_minimum_version'.
:param ssl_minimum_version:
The minimum version of TLS to be used. Use the 'ssl.TLSVersion' enum for specifying the value.
:param ssl_maximum_version:
The maximum version of TLS to be used. Use the 'ssl.TLSVersion' enum for specifying the value.
Not recommended to set to anything other than 'ssl.TLSVersion.MAXIMUM_SUPPORTED' which is the
default value.
:param cert_reqs:
Whether to require the certificate verification. This defaults to
``ssl.CERT_REQUIRED``.
:param options:
Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``, and ``ssl.OP_NO_TICKET``.
:param ciphers:
Which cipher suites to allow the server to select. Defaults to either system configured
ciphers if OpenSSL 1.1.1+, otherwise uses a secure default set of ciphers.
:param verify_flags:
The flags for certificate verification operations. These default to
``ssl.VERIFY_X509_PARTIAL_CHAIN`` and ``ssl.VERIFY_X509_STRICT`` for Python 3.13+.
:returns:
Constructed SSLContext object with specified options
:rtype: SSLContext
"""
if SSLContext is None:
raise TypeError("Can't create an SSLContext object without an ssl module")
# This means 'ssl_version' was specified as an exact value.
if ssl_version not in (None, PROTOCOL_TLS, PROTOCOL_TLS_CLIENT):
# Disallow setting 'ssl_version' and 'ssl_minimum|maximum_version'
# to avoid conflicts.
if ssl_minimum_version is not None or ssl_maximum_version is not None:
raise ValueError(
"Can't specify both 'ssl_version' and either "
"'ssl_minimum_version' or 'ssl_maximum_version'"
)
# 'ssl_version' is deprecated and will be removed in the future.
else:
# Use 'ssl_minimum_version' and 'ssl_maximum_version' instead.
ssl_minimum_version = _SSL_VERSION_TO_TLS_VERSION.get(
ssl_version, TLSVersion.MINIMUM_SUPPORTED
)
ssl_maximum_version = _SSL_VERSION_TO_TLS_VERSION.get(
ssl_version, TLSVersion.MAXIMUM_SUPPORTED
)
# This warning message is pushing users to use 'ssl_minimum_version'
# instead of both min/max. Best practice is to only set the minimum version and
# keep the maximum version to be it's default value: 'TLSVersion.MAXIMUM_SUPPORTED'
warnings.warn(
"'ssl_version' option is deprecated and will be "
"removed in urllib3 v2.1.0. Instead use 'ssl_minimum_version'",
category=DeprecationWarning,
stacklevel=2,
)
# PROTOCOL_TLS is deprecated in Python 3.10 so we always use PROTOCOL_TLS_CLIENT
context = SSLContext(PROTOCOL_TLS_CLIENT)
if ssl_minimum_version is not None:
context.minimum_version = ssl_minimum_version
else: # Python <3.10 defaults to 'MINIMUM_SUPPORTED' so explicitly set TLSv1.2 here
context.minimum_version = TLSVersion.TLSv1_2
if ssl_maximum_version is not None:
context.maximum_version = ssl_maximum_version
# Unless we're given ciphers defer to either system ciphers in
# the case of OpenSSL 1.1.1+ or use our own secure default ciphers.
if ciphers:
context.set_ciphers(ciphers)
# Setting the default here, as we may have no ssl module on import
cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs
if options is None:
options = 0
# SSLv2 is easily broken and is considered harmful and dangerous
options |= OP_NO_SSLv2
# SSLv3 has several problems and is now dangerous
options |= OP_NO_SSLv3
# Disable compression to prevent CRIME attacks for OpenSSL 1.0+
# (issue #309)
options |= OP_NO_COMPRESSION
# TLSv1.2 only. Unless set explicitly, do not request tickets.
# This may save some bandwidth on wire, and although the ticket is encrypted,
# there is a risk associated with it being on wire,
# if the server is not rotating its ticketing keys properly.
options |= OP_NO_TICKET
context.options |= options
if verify_flags is None:
verify_flags = 0
# In Python 3.13+ ssl.create_default_context() sets VERIFY_X509_PARTIAL_CHAIN
# and VERIFY_X509_STRICT so we do the same
if sys.version_info >= (3, 13):
verify_flags |= VERIFY_X509_PARTIAL_CHAIN
verify_flags |= VERIFY_X509_STRICT
context.verify_flags |= verify_flags
# Enable post-handshake authentication for TLS 1.3, see GH #1634. PHA is
# necessary for conditional client cert authentication with TLS 1.3.
# The attribute is None for OpenSSL <= 1.1.0 or does not exist when using
# an SSLContext created by pyOpenSSL.
if getattr(context, "post_handshake_auth", None) is not None:
context.post_handshake_auth = True
# The order of the below lines setting verify_mode and check_hostname
# matter due to safe-guards SSLContext has to prevent an SSLContext with
# check_hostname=True, verify_mode=NONE/OPTIONAL.
# We always set 'check_hostname=False' for pyOpenSSL so we rely on our own
# 'ssl.match_hostname()' implementation.
if cert_reqs == ssl.CERT_REQUIRED and not IS_PYOPENSSL:
context.verify_mode = cert_reqs
context.check_hostname = True
else:
context.check_hostname = False
context.verify_mode = cert_reqs
try:
context.hostname_checks_common_name = False
except AttributeError: # Defensive: for CPython < 3.9.3; for PyPy < 7.3.8
pass
sslkeylogfile = os.environ.get("SSLKEYLOGFILE")
if sslkeylogfile:
context.keylog_filename = sslkeylogfile
return context
@typing.overload
def ssl_wrap_socket(
sock: socket.socket,
keyfile: str | None = ...,
certfile: str | None = ...,
cert_reqs: int | None = ...,
ca_certs: str | None = ...,
server_hostname: str | None = ...,
ssl_version: int | None = ...,
ciphers: str | None = ...,
ssl_context: ssl.SSLContext | None = ...,
ca_cert_dir: str | None = ...,
key_password: str | None = ...,
ca_cert_data: None | str | bytes = ...,
tls_in_tls: typing.Literal[False] = ...,
) -> ssl.SSLSocket: ...
@typing.overload
def ssl_wrap_socket(
sock: socket.socket,
keyfile: str | None = ...,
certfile: str | None = ...,
cert_reqs: int | None = ...,
ca_certs: str | None = ...,
server_hostname: str | None = ...,
ssl_version: int | None = ...,
ciphers: str | None = ...,
ssl_context: ssl.SSLContext | None = ...,
ca_cert_dir: str | None = ...,
key_password: str | None = ...,
ca_cert_data: None | str | bytes = ...,
tls_in_tls: bool = ...,
) -> ssl.SSLSocket | SSLTransportType: ...
def ssl_wrap_socket(
sock: socket.socket,
keyfile: str | None = None,
certfile: str | None = None,
cert_reqs: int | None = None,
ca_certs: str | None = None,
server_hostname: str | None = None,
ssl_version: int | None = None,
ciphers: str | None = None,
ssl_context: ssl.SSLContext | None = None,
ca_cert_dir: str | None = None,
key_password: str | None = None,
ca_cert_data: None | str | bytes = None,
tls_in_tls: bool = False,
) -> ssl.SSLSocket | SSLTransportType:
"""
All arguments except for server_hostname, ssl_context, tls_in_tls, ca_cert_data and
ca_cert_dir have the same meaning as they do when using
:func:`ssl.create_default_context`, :meth:`ssl.SSLContext.load_cert_chain`,
:meth:`ssl.SSLContext.set_ciphers` and :meth:`ssl.SSLContext.wrap_socket`.
:param server_hostname:
When SNI is supported, the expected hostname of the certificate
:param ssl_context:
A pre-made :class:`SSLContext` object. If none is provided, one will
be created using :func:`create_urllib3_context`.
:param ciphers:
A string of ciphers we wish the client to support.
:param ca_cert_dir:
A directory containing CA certificates in multiple separate files, as
supported by OpenSSL's -CApath flag or the capath argument to
SSLContext.load_verify_locations().
:param key_password:
Optional password if the keyfile is encrypted.
:param ca_cert_data:
Optional string containing CA certificates in PEM format suitable for
passing as the cadata parameter to SSLContext.load_verify_locations()
:param tls_in_tls:
Use SSLTransport to wrap the existing socket.
"""
context = ssl_context
if context is None:
# Note: This branch of code and all the variables in it are only used in tests.
# We should consider deprecating and removing this code.
context = create_urllib3_context(ssl_version, cert_reqs, ciphers=ciphers)
if ca_certs or ca_cert_dir or ca_cert_data:
try:
context.load_verify_locations(ca_certs, ca_cert_dir, ca_cert_data)
except OSError as e:
raise SSLError(e) from e
elif ssl_context is None and hasattr(context, "load_default_certs"):
# try to load OS default certs; works well on Windows.
context.load_default_certs()
# Attempt to detect if we get the goofy behavior of the
# keyfile being encrypted and OpenSSL asking for the
# passphrase via the terminal and instead error out.
if keyfile and key_password is None and _is_key_file_encrypted(keyfile):
raise SSLError("Client private key is encrypted, password is required")
if certfile:
if key_password is None:
context.load_cert_chain(certfile, keyfile)
else:
context.load_cert_chain(certfile, keyfile, key_password)
context.set_alpn_protocols(ALPN_PROTOCOLS)
ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
return ssl_sock
def is_ipaddress(hostname: str | bytes) -> bool:
"""Detects whether the hostname given is an IPv4 or IPv6 address.
Also detects IPv6 addresses with Zone IDs.
:param str hostname: Hostname to examine.
:return: True if the hostname is an IP address, False otherwise.
"""
if isinstance(hostname, bytes):
# IDN A-label bytes are ASCII compatible.
hostname = hostname.decode("ascii")
return bool(_IPV4_RE.match(hostname) or _BRACELESS_IPV6_ADDRZ_RE.match(hostname))
def _is_key_file_encrypted(key_file: str) -> bool:
"""Detects if a key file is encrypted or not."""
with open(key_file) as f:
for line in f:
# Look for Proc-Type: 4,ENCRYPTED
if "ENCRYPTED" in line:
return True
return False
def _ssl_wrap_socket_impl(
sock: socket.socket,
ssl_context: ssl.SSLContext,
tls_in_tls: bool,
server_hostname: str | None = None,
) -> ssl.SSLSocket | SSLTransportType:
if tls_in_tls:
if not SSLTransport:
# Import error, ssl is not available.
raise ProxySchemeUnsupported(
"TLS in TLS requires support for the 'ssl' module"
)
SSLTransport._validate_ssl_context_for_tls_in_tls(ssl_context)
return SSLTransport(sock, ssl_context, server_hostname)
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)

View File

@ -0,0 +1,159 @@
"""The match_hostname() function from Python 3.5, essential when using SSL."""
# Note: This file is under the PSF license as the code comes from the python
# stdlib. http://docs.python.org/3/license.html
# It is modified to remove commonName support.
from __future__ import annotations
import ipaddress
import re
import typing
from ipaddress import IPv4Address, IPv6Address
if typing.TYPE_CHECKING:
from .ssl_ import _TYPE_PEER_CERT_RET_DICT
__version__ = "3.5.0.1"
class CertificateError(ValueError):
pass
def _dnsname_match(
dn: typing.Any, hostname: str, max_wildcards: int = 1
) -> typing.Match[str] | None | bool:
"""Matching according to RFC 6125, section 6.4.3
http://tools.ietf.org/html/rfc6125#section-6.4.3
"""
pats = []
if not dn:
return False
# Ported from python3-syntax:
# leftmost, *remainder = dn.split(r'.')
parts = dn.split(r".")
leftmost = parts[0]
remainder = parts[1:]
wildcards = leftmost.count("*")
if wildcards > max_wildcards:
# Issue #17980: avoid denials of service by refusing more
# than one wildcard per fragment. A survey of established
# policy among SSL implementations showed it to be a
# reasonable choice.
raise CertificateError(
"too many wildcards in certificate DNS name: " + repr(dn)
)
# speed up common case w/o wildcards
if not wildcards:
return bool(dn.lower() == hostname.lower())
# RFC 6125, section 6.4.3, subitem 1.
# The client SHOULD NOT attempt to match a presented identifier in which
# the wildcard character comprises a label other than the left-most label.
if leftmost == "*":
# When '*' is a fragment by itself, it matches a non-empty dotless
# fragment.
pats.append("[^.]+")
elif leftmost.startswith("xn--") or hostname.startswith("xn--"):
# RFC 6125, section 6.4.3, subitem 3.
# The client SHOULD NOT attempt to match a presented identifier
# where the wildcard character is embedded within an A-label or
# U-label of an internationalized domain name.
pats.append(re.escape(leftmost))
else:
# Otherwise, '*' matches any dotless string, e.g. www*
pats.append(re.escape(leftmost).replace(r"\*", "[^.]*"))
# add the remaining fragments, ignore any wildcards
for frag in remainder:
pats.append(re.escape(frag))
pat = re.compile(r"\A" + r"\.".join(pats) + r"\Z", re.IGNORECASE)
return pat.match(hostname)
def _ipaddress_match(ipname: str, host_ip: IPv4Address | IPv6Address) -> bool:
"""Exact matching of IP addresses.
RFC 9110 section 4.3.5: "A reference identity of IP-ID contains the decoded
bytes of the IP address. An IP version 4 address is 4 octets, and an IP
version 6 address is 16 octets. [...] A reference identity of type IP-ID
matches if the address is identical to an iPAddress value of the
subjectAltName extension of the certificate."
"""
# OpenSSL may add a trailing newline to a subjectAltName's IP address
# Divergence from upstream: ipaddress can't handle byte str
ip = ipaddress.ip_address(ipname.rstrip())
return bool(ip.packed == host_ip.packed)
def match_hostname(
cert: _TYPE_PEER_CERT_RET_DICT | None,
hostname: str,
hostname_checks_common_name: bool = False,
) -> None:
"""Verify that *cert* (in decoded format as returned by
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
rules are followed, but IP addresses are not accepted for *hostname*.
CertificateError is raised on failure. On success, the function
returns nothing.
"""
if not cert:
raise ValueError(
"empty or no certificate, match_hostname needs a "
"SSL socket or SSL context with either "
"CERT_OPTIONAL or CERT_REQUIRED"
)
try:
# Divergence from upstream: ipaddress can't handle byte str
#
# The ipaddress module shipped with Python < 3.9 does not support
# scoped IPv6 addresses so we unconditionally strip the Zone IDs for
# now. Once we drop support for Python 3.9 we can remove this branch.
if "%" in hostname:
host_ip = ipaddress.ip_address(hostname[: hostname.rfind("%")])
else:
host_ip = ipaddress.ip_address(hostname)
except ValueError:
# Not an IP address (common case)
host_ip = None
dnsnames = []
san: tuple[tuple[str, str], ...] = cert.get("subjectAltName", ())
key: str
value: str
for key, value in san:
if key == "DNS":
if host_ip is None and _dnsname_match(value, hostname):
return
dnsnames.append(value)
elif key == "IP Address":
if host_ip is not None and _ipaddress_match(value, host_ip):
return
dnsnames.append(value)
# We only check 'commonName' if it's enabled and we're not verifying
# an IP address. IP addresses aren't valid within 'commonName'.
if hostname_checks_common_name and host_ip is None and not dnsnames:
for sub in cert.get("subject", ()):
for key, value in sub:
if key == "commonName":
if _dnsname_match(value, hostname):
return
dnsnames.append(value) # Defensive: for Python < 3.9.3
if len(dnsnames) > 1:
raise CertificateError(
"hostname %r "
"doesn't match either of %s" % (hostname, ", ".join(map(repr, dnsnames)))
)
elif len(dnsnames) == 1:
raise CertificateError(f"hostname {hostname!r} doesn't match {dnsnames[0]!r}")
else:
raise CertificateError("no appropriate subjectAltName fields were found")

View File

@ -0,0 +1,271 @@
from __future__ import annotations
import io
import socket
import ssl
import typing
from ..exceptions import ProxySchemeUnsupported
if typing.TYPE_CHECKING:
from typing_extensions import Self
from .ssl_ import _TYPE_PEER_CERT_RET, _TYPE_PEER_CERT_RET_DICT
_WriteBuffer = typing.Union[bytearray, memoryview]
_ReturnValue = typing.TypeVar("_ReturnValue")
SSL_BLOCKSIZE = 16384
class SSLTransport:
"""
The SSLTransport wraps an existing socket and establishes an SSL connection.
Contrary to Python's implementation of SSLSocket, it allows you to chain
multiple TLS connections together. It's particularly useful if you need to
implement TLS within TLS.
The class supports most of the socket API operations.
"""
@staticmethod
def _validate_ssl_context_for_tls_in_tls(ssl_context: ssl.SSLContext) -> None:
"""
Raises a ProxySchemeUnsupported if the provided ssl_context can't be used
for TLS in TLS.
The only requirement is that the ssl_context provides the 'wrap_bio'
methods.
"""
if not hasattr(ssl_context, "wrap_bio"):
raise ProxySchemeUnsupported(
"TLS in TLS requires SSLContext.wrap_bio() which isn't "
"available on non-native SSLContext"
)
def __init__(
self,
socket: socket.socket,
ssl_context: ssl.SSLContext,
server_hostname: str | None = None,
suppress_ragged_eofs: bool = True,
) -> None:
"""
Create an SSLTransport around socket using the provided ssl_context.
"""
self.incoming = ssl.MemoryBIO()
self.outgoing = ssl.MemoryBIO()
self.suppress_ragged_eofs = suppress_ragged_eofs
self.socket = socket
self.sslobj = ssl_context.wrap_bio(
self.incoming, self.outgoing, server_hostname=server_hostname
)
# Perform initial handshake.
self._ssl_io_loop(self.sslobj.do_handshake)
def __enter__(self) -> Self:
return self
def __exit__(self, *_: typing.Any) -> None:
self.close()
def fileno(self) -> int:
return self.socket.fileno()
def read(self, len: int = 1024, buffer: typing.Any | None = None) -> int | bytes:
return self._wrap_ssl_read(len, buffer)
def recv(self, buflen: int = 1024, flags: int = 0) -> int | bytes:
if flags != 0:
raise ValueError("non-zero flags not allowed in calls to recv")
return self._wrap_ssl_read(buflen)
def recv_into(
self,
buffer: _WriteBuffer,
nbytes: int | None = None,
flags: int = 0,
) -> None | int | bytes:
if flags != 0:
raise ValueError("non-zero flags not allowed in calls to recv_into")
if nbytes is None:
nbytes = len(buffer)
return self.read(nbytes, buffer)
def sendall(self, data: bytes, flags: int = 0) -> None:
if flags != 0:
raise ValueError("non-zero flags not allowed in calls to sendall")
count = 0
with memoryview(data) as view, view.cast("B") as byte_view:
amount = len(byte_view)
while count < amount:
v = self.send(byte_view[count:])
count += v
def send(self, data: bytes, flags: int = 0) -> int:
if flags != 0:
raise ValueError("non-zero flags not allowed in calls to send")
return self._ssl_io_loop(self.sslobj.write, data)
def makefile(
self,
mode: str,
buffering: int | None = None,
*,
encoding: str | None = None,
errors: str | None = None,
newline: str | None = None,
) -> typing.BinaryIO | typing.TextIO | socket.SocketIO:
"""
Python's httpclient uses makefile and buffered io when reading HTTP
messages and we need to support it.
This is unfortunately a copy and paste of socket.py makefile with small
changes to point to the socket directly.
"""
if not set(mode) <= {"r", "w", "b"}:
raise ValueError(f"invalid mode {mode!r} (only r, w, b allowed)")
writing = "w" in mode
reading = "r" in mode or not writing
assert reading or writing
binary = "b" in mode
rawmode = ""
if reading:
rawmode += "r"
if writing:
rawmode += "w"
raw = socket.SocketIO(self, rawmode) # type: ignore[arg-type]
self.socket._io_refs += 1 # type: ignore[attr-defined]
if buffering is None:
buffering = -1
if buffering < 0:
buffering = io.DEFAULT_BUFFER_SIZE
if buffering == 0:
if not binary:
raise ValueError("unbuffered streams must be binary")
return raw
buffer: typing.BinaryIO
if reading and writing:
buffer = io.BufferedRWPair(raw, raw, buffering) # type: ignore[assignment]
elif reading:
buffer = io.BufferedReader(raw, buffering)
else:
assert writing
buffer = io.BufferedWriter(raw, buffering)
if binary:
return buffer
text = io.TextIOWrapper(buffer, encoding, errors, newline)
text.mode = mode # type: ignore[misc]
return text
def unwrap(self) -> None:
self._ssl_io_loop(self.sslobj.unwrap)
def close(self) -> None:
self.socket.close()
@typing.overload
def getpeercert(
self, binary_form: typing.Literal[False] = ...
) -> _TYPE_PEER_CERT_RET_DICT | None: ...
@typing.overload
def getpeercert(self, binary_form: typing.Literal[True]) -> bytes | None: ...
def getpeercert(self, binary_form: bool = False) -> _TYPE_PEER_CERT_RET:
return self.sslobj.getpeercert(binary_form) # type: ignore[return-value]
def version(self) -> str | None:
return self.sslobj.version()
def cipher(self) -> tuple[str, str, int] | None:
return self.sslobj.cipher()
def selected_alpn_protocol(self) -> str | None:
return self.sslobj.selected_alpn_protocol()
def shared_ciphers(self) -> list[tuple[str, str, int]] | None:
return self.sslobj.shared_ciphers()
def compression(self) -> str | None:
return self.sslobj.compression()
def settimeout(self, value: float | None) -> None:
self.socket.settimeout(value)
def gettimeout(self) -> float | None:
return self.socket.gettimeout()
def _decref_socketios(self) -> None:
self.socket._decref_socketios() # type: ignore[attr-defined]
def _wrap_ssl_read(self, len: int, buffer: bytearray | None = None) -> int | bytes:
try:
return self._ssl_io_loop(self.sslobj.read, len, buffer)
except ssl.SSLError as e:
if e.errno == ssl.SSL_ERROR_EOF and self.suppress_ragged_eofs:
return 0 # eof, return 0.
else:
raise
# func is sslobj.do_handshake or sslobj.unwrap
@typing.overload
def _ssl_io_loop(self, func: typing.Callable[[], None]) -> None: ...
# func is sslobj.write, arg1 is data
@typing.overload
def _ssl_io_loop(self, func: typing.Callable[[bytes], int], arg1: bytes) -> int: ...
# func is sslobj.read, arg1 is len, arg2 is buffer
@typing.overload
def _ssl_io_loop(
self,
func: typing.Callable[[int, bytearray | None], bytes],
arg1: int,
arg2: bytearray | None,
) -> bytes: ...
def _ssl_io_loop(
self,
func: typing.Callable[..., _ReturnValue],
arg1: None | bytes | int = None,
arg2: bytearray | None = None,
) -> _ReturnValue:
"""Performs an I/O loop between incoming/outgoing and the socket."""
should_loop = True
ret = None
while should_loop:
errno = None
try:
if arg1 is None and arg2 is None:
ret = func()
elif arg2 is None:
ret = func(arg1)
else:
ret = func(arg1, arg2)
except ssl.SSLError as e:
if e.errno not in (ssl.SSL_ERROR_WANT_READ, ssl.SSL_ERROR_WANT_WRITE):
# WANT_READ, and WANT_WRITE are expected, others are not.
raise e
errno = e.errno
buf = self.outgoing.read()
self.socket.sendall(buf)
if errno is None:
should_loop = False
elif errno == ssl.SSL_ERROR_WANT_READ:
buf = self.socket.recv(SSL_BLOCKSIZE)
if buf:
self.incoming.write(buf)
else:
self.incoming.write_eof()
return typing.cast(_ReturnValue, ret)

View File

@ -0,0 +1,275 @@
from __future__ import annotations
import time
import typing
from enum import Enum
from socket import getdefaulttimeout
from ..exceptions import TimeoutStateError
if typing.TYPE_CHECKING:
from typing import Final
class _TYPE_DEFAULT(Enum):
# This value should never be passed to socket.settimeout() so for safety we use a -1.
# socket.settimout() raises a ValueError for negative values.
token = -1
_DEFAULT_TIMEOUT: Final[_TYPE_DEFAULT] = _TYPE_DEFAULT.token
_TYPE_TIMEOUT = typing.Optional[typing.Union[float, _TYPE_DEFAULT]]
class Timeout:
"""Timeout configuration.
Timeouts can be defined as a default for a pool:
.. code-block:: python
import urllib3
timeout = urllib3.util.Timeout(connect=2.0, read=7.0)
http = urllib3.PoolManager(timeout=timeout)
resp = http.request("GET", "https://example.com/")
print(resp.status)
Or per-request (which overrides the default for the pool):
.. code-block:: python
response = http.request("GET", "https://example.com/", timeout=Timeout(10))
Timeouts can be disabled by setting all the parameters to ``None``:
.. code-block:: python
no_timeout = Timeout(connect=None, read=None)
response = http.request("GET", "https://example.com/", timeout=no_timeout)
:param total:
This combines the connect and read timeouts into one; the read timeout
will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: int, float, or None
:param connect:
The maximum amount of time (in seconds) to wait for a connection
attempt to a server to succeed. Omitting the parameter will default the
connect timeout to the system default, probably `the global default
timeout in socket.py
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
None will set an infinite timeout for connection attempts.
:type connect: int, float, or None
:param read:
The maximum amount of time (in seconds) to wait between consecutive
read operations for a response from the server. Omitting the parameter
will default the read timeout to the system default, probably `the
global default timeout in socket.py
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
None will set an infinite timeout.
:type read: int, float, or None
.. note::
Many factors can affect the total amount of time for urllib3 to return
an HTTP response.
For example, Python's DNS resolver does not obey the timeout specified
on the socket. Other factors that can affect total request time include
high CPU load, high swap, the program running at a low priority level,
or other behaviors.
In addition, the read and total timeouts only measure the time between
read operations on the socket connecting the client and the server,
not the total amount of time for the request to return a complete
response. For most requests, the timeout is raised because the server
has not sent the first byte in the specified time. This is not always
the case; if a server streams one byte every fifteen seconds, a timeout
of 20 seconds will not trigger, even though the request will take
several minutes to complete.
"""
#: A sentinel object representing the default timeout value
DEFAULT_TIMEOUT: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT
def __init__(
self,
total: _TYPE_TIMEOUT = None,
connect: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
read: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
) -> None:
self._connect = self._validate_timeout(connect, "connect")
self._read = self._validate_timeout(read, "read")
self.total = self._validate_timeout(total, "total")
self._start_connect: float | None = None
def __repr__(self) -> str:
return f"{type(self).__name__}(connect={self._connect!r}, read={self._read!r}, total={self.total!r})"
# __str__ provided for backwards compatibility
__str__ = __repr__
@staticmethod
def resolve_default_timeout(timeout: _TYPE_TIMEOUT) -> float | None:
return getdefaulttimeout() if timeout is _DEFAULT_TIMEOUT else timeout
@classmethod
def _validate_timeout(cls, value: _TYPE_TIMEOUT, name: str) -> _TYPE_TIMEOUT:
"""Check that a timeout attribute is valid.
:param value: The timeout value to validate
:param name: The name of the timeout attribute to validate. This is
used to specify in error messages.
:return: The validated and casted version of the given value.
:raises ValueError: If it is a numeric value less than or equal to
zero, or the type is not an integer, float, or None.
"""
if value is None or value is _DEFAULT_TIMEOUT:
return value
if isinstance(value, bool):
raise ValueError(
"Timeout cannot be a boolean value. It must "
"be an int, float or None."
)
try:
float(value)
except (TypeError, ValueError):
raise ValueError(
"Timeout value %s was %s, but it must be an "
"int, float or None." % (name, value)
) from None
try:
if value <= 0:
raise ValueError(
"Attempted to set %s timeout to %s, but the "
"timeout cannot be set to a value less "
"than or equal to 0." % (name, value)
)
except TypeError:
raise ValueError(
"Timeout value %s was %s, but it must be an "
"int, float or None." % (name, value)
) from None
return value
@classmethod
def from_float(cls, timeout: _TYPE_TIMEOUT) -> Timeout:
"""Create a new Timeout from a legacy timeout value.
The timeout value used by httplib.py sets the same timeout on the
connect(), and recv() socket requests. This creates a :class:`Timeout`
object that sets the individual timeouts to the ``timeout`` value
passed to this function.
:param timeout: The legacy timeout value.
:type timeout: integer, float, :attr:`urllib3.util.Timeout.DEFAULT_TIMEOUT`, or None
:return: Timeout object
:rtype: :class:`Timeout`
"""
return Timeout(read=timeout, connect=timeout)
def clone(self) -> Timeout:
"""Create a copy of the timeout object
Timeout properties are stored per-pool but each request needs a fresh
Timeout object to ensure each one has its own start/stop configured.
:return: a copy of the timeout object
:rtype: :class:`Timeout`
"""
# We can't use copy.deepcopy because that will also create a new object
# for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
# detect the user default.
return Timeout(connect=self._connect, read=self._read, total=self.total)
def start_connect(self) -> float:
"""Start the timeout clock, used during a connect() attempt
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to start a timer that has been started already.
"""
if self._start_connect is not None:
raise TimeoutStateError("Timeout timer has already been started.")
self._start_connect = time.monotonic()
return self._start_connect
def get_connect_duration(self) -> float:
"""Gets the time elapsed since the call to :meth:`start_connect`.
:return: Elapsed time in seconds.
:rtype: float
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to get duration for a timer that hasn't been started.
"""
if self._start_connect is None:
raise TimeoutStateError(
"Can't get connect duration for timer that has not started."
)
return time.monotonic() - self._start_connect
@property
def connect_timeout(self) -> _TYPE_TIMEOUT:
"""Get the value to use when setting a connection timeout.
This will be a positive float or integer, the value None
(never timeout), or the default system timeout.
:return: Connect timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
"""
if self.total is None:
return self._connect
if self._connect is None or self._connect is _DEFAULT_TIMEOUT:
return self.total
return min(self._connect, self.total) # type: ignore[type-var]
@property
def read_timeout(self) -> float | None:
"""Get the value for the read timeout.
This assumes some time has elapsed in the connection timeout and
computes the read timeout appropriately.
If self.total is set, the read timeout is dependent on the amount of
time taken by the connect timeout. If the connection time has not been
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
raised.
:return: Value to use for the read timeout.
:rtype: int, float or None
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
has not yet been called on this object.
"""
if (
self.total is not None
and self.total is not _DEFAULT_TIMEOUT
and self._read is not None
and self._read is not _DEFAULT_TIMEOUT
):
# In case the connect timeout has not yet been established.
if self._start_connect is None:
return self._read
return max(0, min(self.total - self.get_connect_duration(), self._read))
elif self.total is not None and self.total is not _DEFAULT_TIMEOUT:
return max(0, self.total - self.get_connect_duration())
else:
return self.resolve_default_timeout(self._read)

View File

@ -0,0 +1,469 @@
from __future__ import annotations
import re
import typing
from ..exceptions import LocationParseError
from .util import to_str
# We only want to normalize urls with an HTTP(S) scheme.
# urllib3 infers URLs without a scheme (None) to be http.
_NORMALIZABLE_SCHEMES = ("http", "https", None)
# Almost all of these patterns were derived from the
# 'rfc3986' module: https://github.com/python-hyper/rfc3986
_PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
_SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
_URI_RE = re.compile(
r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?"
r"(?://([^\\/?#]*))?"
r"([^?#]*)"
r"(?:\?([^#]*))?"
r"(?:#(.*))?$",
re.UNICODE | re.DOTALL,
)
_IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
_HEX_PAT = "[0-9A-Fa-f]{1,4}"
_LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=_HEX_PAT, ipv4=_IPV4_PAT)
_subs = {"hex": _HEX_PAT, "ls32": _LS32_PAT}
_variations = [
# 6( h16 ":" ) ls32
"(?:%(hex)s:){6}%(ls32)s",
# "::" 5( h16 ":" ) ls32
"::(?:%(hex)s:){5}%(ls32)s",
# [ h16 ] "::" 4( h16 ":" ) ls32
"(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
# [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
"(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
# [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
"(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
# [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
"(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
# [ *4( h16 ":" ) h16 ] "::" ls32
"(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
# [ *5( h16 ":" ) h16 ] "::" h16
"(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
# [ *6( h16 ":" ) h16 ] "::"
"(?:(?:%(hex)s:){0,6}%(hex)s)?::",
]
_UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
_IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
_ZONE_ID_PAT = "(?:%25|%)(?:[" + _UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
_IPV6_ADDRZ_PAT = r"\[" + _IPV6_PAT + r"(?:" + _ZONE_ID_PAT + r")?\]"
_REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
_TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")
_IPV4_RE = re.compile("^" + _IPV4_PAT + "$")
_IPV6_RE = re.compile("^" + _IPV6_PAT + "$")
_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT + "$")
_BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + _IPV6_ADDRZ_PAT[2:-2] + "$")
_ZONE_ID_RE = re.compile("(" + _ZONE_ID_PAT + r")\]$")
_HOST_PORT_PAT = ("^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
_REG_NAME_PAT,
_IPV4_PAT,
_IPV6_ADDRZ_PAT,
)
_HOST_PORT_RE = re.compile(_HOST_PORT_PAT, re.UNICODE | re.DOTALL)
_UNRESERVED_CHARS = set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
)
_SUB_DELIM_CHARS = set("!$&'()*+,;=")
_USERINFO_CHARS = _UNRESERVED_CHARS | _SUB_DELIM_CHARS | {":"}
_PATH_CHARS = _USERINFO_CHARS | {"@", "/"}
_QUERY_CHARS = _FRAGMENT_CHARS = _PATH_CHARS | {"?"}
class Url(
typing.NamedTuple(
"Url",
[
("scheme", typing.Optional[str]),
("auth", typing.Optional[str]),
("host", typing.Optional[str]),
("port", typing.Optional[int]),
("path", typing.Optional[str]),
("query", typing.Optional[str]),
("fragment", typing.Optional[str]),
],
)
):
"""
Data structure for representing an HTTP URL. Used as a return value for
:func:`parse_url`. Both the scheme and host are normalized as they are
both case-insensitive according to RFC 3986.
"""
def __new__( # type: ignore[no-untyped-def]
cls,
scheme: str | None = None,
auth: str | None = None,
host: str | None = None,
port: int | None = None,
path: str | None = None,
query: str | None = None,
fragment: str | None = None,
):
if path and not path.startswith("/"):
path = "/" + path
if scheme is not None:
scheme = scheme.lower()
return super().__new__(cls, scheme, auth, host, port, path, query, fragment)
@property
def hostname(self) -> str | None:
"""For backwards-compatibility with urlparse. We're nice like that."""
return self.host
@property
def request_uri(self) -> str:
"""Absolute path including the query string."""
uri = self.path or "/"
if self.query is not None:
uri += "?" + self.query
return uri
@property
def authority(self) -> str | None:
"""
Authority component as defined in RFC 3986 3.2.
This includes userinfo (auth), host and port.
i.e.
userinfo@host:port
"""
userinfo = self.auth
netloc = self.netloc
if netloc is None or userinfo is None:
return netloc
else:
return f"{userinfo}@{netloc}"
@property
def netloc(self) -> str | None:
"""
Network location including host and port.
If you need the equivalent of urllib.parse's ``netloc``,
use the ``authority`` property instead.
"""
if self.host is None:
return None
if self.port:
return f"{self.host}:{self.port}"
return self.host
@property
def url(self) -> str:
"""
Convert self into a url
This function should more or less round-trip with :func:`.parse_url`. The
returned url may not be exactly the same as the url inputted to
:func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
with a blank port will have : removed).
Example:
.. code-block:: python
import urllib3
U = urllib3.util.parse_url("https://google.com/mail/")
print(U.url)
# "https://google.com/mail/"
print( urllib3.util.Url("https", "username:password",
"host.com", 80, "/path", "query", "fragment"
).url
)
# "https://username:password@host.com:80/path?query#fragment"
"""
scheme, auth, host, port, path, query, fragment = self
url = ""
# We use "is not None" we want things to happen with empty strings (or 0 port)
if scheme is not None:
url += scheme + "://"
if auth is not None:
url += auth + "@"
if host is not None:
url += host
if port is not None:
url += ":" + str(port)
if path is not None:
url += path
if query is not None:
url += "?" + query
if fragment is not None:
url += "#" + fragment
return url
def __str__(self) -> str:
return self.url
@typing.overload
def _encode_invalid_chars(
component: str, allowed_chars: typing.Container[str]
) -> str: # Abstract
...
@typing.overload
def _encode_invalid_chars(
component: None, allowed_chars: typing.Container[str]
) -> None: # Abstract
...
def _encode_invalid_chars(
component: str | None, allowed_chars: typing.Container[str]
) -> str | None:
"""Percent-encodes a URI component without reapplying
onto an already percent-encoded component.
"""
if component is None:
return component
component = to_str(component)
# Normalize existing percent-encoded bytes.
# Try to see if the component we're encoding is already percent-encoded
# so we can skip all '%' characters but still encode all others.
component, percent_encodings = _PERCENT_RE.subn(
lambda match: match.group(0).upper(), component
)
uri_bytes = component.encode("utf-8", "surrogatepass")
is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
encoded_component = bytearray()
for i in range(0, len(uri_bytes)):
# Will return a single character bytestring
byte = uri_bytes[i : i + 1]
byte_ord = ord(byte)
if (is_percent_encoded and byte == b"%") or (
byte_ord < 128 and byte.decode() in allowed_chars
):
encoded_component += byte
continue
encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))
return encoded_component.decode()
def _remove_path_dot_segments(path: str) -> str:
# See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
segments = path.split("/") # Turn the path into a list of segments
output = [] # Initialize the variable to use to store output
for segment in segments:
# '.' is the current directory, so ignore it, it is superfluous
if segment == ".":
continue
# Anything other than '..', should be appended to the output
if segment != "..":
output.append(segment)
# In this case segment == '..', if we can, we should pop the last
# element
elif output:
output.pop()
# If the path starts with '/' and the output is empty or the first string
# is non-empty
if path.startswith("/") and (not output or output[0]):
output.insert(0, "")
# If the path starts with '/.' or '/..' ensure we add one more empty
# string to add a trailing '/'
if path.endswith(("/.", "/..")):
output.append("")
return "/".join(output)
@typing.overload
def _normalize_host(host: None, scheme: str | None) -> None: ...
@typing.overload
def _normalize_host(host: str, scheme: str | None) -> str: ...
def _normalize_host(host: str | None, scheme: str | None) -> str | None:
if host:
if scheme in _NORMALIZABLE_SCHEMES:
is_ipv6 = _IPV6_ADDRZ_RE.match(host)
if is_ipv6:
# IPv6 hosts of the form 'a::b%zone' are encoded in a URL as
# such per RFC 6874: 'a::b%25zone'. Unquote the ZoneID
# separator as necessary to return a valid RFC 4007 scoped IP.
match = _ZONE_ID_RE.search(host)
if match:
start, end = match.span(1)
zone_id = host[start:end]
if zone_id.startswith("%25") and zone_id != "%25":
zone_id = zone_id[3:]
else:
zone_id = zone_id[1:]
zone_id = _encode_invalid_chars(zone_id, _UNRESERVED_CHARS)
return f"{host[:start].lower()}%{zone_id}{host[end:]}"
else:
return host.lower()
elif not _IPV4_RE.match(host):
return to_str(
b".".join([_idna_encode(label) for label in host.split(".")]),
"ascii",
)
return host
def _idna_encode(name: str) -> bytes:
if not name.isascii():
try:
import idna
except ImportError:
raise LocationParseError(
"Unable to parse URL without the 'idna' module"
) from None
try:
return idna.encode(name.lower(), strict=True, std3_rules=True)
except idna.IDNAError:
raise LocationParseError(
f"Name '{name}' is not a valid IDNA label"
) from None
return name.lower().encode("ascii")
def _encode_target(target: str) -> str:
"""Percent-encodes a request target so that there are no invalid characters
Pre-condition for this function is that 'target' must start with '/'.
If that is the case then _TARGET_RE will always produce a match.
"""
match = _TARGET_RE.match(target)
if not match: # Defensive:
raise LocationParseError(f"{target!r} is not a valid request URI")
path, query = match.groups()
encoded_target = _encode_invalid_chars(path, _PATH_CHARS)
if query is not None:
query = _encode_invalid_chars(query, _QUERY_CHARS)
encoded_target += "?" + query
return encoded_target
def parse_url(url: str) -> Url:
"""
Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
performed to parse incomplete urls. Fields not provided will be None.
This parser is RFC 3986 and RFC 6874 compliant.
The parser logic and helper functions are based heavily on
work done in the ``rfc3986`` module.
:param str url: URL to parse into a :class:`.Url` namedtuple.
Partly backwards-compatible with :mod:`urllib.parse`.
Example:
.. code-block:: python
import urllib3
print( urllib3.util.parse_url('http://google.com/mail/'))
# Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
print( urllib3.util.parse_url('google.com:80'))
# Url(scheme=None, host='google.com', port=80, path=None, ...)
print( urllib3.util.parse_url('/foo?bar'))
# Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
"""
if not url:
# Empty
return Url()
source_url = url
if not _SCHEME_RE.search(url):
url = "//" + url
scheme: str | None
authority: str | None
auth: str | None
host: str | None
port: str | None
port_int: int | None
path: str | None
query: str | None
fragment: str | None
try:
scheme, authority, path, query, fragment = _URI_RE.match(url).groups() # type: ignore[union-attr]
normalize_uri = scheme is None or scheme.lower() in _NORMALIZABLE_SCHEMES
if scheme:
scheme = scheme.lower()
if authority:
auth, _, host_port = authority.rpartition("@")
auth = auth or None
host, port = _HOST_PORT_RE.match(host_port).groups() # type: ignore[union-attr]
if auth and normalize_uri:
auth = _encode_invalid_chars(auth, _USERINFO_CHARS)
if port == "":
port = None
else:
auth, host, port = None, None, None
if port is not None:
port_int = int(port)
if not (0 <= port_int <= 65535):
raise LocationParseError(url)
else:
port_int = None
host = _normalize_host(host, scheme)
if normalize_uri and path:
path = _remove_path_dot_segments(path)
path = _encode_invalid_chars(path, _PATH_CHARS)
if normalize_uri and query:
query = _encode_invalid_chars(query, _QUERY_CHARS)
if normalize_uri and fragment:
fragment = _encode_invalid_chars(fragment, _FRAGMENT_CHARS)
except (ValueError, AttributeError) as e:
raise LocationParseError(source_url) from e
# For the sake of backwards compatibility we put empty
# string values for path if there are any defined values
# beyond the path in the URL.
# TODO: Remove this when we break backwards compatibility.
if not path:
if query is not None or fragment is not None:
path = ""
else:
path = None
return Url(
scheme=scheme,
auth=auth,
host=host,
port=port_int,
path=path,
query=query,
fragment=fragment,
)

View File

@ -0,0 +1,42 @@
from __future__ import annotations
import typing
from types import TracebackType
def to_bytes(
x: str | bytes, encoding: str | None = None, errors: str | None = None
) -> bytes:
if isinstance(x, bytes):
return x
elif not isinstance(x, str):
raise TypeError(f"not expecting type {type(x).__name__}")
if encoding or errors:
return x.encode(encoding or "utf-8", errors=errors or "strict")
return x.encode()
def to_str(
x: str | bytes, encoding: str | None = None, errors: str | None = None
) -> str:
if isinstance(x, str):
return x
elif not isinstance(x, bytes):
raise TypeError(f"not expecting type {type(x).__name__}")
if encoding or errors:
return x.decode(encoding or "utf-8", errors=errors or "strict")
return x.decode()
def reraise(
tp: type[BaseException] | None,
value: BaseException,
tb: TracebackType | None = None,
) -> typing.NoReturn:
try:
if value.__traceback__ is not tb:
raise value.with_traceback(tb)
raise value
finally:
value = None # type: ignore[assignment]
tb = None

View File

@ -0,0 +1,124 @@
from __future__ import annotations
import select
import socket
from functools import partial
__all__ = ["wait_for_read", "wait_for_write"]
# How should we wait on sockets?
#
# There are two types of APIs you can use for waiting on sockets: the fancy
# modern stateful APIs like epoll/kqueue, and the older stateless APIs like
# select/poll. The stateful APIs are more efficient when you have a lots of
# sockets to keep track of, because you can set them up once and then use them
# lots of times. But we only ever want to wait on a single socket at a time
# and don't want to keep track of state, so the stateless APIs are actually
# more efficient. So we want to use select() or poll().
#
# Now, how do we choose between select() and poll()? On traditional Unixes,
# select() has a strange calling convention that makes it slow, or fail
# altogether, for high-numbered file descriptors. The point of poll() is to fix
# that, so on Unixes, we prefer poll().
#
# On Windows, there is no poll() (or at least Python doesn't provide a wrapper
# for it), but that's OK, because on Windows, select() doesn't have this
# strange calling convention; plain select() works fine.
#
# So: on Windows we use select(), and everywhere else we use poll(). We also
# fall back to select() in case poll() is somehow broken or missing.
def select_wait_for_socket(
sock: socket.socket,
read: bool = False,
write: bool = False,
timeout: float | None = None,
) -> bool:
if not read and not write:
raise RuntimeError("must specify at least one of read=True, write=True")
rcheck = []
wcheck = []
if read:
rcheck.append(sock)
if write:
wcheck.append(sock)
# When doing a non-blocking connect, most systems signal success by
# marking the socket writable. Windows, though, signals success by marked
# it as "exceptional". We paper over the difference by checking the write
# sockets for both conditions. (The stdlib selectors module does the same
# thing.)
fn = partial(select.select, rcheck, wcheck, wcheck)
rready, wready, xready = fn(timeout)
return bool(rready or wready or xready)
def poll_wait_for_socket(
sock: socket.socket,
read: bool = False,
write: bool = False,
timeout: float | None = None,
) -> bool:
if not read and not write:
raise RuntimeError("must specify at least one of read=True, write=True")
mask = 0
if read:
mask |= select.POLLIN
if write:
mask |= select.POLLOUT
poll_obj = select.poll()
poll_obj.register(sock, mask)
# For some reason, poll() takes timeout in milliseconds
def do_poll(t: float | None) -> list[tuple[int, int]]:
if t is not None:
t *= 1000
return poll_obj.poll(t)
return bool(do_poll(timeout))
def _have_working_poll() -> bool:
# Apparently some systems have a select.poll that fails as soon as you try
# to use it, either due to strange configuration or broken monkeypatching
# from libraries like eventlet/greenlet.
try:
poll_obj = select.poll()
poll_obj.poll(0)
except (AttributeError, OSError):
return False
else:
return True
def wait_for_socket(
sock: socket.socket,
read: bool = False,
write: bool = False,
timeout: float | None = None,
) -> bool:
# We delay choosing which implementation to use until the first time we're
# called. We could do it at import time, but then we might make the wrong
# decision if someone goes wild with monkeypatching select.poll after
# we're imported.
global wait_for_socket
if _have_working_poll():
wait_for_socket = poll_wait_for_socket
elif hasattr(select, "select"):
wait_for_socket = select_wait_for_socket
return wait_for_socket(sock, read, write, timeout)
def wait_for_read(sock: socket.socket, timeout: float | None = None) -> bool:
"""Waits for reading to be available on a given socket.
Returns True if the socket is readable, or False if the timeout expired.
"""
return wait_for_socket(sock, read=True, timeout=timeout)
def wait_for_write(sock: socket.socket, timeout: float | None = None) -> bool:
"""Waits for writing to be available on a given socket.
Returns True if the socket is readable, or False if the timeout expired.
"""
return wait_for_socket(sock, write=True, timeout=timeout)