Update 2025-04-24_11:44:19

This commit is contained in:
oib
2025-04-24 11:44:23 +02:00
commit e748c737f4
3408 changed files with 717481 additions and 0 deletions

View File

@ -0,0 +1,16 @@
from __future__ import annotations
import urllib3.connection
from ...connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connection import EmscriptenHTTPConnection, EmscriptenHTTPSConnection
def inject_into_urllib3() -> None:
# override connection classes to use emscripten specific classes
# n.b. mypy complains about the overriding of classes below
# if it isn't ignored
HTTPConnectionPool.ConnectionCls = EmscriptenHTTPConnection
HTTPSConnectionPool.ConnectionCls = EmscriptenHTTPSConnection
urllib3.connection.HTTPConnection = EmscriptenHTTPConnection # type: ignore[misc,assignment]
urllib3.connection.HTTPSConnection = EmscriptenHTTPSConnection # type: ignore[misc,assignment]

View File

@ -0,0 +1,255 @@
from __future__ import annotations
import os
import typing
# use http.client.HTTPException for consistency with non-emscripten
from http.client import HTTPException as HTTPException # noqa: F401
from http.client import ResponseNotReady
from ..._base_connection import _TYPE_BODY
from ...connection import HTTPConnection, ProxyConfig, port_by_scheme
from ...exceptions import TimeoutError
from ...response import BaseHTTPResponse
from ...util.connection import _TYPE_SOCKET_OPTIONS
from ...util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
from ...util.url import Url
from .fetch import _RequestError, _TimeoutError, send_request, send_streaming_request
from .request import EmscriptenRequest
from .response import EmscriptenHttpResponseWrapper, EmscriptenResponse
if typing.TYPE_CHECKING:
from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection
class EmscriptenHTTPConnection:
default_port: typing.ClassVar[int] = port_by_scheme["http"]
default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]
timeout: None | (float)
host: str
port: int
blocksize: int
source_address: tuple[str, int] | None
socket_options: _TYPE_SOCKET_OPTIONS | None
proxy: Url | None
proxy_config: ProxyConfig | None
is_verified: bool = False
proxy_is_verified: bool | None = None
_response: EmscriptenResponse | None
def __init__(
self,
host: str,
port: int = 0,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 8192,
socket_options: _TYPE_SOCKET_OPTIONS | None = None,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
) -> None:
self.host = host
self.port = port
self.timeout = timeout if isinstance(timeout, float) else 0.0
self.scheme = "http"
self._closed = True
self._response = None
# ignore these things because we don't
# have control over that stuff
self.proxy = None
self.proxy_config = None
self.blocksize = blocksize
self.source_address = None
self.socket_options = None
self.is_verified = False
def set_tunnel(
self,
host: str,
port: int | None = 0,
headers: typing.Mapping[str, str] | None = None,
scheme: str = "http",
) -> None:
pass
def connect(self) -> None:
pass
def request(
self,
method: str,
url: str,
body: _TYPE_BODY | None = None,
headers: typing.Mapping[str, str] | None = None,
# We know *at least* botocore is depending on the order of the
# first 3 parameters so to be safe we only mark the later ones
# as keyword-only to ensure we have space to extend.
*,
chunked: bool = False,
preload_content: bool = True,
decode_content: bool = True,
enforce_content_length: bool = True,
) -> None:
self._closed = False
if url.startswith("/"):
# no scheme / host / port included, make a full url
url = f"{self.scheme}://{self.host}:{self.port}" + url
request = EmscriptenRequest(
url=url,
method=method,
timeout=self.timeout if self.timeout else 0,
decode_content=decode_content,
)
request.set_body(body)
if headers:
for k, v in headers.items():
request.set_header(k, v)
self._response = None
try:
if not preload_content:
self._response = send_streaming_request(request)
if self._response is None:
self._response = send_request(request)
except _TimeoutError as e:
raise TimeoutError(e.message) from e
except _RequestError as e:
raise HTTPException(e.message) from e
def getresponse(self) -> BaseHTTPResponse:
if self._response is not None:
return EmscriptenHttpResponseWrapper(
internal_response=self._response,
url=self._response.request.url,
connection=self,
)
else:
raise ResponseNotReady()
def close(self) -> None:
self._closed = True
self._response = None
@property
def is_closed(self) -> bool:
"""Whether the connection either is brand new or has been previously closed.
If this property is True then both ``is_connected`` and ``has_connected_to_proxy``
properties must be False.
"""
return self._closed
@property
def is_connected(self) -> bool:
"""Whether the connection is actively connected to any origin (proxy or target)"""
return True
@property
def has_connected_to_proxy(self) -> bool:
"""Whether the connection has successfully connected to its proxy.
This returns False if no proxy is in use. Used to determine whether
errors are coming from the proxy layer or from tunnelling to the target origin.
"""
return False
class EmscriptenHTTPSConnection(EmscriptenHTTPConnection):
default_port = port_by_scheme["https"]
# all this is basically ignored, as browser handles https
cert_reqs: int | str | None = None
ca_certs: str | None = None
ca_cert_dir: str | None = None
ca_cert_data: None | str | bytes = None
cert_file: str | None
key_file: str | None
key_password: str | None
ssl_context: typing.Any | None
ssl_version: int | str | None = None
ssl_minimum_version: int | None = None
ssl_maximum_version: int | None = None
assert_hostname: None | str | typing.Literal[False]
assert_fingerprint: str | None = None
def __init__(
self,
host: str,
port: int = 0,
*,
timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
source_address: tuple[str, int] | None = None,
blocksize: int = 16384,
socket_options: (
None | _TYPE_SOCKET_OPTIONS
) = HTTPConnection.default_socket_options,
proxy: Url | None = None,
proxy_config: ProxyConfig | None = None,
cert_reqs: int | str | None = None,
assert_hostname: None | str | typing.Literal[False] = None,
assert_fingerprint: str | None = None,
server_hostname: str | None = None,
ssl_context: typing.Any | None = None,
ca_certs: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
ssl_minimum_version: int | None = None,
ssl_maximum_version: int | None = None,
ssl_version: int | str | None = None, # Deprecated
cert_file: str | None = None,
key_file: str | None = None,
key_password: str | None = None,
) -> None:
super().__init__(
host,
port=port,
timeout=timeout,
source_address=source_address,
blocksize=blocksize,
socket_options=socket_options,
proxy=proxy,
proxy_config=proxy_config,
)
self.scheme = "https"
self.key_file = key_file
self.cert_file = cert_file
self.key_password = key_password
self.ssl_context = ssl_context
self.server_hostname = server_hostname
self.assert_hostname = assert_hostname
self.assert_fingerprint = assert_fingerprint
self.ssl_version = ssl_version
self.ssl_minimum_version = ssl_minimum_version
self.ssl_maximum_version = ssl_maximum_version
self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
self.ca_cert_data = ca_cert_data
self.cert_reqs = None
# The browser will automatically verify all requests.
# We have no control over that setting.
self.is_verified = True
def set_cert(
self,
key_file: str | None = None,
cert_file: str | None = None,
cert_reqs: int | str | None = None,
key_password: str | None = None,
ca_certs: str | None = None,
assert_hostname: None | str | typing.Literal[False] = None,
assert_fingerprint: str | None = None,
ca_cert_dir: str | None = None,
ca_cert_data: None | str | bytes = None,
) -> None:
pass
# verify that this class implements BaseHTTP(s) connection correctly
if typing.TYPE_CHECKING:
_supports_http_protocol: BaseHTTPConnection = EmscriptenHTTPConnection("", 0)
_supports_https_protocol: BaseHTTPSConnection = EmscriptenHTTPSConnection("", 0)

View File

@ -0,0 +1,110 @@
let Status = {
SUCCESS_HEADER: -1,
SUCCESS_EOF: -2,
ERROR_TIMEOUT: -3,
ERROR_EXCEPTION: -4,
};
let connections = {};
let nextConnectionID = 1;
const encoder = new TextEncoder();
self.addEventListener("message", async function (event) {
if (event.data.close) {
let connectionID = event.data.close;
delete connections[connectionID];
return;
} else if (event.data.getMore) {
let connectionID = event.data.getMore;
let { curOffset, value, reader, intBuffer, byteBuffer } =
connections[connectionID];
// if we still have some in buffer, then just send it back straight away
if (!value || curOffset >= value.length) {
// read another buffer if required
try {
let readResponse = await reader.read();
if (readResponse.done) {
// read everything - clear connection and return
delete connections[connectionID];
Atomics.store(intBuffer, 0, Status.SUCCESS_EOF);
Atomics.notify(intBuffer, 0);
// finished reading successfully
// return from event handler
return;
}
curOffset = 0;
connections[connectionID].value = readResponse.value;
value = readResponse.value;
} catch (error) {
console.log("Request exception:", error);
let errorBytes = encoder.encode(error.message);
let written = errorBytes.length;
byteBuffer.set(errorBytes);
intBuffer[1] = written;
Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION);
Atomics.notify(intBuffer, 0);
}
}
// send as much buffer as we can
let curLen = value.length - curOffset;
if (curLen > byteBuffer.length) {
curLen = byteBuffer.length;
}
byteBuffer.set(value.subarray(curOffset, curOffset + curLen), 0);
Atomics.store(intBuffer, 0, curLen); // store current length in bytes
Atomics.notify(intBuffer, 0);
curOffset += curLen;
connections[connectionID].curOffset = curOffset;
return;
} else {
// start fetch
let connectionID = nextConnectionID;
nextConnectionID += 1;
const intBuffer = new Int32Array(event.data.buffer);
const byteBuffer = new Uint8Array(event.data.buffer, 8);
try {
const response = await fetch(event.data.url, event.data.fetchParams);
// return the headers first via textencoder
var headers = [];
for (const pair of response.headers.entries()) {
headers.push([pair[0], pair[1]]);
}
let headerObj = {
headers: headers,
status: response.status,
connectionID,
};
const headerText = JSON.stringify(headerObj);
let headerBytes = encoder.encode(headerText);
let written = headerBytes.length;
byteBuffer.set(headerBytes);
intBuffer[1] = written;
// make a connection
connections[connectionID] = {
reader: response.body.getReader(),
intBuffer: intBuffer,
byteBuffer: byteBuffer,
value: undefined,
curOffset: 0,
};
// set header ready
Atomics.store(intBuffer, 0, Status.SUCCESS_HEADER);
Atomics.notify(intBuffer, 0);
// all fetching after this goes through a new postmessage call with getMore
// this allows for parallel requests
} catch (error) {
console.log("Request exception:", error);
let errorBytes = encoder.encode(error.message);
let written = errorBytes.length;
byteBuffer.set(errorBytes);
intBuffer[1] = written;
Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION);
Atomics.notify(intBuffer, 0);
}
}
});
self.postMessage({ inited: true });

View File

@ -0,0 +1,708 @@
"""
Support for streaming http requests in emscripten.
A few caveats -
If your browser (or Node.js) has WebAssembly JavaScript Promise Integration enabled
https://github.com/WebAssembly/js-promise-integration/blob/main/proposals/js-promise-integration/Overview.md
*and* you launch pyodide using `pyodide.runPythonAsync`, this will fetch data using the
JavaScript asynchronous fetch api (wrapped via `pyodide.ffi.call_sync`). In this case
timeouts and streaming should just work.
Otherwise, it uses a combination of XMLHttpRequest and a web-worker for streaming.
This approach has several caveats:
Firstly, you can't do streaming http in the main UI thread, because atomics.wait isn't allowed.
Streaming only works if you're running pyodide in a web worker.
Secondly, this uses an extra web worker and SharedArrayBuffer to do the asynchronous fetch
operation, so it requires that you have crossOriginIsolation enabled, by serving over https
(or from localhost) with the two headers below set:
Cross-Origin-Opener-Policy: same-origin
Cross-Origin-Embedder-Policy: require-corp
You can tell if cross origin isolation is successfully enabled by looking at the global crossOriginIsolated variable in
JavaScript console. If it isn't, streaming requests will fallback to XMLHttpRequest, i.e. getting the whole
request into a buffer and then returning it. it shows a warning in the JavaScript console in this case.
Finally, the webworker which does the streaming fetch is created on initial import, but will only be started once
control is returned to javascript. Call `await wait_for_streaming_ready()` to wait for streaming fetch.
NB: in this code, there are a lot of JavaScript objects. They are named js_*
to make it clear what type of object they are.
"""
from __future__ import annotations
import io
import json
from email.parser import Parser
from importlib.resources import files
from typing import TYPE_CHECKING, Any
import js # type: ignore[import-not-found]
from pyodide.ffi import ( # type: ignore[import-not-found]
JsArray,
JsException,
JsProxy,
to_js,
)
if TYPE_CHECKING:
from typing_extensions import Buffer
from .request import EmscriptenRequest
from .response import EmscriptenResponse
"""
There are some headers that trigger unintended CORS preflight requests.
See also https://github.com/koenvo/pyodide-http/issues/22
"""
HEADERS_TO_IGNORE = ("user-agent",)
SUCCESS_HEADER = -1
SUCCESS_EOF = -2
ERROR_TIMEOUT = -3
ERROR_EXCEPTION = -4
_STREAMING_WORKER_CODE = (
files(__package__)
.joinpath("emscripten_fetch_worker.js")
.read_text(encoding="utf-8")
)
class _RequestError(Exception):
def __init__(
self,
message: str | None = None,
*,
request: EmscriptenRequest | None = None,
response: EmscriptenResponse | None = None,
):
self.request = request
self.response = response
self.message = message
super().__init__(self.message)
class _StreamingError(_RequestError):
pass
class _TimeoutError(_RequestError):
pass
def _obj_from_dict(dict_val: dict[str, Any]) -> JsProxy:
return to_js(dict_val, dict_converter=js.Object.fromEntries)
class _ReadStream(io.RawIOBase):
def __init__(
self,
int_buffer: JsArray,
byte_buffer: JsArray,
timeout: float,
worker: JsProxy,
connection_id: int,
request: EmscriptenRequest,
):
self.int_buffer = int_buffer
self.byte_buffer = byte_buffer
self.read_pos = 0
self.read_len = 0
self.connection_id = connection_id
self.worker = worker
self.timeout = int(1000 * timeout) if timeout > 0 else None
self.is_live = True
self._is_closed = False
self.request: EmscriptenRequest | None = request
def __del__(self) -> None:
self.close()
# this is compatible with _base_connection
def is_closed(self) -> bool:
return self._is_closed
# for compatibility with RawIOBase
@property
def closed(self) -> bool:
return self.is_closed()
def close(self) -> None:
if self.is_closed():
return
self.read_len = 0
self.read_pos = 0
self.int_buffer = None
self.byte_buffer = None
self._is_closed = True
self.request = None
if self.is_live:
self.worker.postMessage(_obj_from_dict({"close": self.connection_id}))
self.is_live = False
super().close()
def readable(self) -> bool:
return True
def writable(self) -> bool:
return False
def seekable(self) -> bool:
return False
def readinto(self, byte_obj: Buffer) -> int:
if not self.int_buffer:
raise _StreamingError(
"No buffer for stream in _ReadStream.readinto",
request=self.request,
response=None,
)
if self.read_len == 0:
# wait for the worker to send something
js.Atomics.store(self.int_buffer, 0, ERROR_TIMEOUT)
self.worker.postMessage(_obj_from_dict({"getMore": self.connection_id}))
if (
js.Atomics.wait(self.int_buffer, 0, ERROR_TIMEOUT, self.timeout)
== "timed-out"
):
raise _TimeoutError
data_len = self.int_buffer[0]
if data_len > 0:
self.read_len = data_len
self.read_pos = 0
elif data_len == ERROR_EXCEPTION:
string_len = self.int_buffer[1]
# decode the error string
js_decoder = js.TextDecoder.new()
json_str = js_decoder.decode(self.byte_buffer.slice(0, string_len))
raise _StreamingError(
f"Exception thrown in fetch: {json_str}",
request=self.request,
response=None,
)
else:
# EOF, free the buffers and return zero
# and free the request
self.is_live = False
self.close()
return 0
# copy from int32array to python bytes
ret_length = min(self.read_len, len(memoryview(byte_obj)))
subarray = self.byte_buffer.subarray(
self.read_pos, self.read_pos + ret_length
).to_py()
memoryview(byte_obj)[0:ret_length] = subarray
self.read_len -= ret_length
self.read_pos += ret_length
return ret_length
class _StreamingFetcher:
def __init__(self) -> None:
# make web-worker and data buffer on startup
self.streaming_ready = False
js_data_blob = js.Blob.new(
to_js([_STREAMING_WORKER_CODE], create_pyproxies=False),
_obj_from_dict({"type": "application/javascript"}),
)
def promise_resolver(js_resolve_fn: JsProxy, js_reject_fn: JsProxy) -> None:
def onMsg(e: JsProxy) -> None:
self.streaming_ready = True
js_resolve_fn(e)
def onErr(e: JsProxy) -> None:
js_reject_fn(e) # Defensive: never happens in ci
self.js_worker.onmessage = onMsg
self.js_worker.onerror = onErr
js_data_url = js.URL.createObjectURL(js_data_blob)
self.js_worker = js.globalThis.Worker.new(js_data_url)
self.js_worker_ready_promise = js.globalThis.Promise.new(promise_resolver)
def send(self, request: EmscriptenRequest) -> EmscriptenResponse:
headers = {
k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE
}
body = request.body
fetch_data = {"headers": headers, "body": to_js(body), "method": request.method}
# start the request off in the worker
timeout = int(1000 * request.timeout) if request.timeout > 0 else None
js_shared_buffer = js.SharedArrayBuffer.new(1048576)
js_int_buffer = js.Int32Array.new(js_shared_buffer)
js_byte_buffer = js.Uint8Array.new(js_shared_buffer, 8)
js.Atomics.store(js_int_buffer, 0, ERROR_TIMEOUT)
js.Atomics.notify(js_int_buffer, 0)
js_absolute_url = js.URL.new(request.url, js.location).href
self.js_worker.postMessage(
_obj_from_dict(
{
"buffer": js_shared_buffer,
"url": js_absolute_url,
"fetchParams": fetch_data,
}
)
)
# wait for the worker to send something
js.Atomics.wait(js_int_buffer, 0, ERROR_TIMEOUT, timeout)
if js_int_buffer[0] == ERROR_TIMEOUT:
raise _TimeoutError(
"Timeout connecting to streaming request",
request=request,
response=None,
)
elif js_int_buffer[0] == SUCCESS_HEADER:
# got response
# header length is in second int of intBuffer
string_len = js_int_buffer[1]
# decode the rest to a JSON string
js_decoder = js.TextDecoder.new()
# this does a copy (the slice) because decode can't work on shared array
# for some silly reason
json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len))
# get it as an object
response_obj = json.loads(json_str)
return EmscriptenResponse(
request=request,
status_code=response_obj["status"],
headers=response_obj["headers"],
body=_ReadStream(
js_int_buffer,
js_byte_buffer,
request.timeout,
self.js_worker,
response_obj["connectionID"],
request,
),
)
elif js_int_buffer[0] == ERROR_EXCEPTION:
string_len = js_int_buffer[1]
# decode the error string
js_decoder = js.TextDecoder.new()
json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len))
raise _StreamingError(
f"Exception thrown in fetch: {json_str}", request=request, response=None
)
else:
raise _StreamingError(
f"Unknown status from worker in fetch: {js_int_buffer[0]}",
request=request,
response=None,
)
class _JSPIReadStream(io.RawIOBase):
"""
A read stream that uses pyodide.ffi.run_sync to read from a JavaScript fetch
response. This requires support for WebAssembly JavaScript Promise Integration
in the containing browser, and for pyodide to be launched via runPythonAsync.
:param js_read_stream:
The JavaScript stream reader
:param timeout:
Timeout in seconds
:param request:
The request we're handling
:param response:
The response this stream relates to
:param js_abort_controller:
A JavaScript AbortController object, used for timeouts
"""
def __init__(
self,
js_read_stream: Any,
timeout: float,
request: EmscriptenRequest,
response: EmscriptenResponse,
js_abort_controller: Any, # JavaScript AbortController for timeouts
):
self.js_read_stream = js_read_stream
self.timeout = timeout
self._is_closed = False
self._is_done = False
self.request: EmscriptenRequest | None = request
self.response: EmscriptenResponse | None = response
self.current_buffer = None
self.current_buffer_pos = 0
self.js_abort_controller = js_abort_controller
def __del__(self) -> None:
self.close()
# this is compatible with _base_connection
def is_closed(self) -> bool:
return self._is_closed
# for compatibility with RawIOBase
@property
def closed(self) -> bool:
return self.is_closed()
def close(self) -> None:
if self.is_closed():
return
self.read_len = 0
self.read_pos = 0
self.js_read_stream.cancel()
self.js_read_stream = None
self._is_closed = True
self._is_done = True
self.request = None
self.response = None
super().close()
def readable(self) -> bool:
return True
def writable(self) -> bool:
return False
def seekable(self) -> bool:
return False
def _get_next_buffer(self) -> bool:
result_js = _run_sync_with_timeout(
self.js_read_stream.read(),
self.timeout,
self.js_abort_controller,
request=self.request,
response=self.response,
)
if result_js.done:
self._is_done = True
return False
else:
self.current_buffer = result_js.value.to_py()
self.current_buffer_pos = 0
return True
def readinto(self, byte_obj: Buffer) -> int:
if self.current_buffer is None:
if not self._get_next_buffer() or self.current_buffer is None:
self.close()
return 0
ret_length = min(
len(byte_obj), len(self.current_buffer) - self.current_buffer_pos
)
byte_obj[0:ret_length] = self.current_buffer[
self.current_buffer_pos : self.current_buffer_pos + ret_length
]
self.current_buffer_pos += ret_length
if self.current_buffer_pos == len(self.current_buffer):
self.current_buffer = None
return ret_length
# check if we are in a worker or not
def is_in_browser_main_thread() -> bool:
return hasattr(js, "window") and hasattr(js, "self") and js.self == js.window
def is_cross_origin_isolated() -> bool:
return hasattr(js, "crossOriginIsolated") and js.crossOriginIsolated
def is_in_node() -> bool:
return (
hasattr(js, "process")
and hasattr(js.process, "release")
and hasattr(js.process.release, "name")
and js.process.release.name == "node"
)
def is_worker_available() -> bool:
return hasattr(js, "Worker") and hasattr(js, "Blob")
_fetcher: _StreamingFetcher | None = None
if is_worker_available() and (
(is_cross_origin_isolated() and not is_in_browser_main_thread())
and (not is_in_node())
):
_fetcher = _StreamingFetcher()
else:
_fetcher = None
NODE_JSPI_ERROR = (
"urllib3 only works in Node.js with pyodide.runPythonAsync"
" and requires the flag --experimental-wasm-stack-switching in "
" versions of node <24."
)
def send_streaming_request(request: EmscriptenRequest) -> EmscriptenResponse | None:
if has_jspi():
return send_jspi_request(request, True)
elif is_in_node():
raise _RequestError(
message=NODE_JSPI_ERROR,
request=request,
response=None,
)
if _fetcher and streaming_ready():
return _fetcher.send(request)
else:
_show_streaming_warning()
return None
_SHOWN_TIMEOUT_WARNING = False
def _show_timeout_warning() -> None:
global _SHOWN_TIMEOUT_WARNING
if not _SHOWN_TIMEOUT_WARNING:
_SHOWN_TIMEOUT_WARNING = True
message = "Warning: Timeout is not available on main browser thread"
js.console.warn(message)
_SHOWN_STREAMING_WARNING = False
def _show_streaming_warning() -> None:
global _SHOWN_STREAMING_WARNING
if not _SHOWN_STREAMING_WARNING:
_SHOWN_STREAMING_WARNING = True
message = "Can't stream HTTP requests because: \n"
if not is_cross_origin_isolated():
message += " Page is not cross-origin isolated\n"
if is_in_browser_main_thread():
message += " Python is running in main browser thread\n"
if not is_worker_available():
message += " Worker or Blob classes are not available in this environment." # Defensive: this is always False in browsers that we test in
if streaming_ready() is False:
message += """ Streaming fetch worker isn't ready. If you want to be sure that streaming fetch
is working, you need to call: 'await urllib3.contrib.emscripten.fetch.wait_for_streaming_ready()`"""
from js import console
console.warn(message)
def send_request(request: EmscriptenRequest) -> EmscriptenResponse:
if has_jspi():
return send_jspi_request(request, False)
elif is_in_node():
raise _RequestError(
message=NODE_JSPI_ERROR,
request=request,
response=None,
)
try:
js_xhr = js.XMLHttpRequest.new()
if not is_in_browser_main_thread():
js_xhr.responseType = "arraybuffer"
if request.timeout:
js_xhr.timeout = int(request.timeout * 1000)
else:
js_xhr.overrideMimeType("text/plain; charset=ISO-8859-15")
if request.timeout:
# timeout isn't available on the main thread - show a warning in console
# if it is set
_show_timeout_warning()
js_xhr.open(request.method, request.url, False)
for name, value in request.headers.items():
if name.lower() not in HEADERS_TO_IGNORE:
js_xhr.setRequestHeader(name, value)
js_xhr.send(to_js(request.body))
headers = dict(Parser().parsestr(js_xhr.getAllResponseHeaders()))
if not is_in_browser_main_thread():
body = js_xhr.response.to_py().tobytes()
else:
body = js_xhr.response.encode("ISO-8859-15")
return EmscriptenResponse(
status_code=js_xhr.status, headers=headers, body=body, request=request
)
except JsException as err:
if err.name == "TimeoutError":
raise _TimeoutError(err.message, request=request)
elif err.name == "NetworkError":
raise _RequestError(err.message, request=request)
else:
# general http error
raise _RequestError(err.message, request=request)
def send_jspi_request(
request: EmscriptenRequest, streaming: bool
) -> EmscriptenResponse:
"""
Send a request using WebAssembly JavaScript Promise Integration
to wrap the asynchronous JavaScript fetch api (experimental).
:param request:
Request to send
:param streaming:
Whether to stream the response
:return: The response object
:rtype: EmscriptenResponse
"""
timeout = request.timeout
js_abort_controller = js.AbortController.new()
headers = {k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE}
req_body = request.body
fetch_data = {
"headers": headers,
"body": to_js(req_body),
"method": request.method,
"signal": js_abort_controller.signal,
}
# Call JavaScript fetch (async api, returns a promise)
fetcher_promise_js = js.fetch(request.url, _obj_from_dict(fetch_data))
# Now suspend WebAssembly until we resolve that promise
# or time out.
response_js = _run_sync_with_timeout(
fetcher_promise_js,
timeout,
js_abort_controller,
request=request,
response=None,
)
headers = {}
header_iter = response_js.headers.entries()
while True:
iter_value_js = header_iter.next()
if getattr(iter_value_js, "done", False):
break
else:
headers[str(iter_value_js.value[0])] = str(iter_value_js.value[1])
status_code = response_js.status
body: bytes | io.RawIOBase = b""
response = EmscriptenResponse(
status_code=status_code, headers=headers, body=b"", request=request
)
if streaming:
# get via inputstream
if response_js.body is not None:
# get a reader from the fetch response
body_stream_js = response_js.body.getReader()
body = _JSPIReadStream(
body_stream_js, timeout, request, response, js_abort_controller
)
else:
# get directly via arraybuffer
# n.b. this is another async JavaScript call.
body = _run_sync_with_timeout(
response_js.arrayBuffer(),
timeout,
js_abort_controller,
request=request,
response=response,
).to_py()
response.body = body
return response
def _run_sync_with_timeout(
promise: Any,
timeout: float,
js_abort_controller: Any,
request: EmscriptenRequest | None,
response: EmscriptenResponse | None,
) -> Any:
"""
Await a JavaScript promise synchronously with a timeout which is implemented
via the AbortController
:param promise:
Javascript promise to await
:param timeout:
Timeout in seconds
:param js_abort_controller:
A JavaScript AbortController object, used on timeout
:param request:
The request being handled
:param response:
The response being handled (if it exists yet)
:raises _TimeoutError: If the request times out
:raises _RequestError: If the request raises a JavaScript exception
:return: The result of awaiting the promise.
"""
timer_id = None
if timeout > 0:
timer_id = js.setTimeout(
js_abort_controller.abort.bind(js_abort_controller), int(timeout * 1000)
)
try:
from pyodide.ffi import run_sync
# run_sync here uses WebAssembly JavaScript Promise Integration to
# suspend python until the JavaScript promise resolves.
return run_sync(promise)
except JsException as err:
if err.name == "AbortError":
raise _TimeoutError(
message="Request timed out", request=request, response=response
)
else:
raise _RequestError(message=err.message, request=request, response=response)
finally:
if timer_id is not None:
js.clearTimeout(timer_id)
def has_jspi() -> bool:
"""
Return true if jspi can be used.
This requires both browser support and also WebAssembly
to be in the correct state - i.e. that the javascript
call into python was async not sync.
:return: True if jspi can be used.
:rtype: bool
"""
try:
from pyodide.ffi import can_run_sync, run_sync # noqa: F401
return bool(can_run_sync())
except ImportError:
return False
def streaming_ready() -> bool | None:
if _fetcher:
return _fetcher.streaming_ready
else:
return None # no fetcher, return None to signify that
async def wait_for_streaming_ready() -> bool:
if _fetcher:
await _fetcher.js_worker_ready_promise
return True
else:
return False

View File

@ -0,0 +1,22 @@
from __future__ import annotations
from dataclasses import dataclass, field
from ..._base_connection import _TYPE_BODY
@dataclass
class EmscriptenRequest:
method: str
url: str
params: dict[str, str] | None = None
body: _TYPE_BODY | None = None
headers: dict[str, str] = field(default_factory=dict)
timeout: float = 0
decode_content: bool = True
def set_header(self, name: str, value: str) -> None:
self.headers[name.capitalize()] = value
def set_body(self, body: _TYPE_BODY | None) -> None:
self.body = body

View File

@ -0,0 +1,277 @@
from __future__ import annotations
import json as _json
import logging
import typing
from contextlib import contextmanager
from dataclasses import dataclass
from http.client import HTTPException as HTTPException
from io import BytesIO, IOBase
from ...exceptions import InvalidHeader, TimeoutError
from ...response import BaseHTTPResponse
from ...util.retry import Retry
from .request import EmscriptenRequest
if typing.TYPE_CHECKING:
from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection
log = logging.getLogger(__name__)
@dataclass
class EmscriptenResponse:
status_code: int
headers: dict[str, str]
body: IOBase | bytes
request: EmscriptenRequest
class EmscriptenHttpResponseWrapper(BaseHTTPResponse):
def __init__(
self,
internal_response: EmscriptenResponse,
url: str | None = None,
connection: BaseHTTPConnection | BaseHTTPSConnection | None = None,
):
self._pool = None # set by pool class
self._body = None
self._response = internal_response
self._url = url
self._connection = connection
self._closed = False
super().__init__(
headers=internal_response.headers,
status=internal_response.status_code,
request_url=url,
version=0,
version_string="HTTP/?",
reason="",
decode_content=True,
)
self.length_remaining = self._init_length(self._response.request.method)
self.length_is_certain = False
@property
def url(self) -> str | None:
return self._url
@url.setter
def url(self, url: str | None) -> None:
self._url = url
@property
def connection(self) -> BaseHTTPConnection | BaseHTTPSConnection | None:
return self._connection
@property
def retries(self) -> Retry | None:
return self._retries
@retries.setter
def retries(self, retries: Retry | None) -> None:
# Override the request_url if retries has a redirect location.
self._retries = retries
def stream(
self, amt: int | None = 2**16, decode_content: bool | None = None
) -> typing.Generator[bytes]:
"""
A generator wrapper for the read() method. A call will block until
``amt`` bytes have been read from the connection or until the
connection is closed.
:param amt:
How much of the content to read. The generator will return up to
much data per iteration, but may return less. This is particularly
likely when using compressed data. However, the empty string will
never be returned.
:param decode_content:
If True, will attempt to decode the body based on the
'content-encoding' header.
"""
while True:
data = self.read(amt=amt, decode_content=decode_content)
if data:
yield data
else:
break
def _init_length(self, request_method: str | None) -> int | None:
length: int | None
content_length: str | None = self.headers.get("content-length")
if content_length is not None:
try:
# RFC 7230 section 3.3.2 specifies multiple content lengths can
# be sent in a single Content-Length header
# (e.g. Content-Length: 42, 42). This line ensures the values
# are all valid ints and that as long as the `set` length is 1,
# all values are the same. Otherwise, the header is invalid.
lengths = {int(val) for val in content_length.split(",")}
if len(lengths) > 1:
raise InvalidHeader(
"Content-Length contained multiple "
"unmatching values (%s)" % content_length
)
length = lengths.pop()
except ValueError:
length = None
else:
if length < 0:
length = None
else: # if content_length is None
length = None
# Check for responses that shouldn't include a body
if (
self.status in (204, 304)
or 100 <= self.status < 200
or request_method == "HEAD"
):
length = 0
return length
def read(
self,
amt: int | None = None,
decode_content: bool | None = None, # ignored because browser decodes always
cache_content: bool = False,
) -> bytes:
if (
self._closed
or self._response is None
or (isinstance(self._response.body, IOBase) and self._response.body.closed)
):
return b""
with self._error_catcher():
# body has been preloaded as a string by XmlHttpRequest
if not isinstance(self._response.body, IOBase):
self.length_remaining = len(self._response.body)
self.length_is_certain = True
# wrap body in IOStream
self._response.body = BytesIO(self._response.body)
if amt is not None and amt >= 0:
# don't cache partial content
cache_content = False
data = self._response.body.read(amt)
else: # read all we can (and cache it)
data = self._response.body.read()
if cache_content:
self._body = data
if self.length_remaining is not None:
self.length_remaining = max(self.length_remaining - len(data), 0)
if len(data) == 0 or (
self.length_is_certain and self.length_remaining == 0
):
# definitely finished reading, close response stream
self._response.body.close()
return typing.cast(bytes, data)
def read_chunked(
self,
amt: int | None = None,
decode_content: bool | None = None,
) -> typing.Generator[bytes]:
# chunked is handled by browser
while True:
bytes = self.read(amt, decode_content)
if not bytes:
break
yield bytes
def release_conn(self) -> None:
if not self._pool or not self._connection:
return None
self._pool._put_conn(self._connection)
self._connection = None
def drain_conn(self) -> None:
self.close()
@property
def data(self) -> bytes:
if self._body:
return self._body
else:
return self.read(cache_content=True)
def json(self) -> typing.Any:
"""
Deserializes the body of the HTTP response as a Python object.
The body of the HTTP response must be encoded using UTF-8, as per
`RFC 8529 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.
To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
your custom decoder instead.
If the body of the HTTP response is not decodable to UTF-8, a
`UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
valid JSON document, a `json.JSONDecodeError` will be raised.
Read more :ref:`here <json_content>`.
:returns: The body of the HTTP response as a Python object.
"""
data = self.data.decode("utf-8")
return _json.loads(data)
def close(self) -> None:
if not self._closed:
if isinstance(self._response.body, IOBase):
self._response.body.close()
if self._connection:
self._connection.close()
self._connection = None
self._closed = True
@contextmanager
def _error_catcher(self) -> typing.Generator[None]:
"""
Catch Emscripten specific exceptions thrown by fetch.py,
instead re-raising urllib3 variants, so that low-level exceptions
are not leaked in the high-level api.
On exit, release the connection back to the pool.
"""
from .fetch import _RequestError, _TimeoutError # avoid circular import
clean_exit = False
try:
yield
# If no exception is thrown, we should avoid cleaning up
# unnecessarily.
clean_exit = True
except _TimeoutError as e:
raise TimeoutError(str(e))
except _RequestError as e:
raise HTTPException(str(e))
finally:
# If we didn't terminate cleanly, we need to throw away our
# connection.
if not clean_exit:
# The response may not be closed but we're not going to use it
# anymore so close it now
if (
isinstance(self._response.body, IOBase)
and not self._response.body.closed
):
self._response.body.close()
# release the connection back to the pool
self.release_conn()
else:
# If we have read everything from the response stream,
# return the connection back to the pool.
if (
isinstance(self._response.body, IOBase)
and self._response.body.closed
):
self.release_conn()

View File

@ -0,0 +1,564 @@
"""
Module for using pyOpenSSL as a TLS backend. This module was relevant before
the standard library ``ssl`` module supported SNI, but now that we've dropped
support for Python 2.7 all relevant Python versions support SNI so
**this module is no longer recommended**.
This needs the following packages installed:
* `pyOpenSSL`_ (tested with 16.0.0)
* `cryptography`_ (minimum 1.3.4, from pyopenssl)
* `idna`_ (minimum 2.0)
However, pyOpenSSL depends on cryptography, so while we use all three directly here we
end up having relatively few packages required.
You can install them with the following command:
.. code-block:: bash
$ python -m pip install pyopenssl cryptography idna
To activate certificate checking, call
:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code
before you begin making HTTP requests. This can be done in a ``sitecustomize``
module, or at any other time before your application begins using ``urllib3``,
like this:
.. code-block:: python
try:
import urllib3.contrib.pyopenssl
urllib3.contrib.pyopenssl.inject_into_urllib3()
except ImportError:
pass
.. _pyopenssl: https://www.pyopenssl.org
.. _cryptography: https://cryptography.io
.. _idna: https://github.com/kjd/idna
"""
from __future__ import annotations
import OpenSSL.SSL # type: ignore[import-untyped]
from cryptography import x509
try:
from cryptography.x509 import UnsupportedExtension # type: ignore[attr-defined]
except ImportError:
# UnsupportedExtension is gone in cryptography >= 2.1.0
class UnsupportedExtension(Exception): # type: ignore[no-redef]
pass
import logging
import ssl
import typing
from io import BytesIO
from socket import socket as socket_cls
from socket import timeout
from .. import util
if typing.TYPE_CHECKING:
from OpenSSL.crypto import X509 # type: ignore[import-untyped]
__all__ = ["inject_into_urllib3", "extract_from_urllib3"]
# Map from urllib3 to PyOpenSSL compatible parameter-values.
_openssl_versions: dict[int, int] = {
util.ssl_.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
util.ssl_.PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD,
}
if hasattr(ssl, "PROTOCOL_TLSv1_1") and hasattr(OpenSSL.SSL, "TLSv1_1_METHOD"):
_openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD
if hasattr(ssl, "PROTOCOL_TLSv1_2") and hasattr(OpenSSL.SSL, "TLSv1_2_METHOD"):
_openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD
_stdlib_to_openssl_verify = {
ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE,
ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER,
ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
}
_openssl_to_stdlib_verify = {v: k for k, v in _stdlib_to_openssl_verify.items()}
# The SSLvX values are the most likely to be missing in the future
# but we check them all just to be sure.
_OP_NO_SSLv2_OR_SSLv3: int = getattr(OpenSSL.SSL, "OP_NO_SSLv2", 0) | getattr(
OpenSSL.SSL, "OP_NO_SSLv3", 0
)
_OP_NO_TLSv1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1", 0)
_OP_NO_TLSv1_1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_1", 0)
_OP_NO_TLSv1_2: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_2", 0)
_OP_NO_TLSv1_3: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_3", 0)
_openssl_to_ssl_minimum_version: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.TLSv1: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1,
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1,
ssl.TLSVersion.TLSv1_3: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
),
ssl.TLSVersion.MAXIMUM_SUPPORTED: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
),
}
_openssl_to_ssl_maximum_version: dict[int, int] = {
ssl.TLSVersion.MINIMUM_SUPPORTED: (
_OP_NO_SSLv2_OR_SSLv3
| _OP_NO_TLSv1
| _OP_NO_TLSv1_1
| _OP_NO_TLSv1_2
| _OP_NO_TLSv1_3
),
ssl.TLSVersion.TLSv1: (
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3
),
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3,
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_3,
ssl.TLSVersion.TLSv1_3: _OP_NO_SSLv2_OR_SSLv3,
ssl.TLSVersion.MAXIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
}
# OpenSSL will only write 16K at a time
SSL_WRITE_BLOCKSIZE = 16384
orig_util_SSLContext = util.ssl_.SSLContext
log = logging.getLogger(__name__)
def inject_into_urllib3() -> None:
"Monkey-patch urllib3 with PyOpenSSL-backed SSL-support."
_validate_dependencies_met()
util.SSLContext = PyOpenSSLContext # type: ignore[assignment]
util.ssl_.SSLContext = PyOpenSSLContext # type: ignore[assignment]
util.IS_PYOPENSSL = True
util.ssl_.IS_PYOPENSSL = True
def extract_from_urllib3() -> None:
"Undo monkey-patching by :func:`inject_into_urllib3`."
util.SSLContext = orig_util_SSLContext
util.ssl_.SSLContext = orig_util_SSLContext
util.IS_PYOPENSSL = False
util.ssl_.IS_PYOPENSSL = False
def _validate_dependencies_met() -> None:
"""
Verifies that PyOpenSSL's package-level dependencies have been met.
Throws `ImportError` if they are not met.
"""
# Method added in `cryptography==1.1`; not available in older versions
from cryptography.x509.extensions import Extensions
if getattr(Extensions, "get_extension_for_class", None) is None:
raise ImportError(
"'cryptography' module missing required functionality. "
"Try upgrading to v1.3.4 or newer."
)
# pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509
# attribute is only present on those versions.
from OpenSSL.crypto import X509
x509 = X509()
if getattr(x509, "_x509", None) is None:
raise ImportError(
"'pyOpenSSL' module missing required functionality. "
"Try upgrading to v0.14 or newer."
)
def _dnsname_to_stdlib(name: str) -> str | None:
"""
Converts a dNSName SubjectAlternativeName field to the form used by the
standard library on the given Python version.
Cryptography produces a dNSName as a unicode string that was idna-decoded
from ASCII bytes. We need to idna-encode that string to get it back, and
then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib
uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8).
If the name cannot be idna-encoded then we return None signalling that
the name given should be skipped.
"""
def idna_encode(name: str) -> bytes | None:
"""
Borrowed wholesale from the Python Cryptography Project. It turns out
that we can't just safely call `idna.encode`: it can explode for
wildcard names. This avoids that problem.
"""
import idna
try:
for prefix in ["*.", "."]:
if name.startswith(prefix):
name = name[len(prefix) :]
return prefix.encode("ascii") + idna.encode(name)
return idna.encode(name)
except idna.core.IDNAError:
return None
# Don't send IPv6 addresses through the IDNA encoder.
if ":" in name:
return name
encoded_name = idna_encode(name)
if encoded_name is None:
return None
return encoded_name.decode("utf-8")
def get_subj_alt_name(peer_cert: X509) -> list[tuple[str, str]]:
"""
Given an PyOpenSSL certificate, provides all the subject alternative names.
"""
cert = peer_cert.to_cryptography()
# We want to find the SAN extension. Ask Cryptography to locate it (it's
# faster than looping in Python)
try:
ext = cert.extensions.get_extension_for_class(x509.SubjectAlternativeName).value
except x509.ExtensionNotFound:
# No such extension, return the empty list.
return []
except (
x509.DuplicateExtension,
UnsupportedExtension,
x509.UnsupportedGeneralNameType,
UnicodeError,
) as e:
# A problem has been found with the quality of the certificate. Assume
# no SAN field is present.
log.warning(
"A problem was encountered with the certificate that prevented "
"urllib3 from finding the SubjectAlternativeName field. This can "
"affect certificate validation. The error was %s",
e,
)
return []
# We want to return dNSName and iPAddress fields. We need to cast the IPs
# back to strings because the match_hostname function wants them as
# strings.
# Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8
# decoded. This is pretty frustrating, but that's what the standard library
# does with certificates, and so we need to attempt to do the same.
# We also want to skip over names which cannot be idna encoded.
names = [
("DNS", name)
for name in map(_dnsname_to_stdlib, ext.get_values_for_type(x509.DNSName))
if name is not None
]
names.extend(
("IP Address", str(name)) for name in ext.get_values_for_type(x509.IPAddress)
)
return names
class WrappedSocket:
"""API-compatibility wrapper for Python OpenSSL's Connection-class."""
def __init__(
self,
connection: OpenSSL.SSL.Connection,
socket: socket_cls,
suppress_ragged_eofs: bool = True,
) -> None:
self.connection = connection
self.socket = socket
self.suppress_ragged_eofs = suppress_ragged_eofs
self._io_refs = 0
self._closed = False
def fileno(self) -> int:
return self.socket.fileno()
# Copy-pasted from Python 3.5 source code
def _decref_socketios(self) -> None:
if self._io_refs > 0:
self._io_refs -= 1
if self._closed:
self.close()
def recv(self, *args: typing.Any, **kwargs: typing.Any) -> bytes:
try:
data = self.connection.recv(*args, **kwargs)
except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
return b""
else:
raise OSError(e.args[0], str(e)) from e
except OpenSSL.SSL.ZeroReturnError:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return b""
else:
raise
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(self.socket, self.socket.gettimeout()):
raise timeout("The read operation timed out") from e
else:
return self.recv(*args, **kwargs)
# TLS 1.3 post-handshake authentication
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"read error: {e!r}") from e
else:
return data # type: ignore[no-any-return]
def recv_into(self, *args: typing.Any, **kwargs: typing.Any) -> int:
try:
return self.connection.recv_into(*args, **kwargs) # type: ignore[no-any-return]
except OpenSSL.SSL.SysCallError as e:
if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
return 0
else:
raise OSError(e.args[0], str(e)) from e
except OpenSSL.SSL.ZeroReturnError:
if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
return 0
else:
raise
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(self.socket, self.socket.gettimeout()):
raise timeout("The read operation timed out") from e
else:
return self.recv_into(*args, **kwargs)
# TLS 1.3 post-handshake authentication
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"read error: {e!r}") from e
def settimeout(self, timeout: float) -> None:
return self.socket.settimeout(timeout)
def _send_until_done(self, data: bytes) -> int:
while True:
try:
return self.connection.send(data) # type: ignore[no-any-return]
except OpenSSL.SSL.WantWriteError as e:
if not util.wait_for_write(self.socket, self.socket.gettimeout()):
raise timeout() from e
continue
except OpenSSL.SSL.SysCallError as e:
raise OSError(e.args[0], str(e)) from e
def sendall(self, data: bytes) -> None:
total_sent = 0
while total_sent < len(data):
sent = self._send_until_done(
data[total_sent : total_sent + SSL_WRITE_BLOCKSIZE]
)
total_sent += sent
def shutdown(self, how: int) -> None:
try:
self.connection.shutdown()
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"shutdown error: {e!r}") from e
def close(self) -> None:
self._closed = True
if self._io_refs <= 0:
self._real_close()
def _real_close(self) -> None:
try:
return self.connection.close() # type: ignore[no-any-return]
except OpenSSL.SSL.Error:
return
def getpeercert(
self, binary_form: bool = False
) -> dict[str, list[typing.Any]] | None:
x509 = self.connection.get_peer_certificate()
if not x509:
return x509 # type: ignore[no-any-return]
if binary_form:
return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509) # type: ignore[no-any-return]
return {
"subject": ((("commonName", x509.get_subject().CN),),), # type: ignore[dict-item]
"subjectAltName": get_subj_alt_name(x509),
}
def version(self) -> str:
return self.connection.get_protocol_version_name() # type: ignore[no-any-return]
def selected_alpn_protocol(self) -> str | None:
alpn_proto = self.connection.get_alpn_proto_negotiated()
return alpn_proto.decode() if alpn_proto else None
WrappedSocket.makefile = socket_cls.makefile # type: ignore[attr-defined]
class PyOpenSSLContext:
"""
I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible
for translating the interface of the standard library ``SSLContext`` object
to calls into PyOpenSSL.
"""
def __init__(self, protocol: int) -> None:
self.protocol = _openssl_versions[protocol]
self._ctx = OpenSSL.SSL.Context(self.protocol)
self._options = 0
self.check_hostname = False
self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED
self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED
self._verify_flags: int = ssl.VERIFY_X509_TRUSTED_FIRST
@property
def options(self) -> int:
return self._options
@options.setter
def options(self, value: int) -> None:
self._options = value
self._set_ctx_options()
@property
def verify_flags(self) -> int:
return self._verify_flags
@verify_flags.setter
def verify_flags(self, value: int) -> None:
self._verify_flags = value
self._ctx.get_cert_store().set_flags(self._verify_flags)
@property
def verify_mode(self) -> int:
return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()]
@verify_mode.setter
def verify_mode(self, value: ssl.VerifyMode) -> None:
self._ctx.set_verify(_stdlib_to_openssl_verify[value], _verify_callback)
def set_default_verify_paths(self) -> None:
self._ctx.set_default_verify_paths()
def set_ciphers(self, ciphers: bytes | str) -> None:
if isinstance(ciphers, str):
ciphers = ciphers.encode("utf-8")
self._ctx.set_cipher_list(ciphers)
def load_verify_locations(
self,
cafile: str | None = None,
capath: str | None = None,
cadata: bytes | None = None,
) -> None:
if cafile is not None:
cafile = cafile.encode("utf-8") # type: ignore[assignment]
if capath is not None:
capath = capath.encode("utf-8") # type: ignore[assignment]
try:
self._ctx.load_verify_locations(cafile, capath)
if cadata is not None:
self._ctx.load_verify_locations(BytesIO(cadata))
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"unable to load trusted certificates: {e!r}") from e
def load_cert_chain(
self,
certfile: str,
keyfile: str | None = None,
password: str | None = None,
) -> None:
try:
self._ctx.use_certificate_chain_file(certfile)
if password is not None:
if not isinstance(password, bytes):
password = password.encode("utf-8") # type: ignore[assignment]
self._ctx.set_passwd_cb(lambda *_: password)
self._ctx.use_privatekey_file(keyfile or certfile)
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"Unable to load certificate chain: {e!r}") from e
def set_alpn_protocols(self, protocols: list[bytes | str]) -> None:
protocols = [util.util.to_bytes(p, "ascii") for p in protocols]
return self._ctx.set_alpn_protos(protocols) # type: ignore[no-any-return]
def wrap_socket(
self,
sock: socket_cls,
server_side: bool = False,
do_handshake_on_connect: bool = True,
suppress_ragged_eofs: bool = True,
server_hostname: bytes | str | None = None,
) -> WrappedSocket:
cnx = OpenSSL.SSL.Connection(self._ctx, sock)
# If server_hostname is an IP, don't use it for SNI, per RFC6066 Section 3
if server_hostname and not util.ssl_.is_ipaddress(server_hostname):
if isinstance(server_hostname, str):
server_hostname = server_hostname.encode("utf-8")
cnx.set_tlsext_host_name(server_hostname)
cnx.set_connect_state()
while True:
try:
cnx.do_handshake()
except OpenSSL.SSL.WantReadError as e:
if not util.wait_for_read(sock, sock.gettimeout()):
raise timeout("select timed out") from e
continue
except OpenSSL.SSL.Error as e:
raise ssl.SSLError(f"bad handshake: {e!r}") from e
break
return WrappedSocket(cnx, sock)
def _set_ctx_options(self) -> None:
self._ctx.set_options(
self._options
| _openssl_to_ssl_minimum_version[self._minimum_version]
| _openssl_to_ssl_maximum_version[self._maximum_version]
)
@property
def minimum_version(self) -> int:
return self._minimum_version
@minimum_version.setter
def minimum_version(self, minimum_version: int) -> None:
self._minimum_version = minimum_version
self._set_ctx_options()
@property
def maximum_version(self) -> int:
return self._maximum_version
@maximum_version.setter
def maximum_version(self, maximum_version: int) -> None:
self._maximum_version = maximum_version
self._set_ctx_options()
def _verify_callback(
cnx: OpenSSL.SSL.Connection,
x509: X509,
err_no: int,
err_depth: int,
return_code: int,
) -> bool:
return err_no == 0

View File

@ -0,0 +1,228 @@
"""
This module contains provisional support for SOCKS proxies from within
urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and
SOCKS5. To enable its functionality, either install PySocks or install this
module with the ``socks`` extra.
The SOCKS implementation supports the full range of urllib3 features. It also
supports the following SOCKS features:
- SOCKS4A (``proxy_url='socks4a://...``)
- SOCKS4 (``proxy_url='socks4://...``)
- SOCKS5 with remote DNS (``proxy_url='socks5h://...``)
- SOCKS5 with local DNS (``proxy_url='socks5://...``)
- Usernames and passwords for the SOCKS proxy
.. note::
It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in
your ``proxy_url`` to ensure that DNS resolution is done from the remote
server instead of client-side when connecting to a domain name.
SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5
supports IPv4, IPv6, and domain names.
When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url``
will be sent as the ``userid`` section of the SOCKS request:
.. code-block:: python
proxy_url="socks4a://<userid>@proxy-host"
When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion
of the ``proxy_url`` will be sent as the username/password to authenticate
with the proxy:
.. code-block:: python
proxy_url="socks5h://<username>:<password>@proxy-host"
"""
from __future__ import annotations
try:
import socks # type: ignore[import-not-found]
except ImportError:
import warnings
from ..exceptions import DependencyWarning
warnings.warn(
(
"SOCKS support in urllib3 requires the installation of optional "
"dependencies: specifically, PySocks. For more information, see "
"https://urllib3.readthedocs.io/en/latest/advanced-usage.html#socks-proxies"
),
DependencyWarning,
)
raise
import typing
from socket import timeout as SocketTimeout
from ..connection import HTTPConnection, HTTPSConnection
from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from ..exceptions import ConnectTimeoutError, NewConnectionError
from ..poolmanager import PoolManager
from ..util.url import parse_url
try:
import ssl
except ImportError:
ssl = None # type: ignore[assignment]
class _TYPE_SOCKS_OPTIONS(typing.TypedDict):
socks_version: int
proxy_host: str | None
proxy_port: str | None
username: str | None
password: str | None
rdns: bool
class SOCKSConnection(HTTPConnection):
"""
A plain-text HTTP connection that connects via a SOCKS proxy.
"""
def __init__(
self,
_socks_options: _TYPE_SOCKS_OPTIONS,
*args: typing.Any,
**kwargs: typing.Any,
) -> None:
self._socks_options = _socks_options
super().__init__(*args, **kwargs)
def _new_conn(self) -> socks.socksocket:
"""
Establish a new connection via the SOCKS proxy.
"""
extra_kw: dict[str, typing.Any] = {}
if self.source_address:
extra_kw["source_address"] = self.source_address
if self.socket_options:
extra_kw["socket_options"] = self.socket_options
try:
conn = socks.create_connection(
(self.host, self.port),
proxy_type=self._socks_options["socks_version"],
proxy_addr=self._socks_options["proxy_host"],
proxy_port=self._socks_options["proxy_port"],
proxy_username=self._socks_options["username"],
proxy_password=self._socks_options["password"],
proxy_rdns=self._socks_options["rdns"],
timeout=self.timeout,
**extra_kw,
)
except SocketTimeout as e:
raise ConnectTimeoutError(
self,
f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
) from e
except socks.ProxyError as e:
# This is fragile as hell, but it seems to be the only way to raise
# useful errors here.
if e.socket_err:
error = e.socket_err
if isinstance(error, SocketTimeout):
raise ConnectTimeoutError(
self,
f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
) from e
else:
# Adding `from e` messes with coverage somehow, so it's omitted.
# See #2386.
raise NewConnectionError(
self, f"Failed to establish a new connection: {error}"
)
else:
raise NewConnectionError(
self, f"Failed to establish a new connection: {e}"
) from e
except OSError as e: # Defensive: PySocks should catch all these.
raise NewConnectionError(
self, f"Failed to establish a new connection: {e}"
) from e
return conn
# We don't need to duplicate the Verified/Unverified distinction from
# urllib3/connection.py here because the HTTPSConnection will already have been
# correctly set to either the Verified or Unverified form by that module. This
# means the SOCKSHTTPSConnection will automatically be the correct type.
class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection):
pass
class SOCKSHTTPConnectionPool(HTTPConnectionPool):
ConnectionCls = SOCKSConnection
class SOCKSHTTPSConnectionPool(HTTPSConnectionPool):
ConnectionCls = SOCKSHTTPSConnection
class SOCKSProxyManager(PoolManager):
"""
A version of the urllib3 ProxyManager that routes connections via the
defined SOCKS proxy.
"""
pool_classes_by_scheme = {
"http": SOCKSHTTPConnectionPool,
"https": SOCKSHTTPSConnectionPool,
}
def __init__(
self,
proxy_url: str,
username: str | None = None,
password: str | None = None,
num_pools: int = 10,
headers: typing.Mapping[str, str] | None = None,
**connection_pool_kw: typing.Any,
):
parsed = parse_url(proxy_url)
if username is None and password is None and parsed.auth is not None:
split = parsed.auth.split(":")
if len(split) == 2:
username, password = split
if parsed.scheme == "socks5":
socks_version = socks.PROXY_TYPE_SOCKS5
rdns = False
elif parsed.scheme == "socks5h":
socks_version = socks.PROXY_TYPE_SOCKS5
rdns = True
elif parsed.scheme == "socks4":
socks_version = socks.PROXY_TYPE_SOCKS4
rdns = False
elif parsed.scheme == "socks4a":
socks_version = socks.PROXY_TYPE_SOCKS4
rdns = True
else:
raise ValueError(f"Unable to determine SOCKS version from {proxy_url}")
self.proxy_url = proxy_url
socks_options = {
"socks_version": socks_version,
"proxy_host": parsed.host,
"proxy_port": parsed.port,
"username": username,
"password": password,
"rdns": rdns,
}
connection_pool_kw["_socks_options"] = socks_options
super().__init__(num_pools, headers, **connection_pool_kw)
self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme