Update 2025-04-24_11:44:19
Binary file not shown.
Binary file not shown.
Binary file not shown.
16  venv/lib/python3.11/site-packages/urllib3/contrib/emscripten/__init__.py  Normal file
@@ -0,0 +1,16 @@
from __future__ import annotations

import urllib3.connection

from ...connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from .connection import EmscriptenHTTPConnection, EmscriptenHTTPSConnection


def inject_into_urllib3() -> None:
    # override connection classes to use emscripten specific classes
    # n.b. mypy complains about the overriding of classes below
    # if it isn't ignored
    HTTPConnectionPool.ConnectionCls = EmscriptenHTTPConnection
    HTTPSConnectionPool.ConnectionCls = EmscriptenHTTPSConnection
    urllib3.connection.HTTPConnection = EmscriptenHTTPConnection  # type: ignore[misc,assignment]
    urllib3.connection.HTTPSConnection = EmscriptenHTTPSConnection  # type: ignore[misc,assignment]
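For orientation, a minimal usage sketch of the injection above, assuming a Pyodide runtime (the request URL is illustrative, not part of this commit):

    # Hedged sketch: activate the emscripten backend before creating any pools.
    import urllib3
    import urllib3.contrib.emscripten

    urllib3.contrib.emscripten.inject_into_urllib3()

    # Pools created after injection build Emscripten* connections, which route
    # requests through the browser's fetch/XMLHttpRequest machinery.
    http = urllib3.PoolManager()
    resp = http.request("GET", "https://example.com/api")  # illustrative URL
    print(resp.status, resp.data[:64])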
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
255  venv/lib/python3.11/site-packages/urllib3/contrib/emscripten/connection.py  Normal file
@@ -0,0 +1,255 @@
from __future__ import annotations

import os
import typing

# use http.client.HTTPException for consistency with non-emscripten
from http.client import HTTPException as HTTPException  # noqa: F401
from http.client import ResponseNotReady

from ..._base_connection import _TYPE_BODY
from ...connection import HTTPConnection, ProxyConfig, port_by_scheme
from ...exceptions import TimeoutError
from ...response import BaseHTTPResponse
from ...util.connection import _TYPE_SOCKET_OPTIONS
from ...util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT
from ...util.url import Url
from .fetch import _RequestError, _TimeoutError, send_request, send_streaming_request
from .request import EmscriptenRequest
from .response import EmscriptenHttpResponseWrapper, EmscriptenResponse

if typing.TYPE_CHECKING:
    from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection


class EmscriptenHTTPConnection:
    default_port: typing.ClassVar[int] = port_by_scheme["http"]
    default_socket_options: typing.ClassVar[_TYPE_SOCKET_OPTIONS]

    timeout: None | (float)

    host: str
    port: int
    blocksize: int
    source_address: tuple[str, int] | None
    socket_options: _TYPE_SOCKET_OPTIONS | None

    proxy: Url | None
    proxy_config: ProxyConfig | None

    is_verified: bool = False
    proxy_is_verified: bool | None = None

    _response: EmscriptenResponse | None

    def __init__(
        self,
        host: str,
        port: int = 0,
        *,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        source_address: tuple[str, int] | None = None,
        blocksize: int = 8192,
        socket_options: _TYPE_SOCKET_OPTIONS | None = None,
        proxy: Url | None = None,
        proxy_config: ProxyConfig | None = None,
    ) -> None:
        self.host = host
        self.port = port
        self.timeout = timeout if isinstance(timeout, float) else 0.0
        self.scheme = "http"
        self._closed = True
        self._response = None
        # ignore these things because we don't
        # have control over that stuff
        self.proxy = None
        self.proxy_config = None
        self.blocksize = blocksize
        self.source_address = None
        self.socket_options = None
        self.is_verified = False

    def set_tunnel(
        self,
        host: str,
        port: int | None = 0,
        headers: typing.Mapping[str, str] | None = None,
        scheme: str = "http",
    ) -> None:
        pass

    def connect(self) -> None:
        pass

    def request(
        self,
        method: str,
        url: str,
        body: _TYPE_BODY | None = None,
        headers: typing.Mapping[str, str] | None = None,
        # We know *at least* botocore is depending on the order of the
        # first 3 parameters so to be safe we only mark the later ones
        # as keyword-only to ensure we have space to extend.
        *,
        chunked: bool = False,
        preload_content: bool = True,
        decode_content: bool = True,
        enforce_content_length: bool = True,
    ) -> None:
        self._closed = False
        if url.startswith("/"):
            # no scheme / host / port included, make a full url
            url = f"{self.scheme}://{self.host}:{self.port}" + url
        request = EmscriptenRequest(
            url=url,
            method=method,
            timeout=self.timeout if self.timeout else 0,
            decode_content=decode_content,
        )
        request.set_body(body)
        if headers:
            for k, v in headers.items():
                request.set_header(k, v)
        self._response = None
        try:
            if not preload_content:
                self._response = send_streaming_request(request)
            if self._response is None:
                self._response = send_request(request)
        except _TimeoutError as e:
            raise TimeoutError(e.message) from e
        except _RequestError as e:
            raise HTTPException(e.message) from e

    def getresponse(self) -> BaseHTTPResponse:
        if self._response is not None:
            return EmscriptenHttpResponseWrapper(
                internal_response=self._response,
                url=self._response.request.url,
                connection=self,
            )
        else:
            raise ResponseNotReady()

    def close(self) -> None:
        self._closed = True
        self._response = None

    @property
    def is_closed(self) -> bool:
        """Whether the connection either is brand new or has been previously closed.
        If this property is True then both ``is_connected`` and ``has_connected_to_proxy``
        properties must be False.
        """
        return self._closed

    @property
    def is_connected(self) -> bool:
        """Whether the connection is actively connected to any origin (proxy or target)"""
        return True

    @property
    def has_connected_to_proxy(self) -> bool:
        """Whether the connection has successfully connected to its proxy.
        This returns False if no proxy is in use. Used to determine whether
        errors are coming from the proxy layer or from tunnelling to the target origin.
        """
        return False


class EmscriptenHTTPSConnection(EmscriptenHTTPConnection):
    default_port = port_by_scheme["https"]
    # all this is basically ignored, as browser handles https
    cert_reqs: int | str | None = None
    ca_certs: str | None = None
    ca_cert_dir: str | None = None
    ca_cert_data: None | str | bytes = None
    cert_file: str | None
    key_file: str | None
    key_password: str | None
    ssl_context: typing.Any | None
    ssl_version: int | str | None = None
    ssl_minimum_version: int | None = None
    ssl_maximum_version: int | None = None
    assert_hostname: None | str | typing.Literal[False]
    assert_fingerprint: str | None = None

    def __init__(
        self,
        host: str,
        port: int = 0,
        *,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        source_address: tuple[str, int] | None = None,
        blocksize: int = 16384,
        socket_options: (
            None | _TYPE_SOCKET_OPTIONS
        ) = HTTPConnection.default_socket_options,
        proxy: Url | None = None,
        proxy_config: ProxyConfig | None = None,
        cert_reqs: int | str | None = None,
        assert_hostname: None | str | typing.Literal[False] = None,
        assert_fingerprint: str | None = None,
        server_hostname: str | None = None,
        ssl_context: typing.Any | None = None,
        ca_certs: str | None = None,
        ca_cert_dir: str | None = None,
        ca_cert_data: None | str | bytes = None,
        ssl_minimum_version: int | None = None,
        ssl_maximum_version: int | None = None,
        ssl_version: int | str | None = None,  # Deprecated
        cert_file: str | None = None,
        key_file: str | None = None,
        key_password: str | None = None,
    ) -> None:
        super().__init__(
            host,
            port=port,
            timeout=timeout,
            source_address=source_address,
            blocksize=blocksize,
            socket_options=socket_options,
            proxy=proxy,
            proxy_config=proxy_config,
        )
        self.scheme = "https"

        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        self.ssl_context = ssl_context
        self.server_hostname = server_hostname
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        self.ssl_version = ssl_version
        self.ssl_minimum_version = ssl_minimum_version
        self.ssl_maximum_version = ssl_maximum_version
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
        self.ca_cert_data = ca_cert_data

        self.cert_reqs = None

        # The browser will automatically verify all requests.
        # We have no control over that setting.
        self.is_verified = True

    def set_cert(
        self,
        key_file: str | None = None,
        cert_file: str | None = None,
        cert_reqs: int | str | None = None,
        key_password: str | None = None,
        ca_certs: str | None = None,
        assert_hostname: None | str | typing.Literal[False] = None,
        assert_fingerprint: str | None = None,
        ca_cert_dir: str | None = None,
        ca_cert_data: None | str | bytes = None,
    ) -> None:
        pass


# verify that this class implements BaseHTTP(s) connection correctly
if typing.TYPE_CHECKING:
    _supports_http_protocol: BaseHTTPConnection = EmscriptenHTTPConnection("", 0)
    _supports_https_protocol: BaseHTTPSConnection = EmscriptenHTTPSConnection("", 0)
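A short sketch of the round trip these classes implement, driving the connection directly the way HTTPConnectionPool would (hedged: host and path are illustrative):

    # Hypothetical direct use; normally the pool manages this lifecycle.
    from urllib3.contrib.emscripten.connection import EmscriptenHTTPSConnection

    conn = EmscriptenHTTPSConnection("example.com", 443)
    conn.request("GET", "/")       # builds and sends an EmscriptenRequest
    response = conn.getresponse()  # wraps the EmscriptenResponse
    data = response.read()
    conn.close()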
110  venv/lib/python3.11/site-packages/urllib3/contrib/emscripten/emscripten_fetch_worker.js  Normal file
@@ -0,0 +1,110 @@
let Status = {
  SUCCESS_HEADER: -1,
  SUCCESS_EOF: -2,
  ERROR_TIMEOUT: -3,
  ERROR_EXCEPTION: -4,
};

let connections = {};
let nextConnectionID = 1;
const encoder = new TextEncoder();

self.addEventListener("message", async function (event) {
  if (event.data.close) {
    let connectionID = event.data.close;
    delete connections[connectionID];
    return;
  } else if (event.data.getMore) {
    let connectionID = event.data.getMore;
    let { curOffset, value, reader, intBuffer, byteBuffer } =
      connections[connectionID];
    // if we still have some in buffer, then just send it back straight away
    if (!value || curOffset >= value.length) {
      // read another buffer if required
      try {
        let readResponse = await reader.read();

        if (readResponse.done) {
          // read everything - clear connection and return
          delete connections[connectionID];
          Atomics.store(intBuffer, 0, Status.SUCCESS_EOF);
          Atomics.notify(intBuffer, 0);
          // finished reading successfully
          // return from event handler
          return;
        }
        curOffset = 0;
        connections[connectionID].value = readResponse.value;
        value = readResponse.value;
      } catch (error) {
        console.log("Request exception:", error);
        let errorBytes = encoder.encode(error.message);
        let written = errorBytes.length;
        byteBuffer.set(errorBytes);
        intBuffer[1] = written;
        Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION);
        Atomics.notify(intBuffer, 0);
      }
    }

    // send as much buffer as we can
    let curLen = value.length - curOffset;
    if (curLen > byteBuffer.length) {
      curLen = byteBuffer.length;
    }
    byteBuffer.set(value.subarray(curOffset, curOffset + curLen), 0);

    Atomics.store(intBuffer, 0, curLen); // store current length in bytes
    Atomics.notify(intBuffer, 0);
    curOffset += curLen;
    connections[connectionID].curOffset = curOffset;

    return;
  } else {
    // start fetch
    let connectionID = nextConnectionID;
    nextConnectionID += 1;
    const intBuffer = new Int32Array(event.data.buffer);
    const byteBuffer = new Uint8Array(event.data.buffer, 8);
    try {
      const response = await fetch(event.data.url, event.data.fetchParams);
      // return the headers first via textencoder
      var headers = [];
      for (const pair of response.headers.entries()) {
        headers.push([pair[0], pair[1]]);
      }
      let headerObj = {
        headers: headers,
        status: response.status,
        connectionID,
      };
      const headerText = JSON.stringify(headerObj);
      let headerBytes = encoder.encode(headerText);
      let written = headerBytes.length;
      byteBuffer.set(headerBytes);
      intBuffer[1] = written;
      // make a connection
      connections[connectionID] = {
        reader: response.body.getReader(),
        intBuffer: intBuffer,
        byteBuffer: byteBuffer,
        value: undefined,
        curOffset: 0,
      };
      // set header ready
      Atomics.store(intBuffer, 0, Status.SUCCESS_HEADER);
      Atomics.notify(intBuffer, 0);
      // all fetching after this goes through a new postmessage call with getMore
      // this allows for parallel requests
    } catch (error) {
      console.log("Request exception:", error);
      let errorBytes = encoder.encode(error.message);
      let written = errorBytes.length;
      byteBuffer.set(errorBytes);
      intBuffer[1] = written;
      Atomics.store(intBuffer, 0, Status.ERROR_EXCEPTION);
      Atomics.notify(intBuffer, 0);
    }
  }
});
self.postMessage({ inited: true });
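The worker above and the Python side (fetch.py, next hunk) agree on a fixed SharedArrayBuffer layout: the first two 32-bit slots carry a status code and a byte length, and the payload starts at byte offset 8. A hedged Python-side sketch of that handshake, mirroring fetch.py (the 1 MiB size comes from fetch.py; the rest is illustrative):

    import js  # Pyodide's JavaScript bridge

    js_shared_buffer = js.SharedArrayBuffer.new(1048576)
    js_int_buffer = js.Int32Array.new(js_shared_buffer)      # [0]=status, [1]=length
    js_byte_buffer = js.Uint8Array.new(js_shared_buffer, 8)  # payload from byte 8

    js.Atomics.store(js_int_buffer, 0, -3)  # ERROR_TIMEOUT sentinel (see fetch.py)
    # ...post {buffer, url, fetchParams} to the worker, then block until it
    # overwrites slot 0 with a real status and calls Atomics.notify:
    js.Atomics.wait(js_int_buffer, 0, -3, None)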
708  venv/lib/python3.11/site-packages/urllib3/contrib/emscripten/fetch.py  Normal file
@@ -0,0 +1,708 @@
"""
Support for streaming http requests in emscripten.

A few caveats -

If your browser (or Node.js) has WebAssembly JavaScript Promise Integration enabled
https://github.com/WebAssembly/js-promise-integration/blob/main/proposals/js-promise-integration/Overview.md
*and* you launch pyodide using `pyodide.runPythonAsync`, this will fetch data using the
JavaScript asynchronous fetch api (wrapped via `pyodide.ffi.call_sync`). In this case
timeouts and streaming should just work.

Otherwise, it uses a combination of XMLHttpRequest and a web-worker for streaming.

This approach has several caveats:

Firstly, you can't do streaming http in the main UI thread, because atomics.wait isn't allowed.
Streaming only works if you're running pyodide in a web worker.

Secondly, this uses an extra web worker and SharedArrayBuffer to do the asynchronous fetch
operation, so it requires that you have crossOriginIsolation enabled, by serving over https
(or from localhost) with the two headers below set:

    Cross-Origin-Opener-Policy: same-origin
    Cross-Origin-Embedder-Policy: require-corp

You can tell if cross origin isolation is successfully enabled by looking at the global crossOriginIsolated variable in
JavaScript console. If it isn't, streaming requests will fall back to XMLHttpRequest, i.e. getting the whole
request into a buffer and then returning it. It shows a warning in the JavaScript console in this case.

Finally, the webworker which does the streaming fetch is created on initial import, but will only be started once
control is returned to JavaScript. Call `await wait_for_streaming_ready()` to wait for streaming fetch.

NB: in this code, there are a lot of JavaScript objects. They are named js_*
to make it clear what type of object they are.
"""

from __future__ import annotations

import io
import json
from email.parser import Parser
from importlib.resources import files
from typing import TYPE_CHECKING, Any

import js  # type: ignore[import-not-found]
from pyodide.ffi import (  # type: ignore[import-not-found]
    JsArray,
    JsException,
    JsProxy,
    to_js,
)

if TYPE_CHECKING:
    from typing_extensions import Buffer

    from .request import EmscriptenRequest
    from .response import EmscriptenResponse

"""
There are some headers that trigger unintended CORS preflight requests.
See also https://github.com/koenvo/pyodide-http/issues/22
"""
HEADERS_TO_IGNORE = ("user-agent",)

SUCCESS_HEADER = -1
SUCCESS_EOF = -2
ERROR_TIMEOUT = -3
ERROR_EXCEPTION = -4

_STREAMING_WORKER_CODE = (
    files(__package__)
    .joinpath("emscripten_fetch_worker.js")
    .read_text(encoding="utf-8")
)


class _RequestError(Exception):
    def __init__(
        self,
        message: str | None = None,
        *,
        request: EmscriptenRequest | None = None,
        response: EmscriptenResponse | None = None,
    ):
        self.request = request
        self.response = response
        self.message = message
        super().__init__(self.message)


class _StreamingError(_RequestError):
    pass


class _TimeoutError(_RequestError):
    pass


def _obj_from_dict(dict_val: dict[str, Any]) -> JsProxy:
    return to_js(dict_val, dict_converter=js.Object.fromEntries)


class _ReadStream(io.RawIOBase):
    def __init__(
        self,
        int_buffer: JsArray,
        byte_buffer: JsArray,
        timeout: float,
        worker: JsProxy,
        connection_id: int,
        request: EmscriptenRequest,
    ):
        self.int_buffer = int_buffer
        self.byte_buffer = byte_buffer
        self.read_pos = 0
        self.read_len = 0
        self.connection_id = connection_id
        self.worker = worker
        self.timeout = int(1000 * timeout) if timeout > 0 else None
        self.is_live = True
        self._is_closed = False
        self.request: EmscriptenRequest | None = request

    def __del__(self) -> None:
        self.close()

    # this is compatible with _base_connection
    def is_closed(self) -> bool:
        return self._is_closed

    # for compatibility with RawIOBase
    @property
    def closed(self) -> bool:
        return self.is_closed()

    def close(self) -> None:
        if self.is_closed():
            return
        self.read_len = 0
        self.read_pos = 0
        self.int_buffer = None
        self.byte_buffer = None
        self._is_closed = True
        self.request = None
        if self.is_live:
            self.worker.postMessage(_obj_from_dict({"close": self.connection_id}))
            self.is_live = False
        super().close()

    def readable(self) -> bool:
        return True

    def writable(self) -> bool:
        return False

    def seekable(self) -> bool:
        return False

    def readinto(self, byte_obj: Buffer) -> int:
        if not self.int_buffer:
            raise _StreamingError(
                "No buffer for stream in _ReadStream.readinto",
                request=self.request,
                response=None,
            )
        if self.read_len == 0:
            # wait for the worker to send something
            js.Atomics.store(self.int_buffer, 0, ERROR_TIMEOUT)
            self.worker.postMessage(_obj_from_dict({"getMore": self.connection_id}))
            if (
                js.Atomics.wait(self.int_buffer, 0, ERROR_TIMEOUT, self.timeout)
                == "timed-out"
            ):
                raise _TimeoutError
            data_len = self.int_buffer[0]
            if data_len > 0:
                self.read_len = data_len
                self.read_pos = 0
            elif data_len == ERROR_EXCEPTION:
                string_len = self.int_buffer[1]
                # decode the error string
                js_decoder = js.TextDecoder.new()
                json_str = js_decoder.decode(self.byte_buffer.slice(0, string_len))
                raise _StreamingError(
                    f"Exception thrown in fetch: {json_str}",
                    request=self.request,
                    response=None,
                )
            else:
                # EOF, free the buffers and return zero
                # and free the request
                self.is_live = False
                self.close()
                return 0
        # copy from int32array to python bytes
        ret_length = min(self.read_len, len(memoryview(byte_obj)))
        subarray = self.byte_buffer.subarray(
            self.read_pos, self.read_pos + ret_length
        ).to_py()
        memoryview(byte_obj)[0:ret_length] = subarray
        self.read_len -= ret_length
        self.read_pos += ret_length
        return ret_length


class _StreamingFetcher:
    def __init__(self) -> None:
        # make web-worker and data buffer on startup
        self.streaming_ready = False

        js_data_blob = js.Blob.new(
            to_js([_STREAMING_WORKER_CODE], create_pyproxies=False),
            _obj_from_dict({"type": "application/javascript"}),
        )

        def promise_resolver(js_resolve_fn: JsProxy, js_reject_fn: JsProxy) -> None:
            def onMsg(e: JsProxy) -> None:
                self.streaming_ready = True
                js_resolve_fn(e)

            def onErr(e: JsProxy) -> None:
                js_reject_fn(e)  # Defensive: never happens in ci

            self.js_worker.onmessage = onMsg
            self.js_worker.onerror = onErr

        js_data_url = js.URL.createObjectURL(js_data_blob)
        self.js_worker = js.globalThis.Worker.new(js_data_url)
        self.js_worker_ready_promise = js.globalThis.Promise.new(promise_resolver)

    def send(self, request: EmscriptenRequest) -> EmscriptenResponse:
        headers = {
            k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE
        }

        body = request.body
        fetch_data = {"headers": headers, "body": to_js(body), "method": request.method}
        # start the request off in the worker
        timeout = int(1000 * request.timeout) if request.timeout > 0 else None
        js_shared_buffer = js.SharedArrayBuffer.new(1048576)
        js_int_buffer = js.Int32Array.new(js_shared_buffer)
        js_byte_buffer = js.Uint8Array.new(js_shared_buffer, 8)

        js.Atomics.store(js_int_buffer, 0, ERROR_TIMEOUT)
        js.Atomics.notify(js_int_buffer, 0)
        js_absolute_url = js.URL.new(request.url, js.location).href
        self.js_worker.postMessage(
            _obj_from_dict(
                {
                    "buffer": js_shared_buffer,
                    "url": js_absolute_url,
                    "fetchParams": fetch_data,
                }
            )
        )
        # wait for the worker to send something
        js.Atomics.wait(js_int_buffer, 0, ERROR_TIMEOUT, timeout)
        if js_int_buffer[0] == ERROR_TIMEOUT:
            raise _TimeoutError(
                "Timeout connecting to streaming request",
                request=request,
                response=None,
            )
        elif js_int_buffer[0] == SUCCESS_HEADER:
            # got response
            # header length is in second int of intBuffer
            string_len = js_int_buffer[1]
            # decode the rest to a JSON string
            js_decoder = js.TextDecoder.new()
            # this does a copy (the slice) because decode can't work on shared array
            # for some silly reason
            json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len))
            # get it as an object
            response_obj = json.loads(json_str)
            return EmscriptenResponse(
                request=request,
                status_code=response_obj["status"],
                headers=response_obj["headers"],
                body=_ReadStream(
                    js_int_buffer,
                    js_byte_buffer,
                    request.timeout,
                    self.js_worker,
                    response_obj["connectionID"],
                    request,
                ),
            )
        elif js_int_buffer[0] == ERROR_EXCEPTION:
            string_len = js_int_buffer[1]
            # decode the error string
            js_decoder = js.TextDecoder.new()
            json_str = js_decoder.decode(js_byte_buffer.slice(0, string_len))
            raise _StreamingError(
                f"Exception thrown in fetch: {json_str}", request=request, response=None
            )
        else:
            raise _StreamingError(
                f"Unknown status from worker in fetch: {js_int_buffer[0]}",
                request=request,
                response=None,
            )


class _JSPIReadStream(io.RawIOBase):
    """
    A read stream that uses pyodide.ffi.run_sync to read from a JavaScript fetch
    response. This requires support for WebAssembly JavaScript Promise Integration
    in the containing browser, and for pyodide to be launched via runPythonAsync.

    :param js_read_stream:
        The JavaScript stream reader

    :param timeout:
        Timeout in seconds

    :param request:
        The request we're handling

    :param response:
        The response this stream relates to

    :param js_abort_controller:
        A JavaScript AbortController object, used for timeouts
    """

    def __init__(
        self,
        js_read_stream: Any,
        timeout: float,
        request: EmscriptenRequest,
        response: EmscriptenResponse,
        js_abort_controller: Any,  # JavaScript AbortController for timeouts
    ):
        self.js_read_stream = js_read_stream
        self.timeout = timeout
        self._is_closed = False
        self._is_done = False
        self.request: EmscriptenRequest | None = request
        self.response: EmscriptenResponse | None = response
        self.current_buffer = None
        self.current_buffer_pos = 0
        self.js_abort_controller = js_abort_controller

    def __del__(self) -> None:
        self.close()

    # this is compatible with _base_connection
    def is_closed(self) -> bool:
        return self._is_closed

    # for compatibility with RawIOBase
    @property
    def closed(self) -> bool:
        return self.is_closed()

    def close(self) -> None:
        if self.is_closed():
            return
        self.read_len = 0
        self.read_pos = 0
        self.js_read_stream.cancel()
        self.js_read_stream = None
        self._is_closed = True
        self._is_done = True
        self.request = None
        self.response = None
        super().close()

    def readable(self) -> bool:
        return True

    def writable(self) -> bool:
        return False

    def seekable(self) -> bool:
        return False

    def _get_next_buffer(self) -> bool:
        result_js = _run_sync_with_timeout(
            self.js_read_stream.read(),
            self.timeout,
            self.js_abort_controller,
            request=self.request,
            response=self.response,
        )
        if result_js.done:
            self._is_done = True
            return False
        else:
            self.current_buffer = result_js.value.to_py()
            self.current_buffer_pos = 0
            return True

    def readinto(self, byte_obj: Buffer) -> int:
        if self.current_buffer is None:
            if not self._get_next_buffer() or self.current_buffer is None:
                self.close()
                return 0
        ret_length = min(
            len(byte_obj), len(self.current_buffer) - self.current_buffer_pos
        )
        byte_obj[0:ret_length] = self.current_buffer[
            self.current_buffer_pos : self.current_buffer_pos + ret_length
        ]
        self.current_buffer_pos += ret_length
        if self.current_buffer_pos == len(self.current_buffer):
            self.current_buffer = None
        return ret_length


# check if we are in a worker or not
def is_in_browser_main_thread() -> bool:
    return hasattr(js, "window") and hasattr(js, "self") and js.self == js.window


def is_cross_origin_isolated() -> bool:
    return hasattr(js, "crossOriginIsolated") and js.crossOriginIsolated


def is_in_node() -> bool:
    return (
        hasattr(js, "process")
        and hasattr(js.process, "release")
        and hasattr(js.process.release, "name")
        and js.process.release.name == "node"
    )


def is_worker_available() -> bool:
    return hasattr(js, "Worker") and hasattr(js, "Blob")


_fetcher: _StreamingFetcher | None = None

if is_worker_available() and (
    (is_cross_origin_isolated() and not is_in_browser_main_thread())
    and (not is_in_node())
):
    _fetcher = _StreamingFetcher()
else:
    _fetcher = None


NODE_JSPI_ERROR = (
    "urllib3 only works in Node.js with pyodide.runPythonAsync"
    " and requires the flag --experimental-wasm-stack-switching in"
    " versions of node <24."
)


def send_streaming_request(request: EmscriptenRequest) -> EmscriptenResponse | None:
    if has_jspi():
        return send_jspi_request(request, True)
    elif is_in_node():
        raise _RequestError(
            message=NODE_JSPI_ERROR,
            request=request,
            response=None,
        )

    if _fetcher and streaming_ready():
        return _fetcher.send(request)
    else:
        _show_streaming_warning()
        return None


_SHOWN_TIMEOUT_WARNING = False


def _show_timeout_warning() -> None:
    global _SHOWN_TIMEOUT_WARNING
    if not _SHOWN_TIMEOUT_WARNING:
        _SHOWN_TIMEOUT_WARNING = True
        message = "Warning: Timeout is not available on main browser thread"
        js.console.warn(message)


_SHOWN_STREAMING_WARNING = False


def _show_streaming_warning() -> None:
    global _SHOWN_STREAMING_WARNING
    if not _SHOWN_STREAMING_WARNING:
        _SHOWN_STREAMING_WARNING = True
        message = "Can't stream HTTP requests because: \n"
        if not is_cross_origin_isolated():
            message += " Page is not cross-origin isolated\n"
        if is_in_browser_main_thread():
            message += " Python is running in main browser thread\n"
        if not is_worker_available():
            message += " Worker or Blob classes are not available in this environment."  # Defensive: this is always False in browsers that we test in
        if streaming_ready() is False:
            message += """ Streaming fetch worker isn't ready. If you want to be sure that streaming fetch
is working, you need to call: 'await urllib3.contrib.emscripten.fetch.wait_for_streaming_ready()`"""
        from js import console

        console.warn(message)


def send_request(request: EmscriptenRequest) -> EmscriptenResponse:
    if has_jspi():
        return send_jspi_request(request, False)
    elif is_in_node():
        raise _RequestError(
            message=NODE_JSPI_ERROR,
            request=request,
            response=None,
        )
    try:
        js_xhr = js.XMLHttpRequest.new()

        if not is_in_browser_main_thread():
            js_xhr.responseType = "arraybuffer"
            if request.timeout:
                js_xhr.timeout = int(request.timeout * 1000)
        else:
            js_xhr.overrideMimeType("text/plain; charset=ISO-8859-15")
            if request.timeout:
                # timeout isn't available on the main thread - show a warning in console
                # if it is set
                _show_timeout_warning()

        js_xhr.open(request.method, request.url, False)
        for name, value in request.headers.items():
            if name.lower() not in HEADERS_TO_IGNORE:
                js_xhr.setRequestHeader(name, value)

        js_xhr.send(to_js(request.body))

        headers = dict(Parser().parsestr(js_xhr.getAllResponseHeaders()))

        if not is_in_browser_main_thread():
            body = js_xhr.response.to_py().tobytes()
        else:
            body = js_xhr.response.encode("ISO-8859-15")
        return EmscriptenResponse(
            status_code=js_xhr.status, headers=headers, body=body, request=request
        )
    except JsException as err:
        if err.name == "TimeoutError":
            raise _TimeoutError(err.message, request=request)
        elif err.name == "NetworkError":
            raise _RequestError(err.message, request=request)
        else:
            # general http error
            raise _RequestError(err.message, request=request)


def send_jspi_request(
    request: EmscriptenRequest, streaming: bool
) -> EmscriptenResponse:
    """
    Send a request using WebAssembly JavaScript Promise Integration
    to wrap the asynchronous JavaScript fetch api (experimental).

    :param request:
        Request to send

    :param streaming:
        Whether to stream the response

    :return: The response object
    :rtype: EmscriptenResponse
    """
    timeout = request.timeout
    js_abort_controller = js.AbortController.new()
    headers = {k: v for k, v in request.headers.items() if k not in HEADERS_TO_IGNORE}
    req_body = request.body
    fetch_data = {
        "headers": headers,
        "body": to_js(req_body),
        "method": request.method,
        "signal": js_abort_controller.signal,
    }
    # Call JavaScript fetch (async api, returns a promise)
    fetcher_promise_js = js.fetch(request.url, _obj_from_dict(fetch_data))
    # Now suspend WebAssembly until we resolve that promise
    # or time out.
    response_js = _run_sync_with_timeout(
        fetcher_promise_js,
        timeout,
        js_abort_controller,
        request=request,
        response=None,
    )
    headers = {}
    header_iter = response_js.headers.entries()
    while True:
        iter_value_js = header_iter.next()
        if getattr(iter_value_js, "done", False):
            break
        else:
            headers[str(iter_value_js.value[0])] = str(iter_value_js.value[1])
    status_code = response_js.status
    body: bytes | io.RawIOBase = b""

    response = EmscriptenResponse(
        status_code=status_code, headers=headers, body=b"", request=request
    )
    if streaming:
        # get via inputstream
        if response_js.body is not None:
            # get a reader from the fetch response
            body_stream_js = response_js.body.getReader()
            body = _JSPIReadStream(
                body_stream_js, timeout, request, response, js_abort_controller
            )
    else:
        # get directly via arraybuffer
        # n.b. this is another async JavaScript call.
        body = _run_sync_with_timeout(
            response_js.arrayBuffer(),
            timeout,
            js_abort_controller,
            request=request,
            response=response,
        ).to_py()
    response.body = body
    return response


def _run_sync_with_timeout(
    promise: Any,
    timeout: float,
    js_abort_controller: Any,
    request: EmscriptenRequest | None,
    response: EmscriptenResponse | None,
) -> Any:
    """
    Await a JavaScript promise synchronously with a timeout which is implemented
    via the AbortController

    :param promise:
        JavaScript promise to await

    :param timeout:
        Timeout in seconds

    :param js_abort_controller:
        A JavaScript AbortController object, used on timeout

    :param request:
        The request being handled

    :param response:
        The response being handled (if it exists yet)

    :raises _TimeoutError: If the request times out
    :raises _RequestError: If the request raises a JavaScript exception

    :return: The result of awaiting the promise.
    """
    timer_id = None
    if timeout > 0:
        timer_id = js.setTimeout(
            js_abort_controller.abort.bind(js_abort_controller), int(timeout * 1000)
        )
    try:
        from pyodide.ffi import run_sync

        # run_sync here uses WebAssembly JavaScript Promise Integration to
        # suspend python until the JavaScript promise resolves.
        return run_sync(promise)
    except JsException as err:
        if err.name == "AbortError":
            raise _TimeoutError(
                message="Request timed out", request=request, response=response
            )
        else:
            raise _RequestError(message=err.message, request=request, response=response)
    finally:
        if timer_id is not None:
            js.clearTimeout(timer_id)


def has_jspi() -> bool:
    """
    Return true if jspi can be used.

    This requires both browser support and also WebAssembly
    to be in the correct state - i.e. that the JavaScript
    call into Python was async not sync.

    :return: True if jspi can be used.
    :rtype: bool
    """
    try:
        from pyodide.ffi import can_run_sync, run_sync  # noqa: F401

        return bool(can_run_sync())
    except ImportError:
        return False


def streaming_ready() -> bool | None:
    if _fetcher:
        return _fetcher.streaming_ready
    else:
        return None  # no fetcher, return None to signify that


async def wait_for_streaming_ready() -> bool:
    if _fetcher:
        await _fetcher.js_worker_ready_promise
        return True
    else:
        return False
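A hedged sketch tying the entry points above together; it must run in an async Pyodide context, and the flow mirrors the module docstring's advice:

    from urllib3.contrib.emscripten.fetch import (
        streaming_ready,
        wait_for_streaming_ready,
    )

    async def ensure_streaming() -> bool:
        # streaming_ready() is None when no worker-based fetcher exists
        # (e.g. JSPI path or no cross-origin isolation); True/False otherwise.
        if streaming_ready() is None:
            return False
        return await wait_for_streaming_ready()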
22  venv/lib/python3.11/site-packages/urllib3/contrib/emscripten/request.py  Normal file
@@ -0,0 +1,22 @@
from __future__ import annotations

from dataclasses import dataclass, field

from ..._base_connection import _TYPE_BODY


@dataclass
class EmscriptenRequest:
    method: str
    url: str
    params: dict[str, str] | None = None
    body: _TYPE_BODY | None = None
    headers: dict[str, str] = field(default_factory=dict)
    timeout: float = 0
    decode_content: bool = True

    def set_header(self, name: str, value: str) -> None:
        self.headers[name.capitalize()] = value

    def set_body(self, body: _TYPE_BODY | None) -> None:
        self.body = body
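For illustration, how the dataclass above gets populated; this mirrors what EmscriptenHTTPConnection.request does (the URL and values are illustrative):

    from urllib3.contrib.emscripten.request import EmscriptenRequest

    req = EmscriptenRequest(method="GET", url="https://example.com/data", timeout=10.0)
    req.set_header("accept", "application/json")  # stored capitalized as "Accept"
    req.set_body(None)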
277  venv/lib/python3.11/site-packages/urllib3/contrib/emscripten/response.py  Normal file
@@ -0,0 +1,277 @@
from __future__ import annotations

import json as _json
import logging
import typing
from contextlib import contextmanager
from dataclasses import dataclass
from http.client import HTTPException as HTTPException
from io import BytesIO, IOBase

from ...exceptions import InvalidHeader, TimeoutError
from ...response import BaseHTTPResponse
from ...util.retry import Retry
from .request import EmscriptenRequest

if typing.TYPE_CHECKING:
    from ..._base_connection import BaseHTTPConnection, BaseHTTPSConnection

log = logging.getLogger(__name__)


@dataclass
class EmscriptenResponse:
    status_code: int
    headers: dict[str, str]
    body: IOBase | bytes
    request: EmscriptenRequest


class EmscriptenHttpResponseWrapper(BaseHTTPResponse):
    def __init__(
        self,
        internal_response: EmscriptenResponse,
        url: str | None = None,
        connection: BaseHTTPConnection | BaseHTTPSConnection | None = None,
    ):
        self._pool = None  # set by pool class
        self._body = None
        self._response = internal_response
        self._url = url
        self._connection = connection
        self._closed = False
        super().__init__(
            headers=internal_response.headers,
            status=internal_response.status_code,
            request_url=url,
            version=0,
            version_string="HTTP/?",
            reason="",
            decode_content=True,
        )
        self.length_remaining = self._init_length(self._response.request.method)
        self.length_is_certain = False

    @property
    def url(self) -> str | None:
        return self._url

    @url.setter
    def url(self, url: str | None) -> None:
        self._url = url

    @property
    def connection(self) -> BaseHTTPConnection | BaseHTTPSConnection | None:
        return self._connection

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            that much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        while True:
            data = self.read(amt=amt, decode_content=decode_content)

            if data:
                yield data
            else:
                break

    def _init_length(self, request_method: str | None) -> int | None:
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Check for responses that shouldn't include a body
        if (
            self.status in (204, 304)
            or 100 <= self.status < 200
            or request_method == "HEAD"
        ):
            length = 0

        return length

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,  # ignored because browser decodes always
        cache_content: bool = False,
    ) -> bytes:
        if (
            self._closed
            or self._response is None
            or (isinstance(self._response.body, IOBase) and self._response.body.closed)
        ):
            return b""

        with self._error_catcher():
            # body has been preloaded as a string by XmlHttpRequest
            if not isinstance(self._response.body, IOBase):
                self.length_remaining = len(self._response.body)
                self.length_is_certain = True
                # wrap body in IOStream
                self._response.body = BytesIO(self._response.body)
            if amt is not None and amt >= 0:
                # don't cache partial content
                cache_content = False
                data = self._response.body.read(amt)
            else:  # read all we can (and cache it)
                data = self._response.body.read()
                if cache_content:
                    self._body = data
            if self.length_remaining is not None:
                self.length_remaining = max(self.length_remaining - len(data), 0)
            if len(data) == 0 or (
                self.length_is_certain and self.length_remaining == 0
            ):
                # definitely finished reading, close response stream
                self._response.body.close()
            return typing.cast(bytes, data)

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Generator[bytes]:
        # chunked is handled by browser
        while True:
            bytes = self.read(amt, decode_content)
            if not bytes:
                break
            yield bytes

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        self.close()

    @property
    def data(self) -> bytes:
        if self._body:
            return self._body
        else:
            return self.read(cache_content=True)

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    def close(self) -> None:
        if not self._closed:
            if isinstance(self._response.body, IOBase):
                self._response.body.close()
            if self._connection:
                self._connection.close()
                self._connection = None
            self._closed = True

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch Emscripten specific exceptions thrown by fetch.py,
        instead re-raising urllib3 variants, so that low-level exceptions
        are not leaked in the high-level api.

        On exit, release the connection back to the pool.
        """
        from .fetch import _RequestError, _TimeoutError  # avoid circular import

        clean_exit = False

        try:
            yield
            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        except _TimeoutError as e:
            raise TimeoutError(str(e))
        except _RequestError as e:
            raise HTTPException(str(e))
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now
                if (
                    isinstance(self._response.body, IOBase)
                    and not self._response.body.closed
                ):
                    self._response.body.close()
                # release the connection back to the pool
                self.release_conn()
            else:
                # If we have read everything from the response stream,
                # return the connection back to the pool.
                if (
                    isinstance(self._response.body, IOBase)
                    and self._response.body.closed
                ):
                    self.release_conn()
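A small hedged sketch of consuming the wrapper above (in practice it is returned by EmscriptenHTTPConnection.getresponse(); the chunk size is illustrative):

    from urllib3.response import BaseHTTPResponse

    def consume(resp: BaseHTTPResponse) -> bytes:
        # stream() yields up to `amt` bytes per iteration; read() closes the
        # underlying body at EOF, which lets _error_catcher release the
        # connection back to its pool.
        return b"".join(resp.stream(amt=65536))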
564  venv/lib/python3.11/site-packages/urllib3/contrib/pyopenssl.py  Normal file
@@ -0,0 +1,564 @@
|
||||
"""
|
||||
Module for using pyOpenSSL as a TLS backend. This module was relevant before
|
||||
the standard library ``ssl`` module supported SNI, but now that we've dropped
|
||||
support for Python 2.7 all relevant Python versions support SNI so
|
||||
**this module is no longer recommended**.
|
||||
|
||||
This needs the following packages installed:
|
||||
|
||||
* `pyOpenSSL`_ (tested with 16.0.0)
|
||||
* `cryptography`_ (minimum 1.3.4, from pyopenssl)
|
||||
* `idna`_ (minimum 2.0)
|
||||
|
||||
However, pyOpenSSL depends on cryptography, so while we use all three directly here we
|
||||
end up having relatively few packages required.
|
||||
|
||||
You can install them with the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ python -m pip install pyopenssl cryptography idna
|
||||
|
||||
To activate certificate checking, call
|
||||
:func:`~urllib3.contrib.pyopenssl.inject_into_urllib3` from your Python code
|
||||
before you begin making HTTP requests. This can be done in a ``sitecustomize``
|
||||
module, or at any other time before your application begins using ``urllib3``,
|
||||
like this:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
try:
|
||||
import urllib3.contrib.pyopenssl
|
||||
urllib3.contrib.pyopenssl.inject_into_urllib3()
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
.. _pyopenssl: https://www.pyopenssl.org
|
||||
.. _cryptography: https://cryptography.io
|
||||
.. _idna: https://github.com/kjd/idna
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import OpenSSL.SSL # type: ignore[import-untyped]
|
||||
from cryptography import x509
|
||||
|
||||
try:
|
||||
from cryptography.x509 import UnsupportedExtension # type: ignore[attr-defined]
|
||||
except ImportError:
|
||||
# UnsupportedExtension is gone in cryptography >= 2.1.0
|
||||
class UnsupportedExtension(Exception): # type: ignore[no-redef]
|
||||
pass
|
||||
|
||||
|
||||
import logging
|
||||
import ssl
|
||||
import typing
|
||||
from io import BytesIO
|
||||
from socket import socket as socket_cls
|
||||
from socket import timeout
|
||||
|
||||
from .. import util
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from OpenSSL.crypto import X509 # type: ignore[import-untyped]
|
||||
|
||||
|
||||
__all__ = ["inject_into_urllib3", "extract_from_urllib3"]
|
||||
|
||||
# Map from urllib3 to PyOpenSSL compatible parameter-values.
|
||||
_openssl_versions: dict[int, int] = {
|
||||
util.ssl_.PROTOCOL_TLS: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
|
||||
util.ssl_.PROTOCOL_TLS_CLIENT: OpenSSL.SSL.SSLv23_METHOD, # type: ignore[attr-defined]
|
||||
ssl.PROTOCOL_TLSv1: OpenSSL.SSL.TLSv1_METHOD,
|
||||
}
|
||||
|
||||
if hasattr(ssl, "PROTOCOL_TLSv1_1") and hasattr(OpenSSL.SSL, "TLSv1_1_METHOD"):
|
||||
_openssl_versions[ssl.PROTOCOL_TLSv1_1] = OpenSSL.SSL.TLSv1_1_METHOD
|
||||
|
||||
if hasattr(ssl, "PROTOCOL_TLSv1_2") and hasattr(OpenSSL.SSL, "TLSv1_2_METHOD"):
|
||||
_openssl_versions[ssl.PROTOCOL_TLSv1_2] = OpenSSL.SSL.TLSv1_2_METHOD
|
||||
|
||||
|
||||
_stdlib_to_openssl_verify = {
|
||||
ssl.CERT_NONE: OpenSSL.SSL.VERIFY_NONE,
|
||||
ssl.CERT_OPTIONAL: OpenSSL.SSL.VERIFY_PEER,
|
||||
ssl.CERT_REQUIRED: OpenSSL.SSL.VERIFY_PEER
|
||||
+ OpenSSL.SSL.VERIFY_FAIL_IF_NO_PEER_CERT,
|
||||
}
|
||||
_openssl_to_stdlib_verify = {v: k for k, v in _stdlib_to_openssl_verify.items()}
|
||||
|
||||
# The SSLvX values are the most likely to be missing in the future
|
||||
# but we check them all just to be sure.
|
||||
_OP_NO_SSLv2_OR_SSLv3: int = getattr(OpenSSL.SSL, "OP_NO_SSLv2", 0) | getattr(
|
||||
OpenSSL.SSL, "OP_NO_SSLv3", 0
|
||||
)
|
||||
_OP_NO_TLSv1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1", 0)
|
||||
_OP_NO_TLSv1_1: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_1", 0)
|
||||
_OP_NO_TLSv1_2: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_2", 0)
|
||||
_OP_NO_TLSv1_3: int = getattr(OpenSSL.SSL, "OP_NO_TLSv1_3", 0)
|
||||
|
||||
_openssl_to_ssl_minimum_version: dict[int, int] = {
|
||||
ssl.TLSVersion.MINIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
|
||||
ssl.TLSVersion.TLSv1: _OP_NO_SSLv2_OR_SSLv3,
|
||||
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1,
|
||||
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1,
|
||||
ssl.TLSVersion.TLSv1_3: (
|
||||
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
|
||||
),
|
||||
ssl.TLSVersion.MAXIMUM_SUPPORTED: (
|
||||
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2
|
||||
),
|
||||
}
|
||||
_openssl_to_ssl_maximum_version: dict[int, int] = {
|
||||
ssl.TLSVersion.MINIMUM_SUPPORTED: (
|
||||
_OP_NO_SSLv2_OR_SSLv3
|
||||
| _OP_NO_TLSv1
|
||||
| _OP_NO_TLSv1_1
|
||||
| _OP_NO_TLSv1_2
|
||||
| _OP_NO_TLSv1_3
|
||||
),
|
||||
ssl.TLSVersion.TLSv1: (
|
||||
_OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_1 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3
|
||||
),
|
||||
ssl.TLSVersion.TLSv1_1: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_2 | _OP_NO_TLSv1_3,
|
||||
ssl.TLSVersion.TLSv1_2: _OP_NO_SSLv2_OR_SSLv3 | _OP_NO_TLSv1_3,
|
||||
ssl.TLSVersion.TLSv1_3: _OP_NO_SSLv2_OR_SSLv3,
|
||||
ssl.TLSVersion.MAXIMUM_SUPPORTED: _OP_NO_SSLv2_OR_SSLv3,
|
||||
}
|
||||
|
||||
# OpenSSL will only write 16K at a time
|
||||
SSL_WRITE_BLOCKSIZE = 16384
|
||||
|
||||
orig_util_SSLContext = util.ssl_.SSLContext
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def inject_into_urllib3() -> None:
|
||||
"Monkey-patch urllib3 with PyOpenSSL-backed SSL-support."
|
||||
|
||||
_validate_dependencies_met()
|
||||
|
||||
util.SSLContext = PyOpenSSLContext # type: ignore[assignment]
|
||||
util.ssl_.SSLContext = PyOpenSSLContext # type: ignore[assignment]
|
||||
util.IS_PYOPENSSL = True
|
||||
util.ssl_.IS_PYOPENSSL = True
|
||||
|
||||
|
||||
def extract_from_urllib3() -> None:
|
||||
"Undo monkey-patching by :func:`inject_into_urllib3`."
|
||||
|
||||
util.SSLContext = orig_util_SSLContext
|
||||
util.ssl_.SSLContext = orig_util_SSLContext
|
||||
util.IS_PYOPENSSL = False
|
||||
util.ssl_.IS_PYOPENSSL = False
|
||||
|
||||
|
||||
def _validate_dependencies_met() -> None:
|
||||
"""
|
||||
Verifies that PyOpenSSL's package-level dependencies have been met.
|
||||
Throws `ImportError` if they are not met.
|
||||
"""
|
||||
# Method added in `cryptography==1.1`; not available in older versions
|
||||
from cryptography.x509.extensions import Extensions
|
||||
|
||||
if getattr(Extensions, "get_extension_for_class", None) is None:
|
||||
raise ImportError(
|
||||
"'cryptography' module missing required functionality. "
|
||||
"Try upgrading to v1.3.4 or newer."
|
||||
)
|
||||
|
||||
# pyOpenSSL 0.14 and above use cryptography for OpenSSL bindings. The _x509
|
||||
# attribute is only present on those versions.
|
||||
from OpenSSL.crypto import X509
|
||||
|
||||
x509 = X509()
|
||||
if getattr(x509, "_x509", None) is None:
|
||||
raise ImportError(
|
||||
"'pyOpenSSL' module missing required functionality. "
|
||||
"Try upgrading to v0.14 or newer."
|
||||
)
|
||||
|
||||
|
||||
def _dnsname_to_stdlib(name: str) -> str | None:
|
||||
"""
|
||||
Converts a dNSName SubjectAlternativeName field to the form used by the
|
||||
standard library on the given Python version.
|
||||
|
||||
Cryptography produces a dNSName as a unicode string that was idna-decoded
|
||||
from ASCII bytes. We need to idna-encode that string to get it back, and
|
||||
then on Python 3 we also need to convert to unicode via UTF-8 (the stdlib
|
||||
uses PyUnicode_FromStringAndSize on it, which decodes via UTF-8).
|
||||
|
||||
If the name cannot be idna-encoded then we return None signalling that
|
||||
the name given should be skipped.
|
||||
"""
|
||||
|
||||
def idna_encode(name: str) -> bytes | None:
|
||||
"""
|
||||
Borrowed wholesale from the Python Cryptography Project. It turns out
|
||||
that we can't just safely call `idna.encode`: it can explode for
|
||||
wildcard names. This avoids that problem.
|
||||
"""
|
||||
import idna
|
||||
|
||||
try:
|
||||
for prefix in ["*.", "."]:
|
||||
if name.startswith(prefix):
|
||||
name = name[len(prefix) :]
|
||||
return prefix.encode("ascii") + idna.encode(name)
|
||||
return idna.encode(name)
|
||||
except idna.core.IDNAError:
|
||||
return None
|
||||
|
||||
# Don't send IPv6 addresses through the IDNA encoder.
|
||||
if ":" in name:
|
||||
return name
|
||||
|
||||
encoded_name = idna_encode(name)
|
||||
if encoded_name is None:
|
||||
return None
|
||||
return encoded_name.decode("utf-8")
|
||||
|
||||
|
||||
def get_subj_alt_name(peer_cert: X509) -> list[tuple[str, str]]:
    """
    Given a PyOpenSSL certificate, provides all the subject alternative names.
    """
    cert = peer_cert.to_cryptography()

    # We want to find the SAN extension. Ask Cryptography to locate it (it's
    # faster than looping in Python)
    try:
        ext = cert.extensions.get_extension_for_class(x509.SubjectAlternativeName).value
    except x509.ExtensionNotFound:
        # No such extension, return the empty list.
        return []
    except (
        x509.DuplicateExtension,
        UnsupportedExtension,
        x509.UnsupportedGeneralNameType,
        UnicodeError,
    ) as e:
        # A problem has been found with the quality of the certificate. Assume
        # no SAN field is present.
        log.warning(
            "A problem was encountered with the certificate that prevented "
            "urllib3 from finding the SubjectAlternativeName field. This can "
            "affect certificate validation. The error was %s",
            e,
        )
        return []

    # We want to return dNSName and iPAddress fields. We need to cast the IPs
    # back to strings because the match_hostname function wants them as
    # strings.
    # Sadly the DNS names need to be idna encoded and then, on Python 3, UTF-8
    # decoded. This is pretty frustrating, but that's what the standard library
    # does with certificates, and so we need to attempt to do the same.
    # We also want to skip over names which cannot be idna encoded.
    names = [
        ("DNS", name)
        for name in map(_dnsname_to_stdlib, ext.get_values_for_type(x509.DNSName))
        if name is not None
    ]
    names.extend(
        ("IP Address", str(name)) for name in ext.get_values_for_type(x509.IPAddress)
    )

    return names

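For reference, the same SAN values can be pulled straight from a certificate
file with `cryptography` alone; a hedged sketch (the PEM path "server.pem" is
a placeholder, not something this module reads):

    from cryptography import x509 as cx509

    with open("server.pem", "rb") as f:
        cert = cx509.load_pem_x509_certificate(f.read())
    san = cert.extensions.get_extension_for_class(cx509.SubjectAlternativeName).value
    print(san.get_values_for_type(cx509.DNSName))    # e.g. ['example.com']
    print(san.get_values_for_type(cx509.IPAddress))  # e.g. [IPv4Address('192.0.2.1')]
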
class WrappedSocket:
    """API-compatibility wrapper for Python OpenSSL's Connection-class."""

    def __init__(
        self,
        connection: OpenSSL.SSL.Connection,
        socket: socket_cls,
        suppress_ragged_eofs: bool = True,
    ) -> None:
        self.connection = connection
        self.socket = socket
        self.suppress_ragged_eofs = suppress_ragged_eofs
        self._io_refs = 0
        self._closed = False

    def fileno(self) -> int:
        return self.socket.fileno()

    # Copy-pasted from Python 3.5 source code
    def _decref_socketios(self) -> None:
        if self._io_refs > 0:
            self._io_refs -= 1
        if self._closed:
            self.close()

    def recv(self, *args: typing.Any, **kwargs: typing.Any) -> bytes:
        try:
            data = self.connection.recv(*args, **kwargs)
        except OpenSSL.SSL.SysCallError as e:
            if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
                return b""
            else:
                raise OSError(e.args[0], str(e)) from e
        except OpenSSL.SSL.ZeroReturnError:
            if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
                return b""
            else:
                raise
        except OpenSSL.SSL.WantReadError as e:
            if not util.wait_for_read(self.socket, self.socket.gettimeout()):
                raise timeout("The read operation timed out") from e
            else:
                return self.recv(*args, **kwargs)

        # TLS 1.3 post-handshake authentication
        except OpenSSL.SSL.Error as e:
            raise ssl.SSLError(f"read error: {e!r}") from e
        else:
            return data  # type: ignore[no-any-return]

    def recv_into(self, *args: typing.Any, **kwargs: typing.Any) -> int:
        try:
            return self.connection.recv_into(*args, **kwargs)  # type: ignore[no-any-return]
        except OpenSSL.SSL.SysCallError as e:
            if self.suppress_ragged_eofs and e.args == (-1, "Unexpected EOF"):
                return 0
            else:
                raise OSError(e.args[0], str(e)) from e
        except OpenSSL.SSL.ZeroReturnError:
            if self.connection.get_shutdown() == OpenSSL.SSL.RECEIVED_SHUTDOWN:
                return 0
            else:
                raise
        except OpenSSL.SSL.WantReadError as e:
            if not util.wait_for_read(self.socket, self.socket.gettimeout()):
                raise timeout("The read operation timed out") from e
            else:
                return self.recv_into(*args, **kwargs)

        # TLS 1.3 post-handshake authentication
        except OpenSSL.SSL.Error as e:
            raise ssl.SSLError(f"read error: {e!r}") from e

    def settimeout(self, timeout: float) -> None:
        return self.socket.settimeout(timeout)

    def _send_until_done(self, data: bytes) -> int:
        while True:
            try:
                return self.connection.send(data)  # type: ignore[no-any-return]
            except OpenSSL.SSL.WantWriteError as e:
                if not util.wait_for_write(self.socket, self.socket.gettimeout()):
                    raise timeout() from e
                continue
            except OpenSSL.SSL.SysCallError as e:
                raise OSError(e.args[0], str(e)) from e

    def sendall(self, data: bytes) -> None:
        total_sent = 0
        while total_sent < len(data):
            sent = self._send_until_done(
                data[total_sent : total_sent + SSL_WRITE_BLOCKSIZE]
            )
            total_sent += sent

    def shutdown(self, how: int) -> None:
        try:
            self.connection.shutdown()
        except OpenSSL.SSL.Error as e:
            raise ssl.SSLError(f"shutdown error: {e!r}") from e

    def close(self) -> None:
        self._closed = True
        if self._io_refs <= 0:
            self._real_close()

    def _real_close(self) -> None:
        try:
            return self.connection.close()  # type: ignore[no-any-return]
        except OpenSSL.SSL.Error:
            return

    def getpeercert(
        self, binary_form: bool = False
    ) -> dict[str, list[typing.Any]] | None:
        x509 = self.connection.get_peer_certificate()

        if not x509:
            return x509  # type: ignore[no-any-return]

        if binary_form:
            return OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_ASN1, x509)  # type: ignore[no-any-return]

        return {
            "subject": ((("commonName", x509.get_subject().CN),),),  # type: ignore[dict-item]
            "subjectAltName": get_subj_alt_name(x509),
        }

    def version(self) -> str:
        return self.connection.get_protocol_version_name()  # type: ignore[no-any-return]

    def selected_alpn_protocol(self) -> str | None:
        alpn_proto = self.connection.get_alpn_proto_negotiated()
        return alpn_proto.decode() if alpn_proto else None


WrappedSocket.makefile = socket_cls.makefile  # type: ignore[attr-defined]

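The recv()/recv_into() methods above poll-and-retry whenever OpenSSL reports
WantReadError on a non-blocking socket. Stripped of the urllib3 helpers, the
pattern is just a select() wait; a sketch using only the stdlib
(wait_readable is a hypothetical name, not part of this module):

    import select
    import socket

    def wait_readable(sock: socket.socket, timeout: float | None) -> bool:
        # True once the socket has data to read, False if the wait timed out.
        ready, _, _ = select.select([sock], [], [], timeout)
        return bool(ready)
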
class PyOpenSSLContext:
    """
    I am a wrapper class for the PyOpenSSL ``Context`` object. I am responsible
    for translating the interface of the standard library ``SSLContext`` object
    to calls into PyOpenSSL.
    """

    def __init__(self, protocol: int) -> None:
        self.protocol = _openssl_versions[protocol]
        self._ctx = OpenSSL.SSL.Context(self.protocol)
        self._options = 0
        self.check_hostname = False
        self._minimum_version: int = ssl.TLSVersion.MINIMUM_SUPPORTED
        self._maximum_version: int = ssl.TLSVersion.MAXIMUM_SUPPORTED
        self._verify_flags: int = ssl.VERIFY_X509_TRUSTED_FIRST

    @property
    def options(self) -> int:
        return self._options

    @options.setter
    def options(self, value: int) -> None:
        self._options = value
        self._set_ctx_options()

    @property
    def verify_flags(self) -> int:
        return self._verify_flags

    @verify_flags.setter
    def verify_flags(self, value: int) -> None:
        self._verify_flags = value
        self._ctx.get_cert_store().set_flags(self._verify_flags)

    @property
    def verify_mode(self) -> int:
        return _openssl_to_stdlib_verify[self._ctx.get_verify_mode()]

    @verify_mode.setter
    def verify_mode(self, value: ssl.VerifyMode) -> None:
        self._ctx.set_verify(_stdlib_to_openssl_verify[value], _verify_callback)

    def set_default_verify_paths(self) -> None:
        self._ctx.set_default_verify_paths()

    def set_ciphers(self, ciphers: bytes | str) -> None:
        if isinstance(ciphers, str):
            ciphers = ciphers.encode("utf-8")
        self._ctx.set_cipher_list(ciphers)

    def load_verify_locations(
        self,
        cafile: str | None = None,
        capath: str | None = None,
        cadata: bytes | None = None,
    ) -> None:
        if cafile is not None:
            cafile = cafile.encode("utf-8")  # type: ignore[assignment]
        if capath is not None:
            capath = capath.encode("utf-8")  # type: ignore[assignment]
        try:
            self._ctx.load_verify_locations(cafile, capath)
            if cadata is not None:
                self._ctx.load_verify_locations(BytesIO(cadata))
        except OpenSSL.SSL.Error as e:
            raise ssl.SSLError(f"unable to load trusted certificates: {e!r}") from e

    def load_cert_chain(
        self,
        certfile: str,
        keyfile: str | None = None,
        password: str | None = None,
    ) -> None:
        try:
            self._ctx.use_certificate_chain_file(certfile)
            if password is not None:
                if not isinstance(password, bytes):
                    password = password.encode("utf-8")  # type: ignore[assignment]
                self._ctx.set_passwd_cb(lambda *_: password)
            self._ctx.use_privatekey_file(keyfile or certfile)
        except OpenSSL.SSL.Error as e:
            raise ssl.SSLError(f"Unable to load certificate chain: {e!r}") from e

    def set_alpn_protocols(self, protocols: list[bytes | str]) -> None:
        protocols = [util.util.to_bytes(p, "ascii") for p in protocols]
        return self._ctx.set_alpn_protos(protocols)  # type: ignore[no-any-return]

    def wrap_socket(
        self,
        sock: socket_cls,
        server_side: bool = False,
        do_handshake_on_connect: bool = True,
        suppress_ragged_eofs: bool = True,
        server_hostname: bytes | str | None = None,
    ) -> WrappedSocket:
        cnx = OpenSSL.SSL.Connection(self._ctx, sock)

        # If server_hostname is an IP, don't use it for SNI, per RFC6066 Section 3
        if server_hostname and not util.ssl_.is_ipaddress(server_hostname):
            if isinstance(server_hostname, str):
                server_hostname = server_hostname.encode("utf-8")
            cnx.set_tlsext_host_name(server_hostname)

        cnx.set_connect_state()

        while True:
            try:
                cnx.do_handshake()
            except OpenSSL.SSL.WantReadError as e:
                if not util.wait_for_read(sock, sock.gettimeout()):
                    raise timeout("select timed out") from e
                continue
            except OpenSSL.SSL.Error as e:
                raise ssl.SSLError(f"bad handshake: {e!r}") from e
            break

        return WrappedSocket(cnx, sock)

    def _set_ctx_options(self) -> None:
        self._ctx.set_options(
            self._options
            | _openssl_to_ssl_minimum_version[self._minimum_version]
            | _openssl_to_ssl_maximum_version[self._maximum_version]
        )

    @property
    def minimum_version(self) -> int:
        return self._minimum_version

    @minimum_version.setter
    def minimum_version(self, minimum_version: int) -> None:
        self._minimum_version = minimum_version
        self._set_ctx_options()

    @property
    def maximum_version(self) -> int:
        return self._maximum_version

    @maximum_version.setter
    def maximum_version(self, maximum_version: int) -> None:
        self._maximum_version = maximum_version
        self._set_ctx_options()

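Driving PyOpenSSLContext looks like driving the stdlib SSLContext it mirrors;
a minimal client-side sketch (the choice of protocol constant and TLS floor is
the caller's, not something this class mandates):

    import ssl

    ctx = PyOpenSSLContext(ssl.PROTOCOL_TLS_CLIENT)
    ctx.verify_mode = ssl.CERT_REQUIRED
    ctx.set_default_verify_paths()
    ctx.minimum_version = ssl.TLSVersion.TLSv1_2
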
def _verify_callback(
    cnx: OpenSSL.SSL.Connection,
    x509: X509,
    err_no: int,
    err_depth: int,
    return_code: int,
) -> bool:
    return err_no == 0

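As with the other contrib backends, this module is activated by swapping its
classes into urllib3 at import time; a hedged end-to-end sketch (note that
urllib3 2.x deprecates this backend in favor of the stdlib ssl module):

    import urllib3
    import urllib3.contrib.pyopenssl

    urllib3.contrib.pyopenssl.inject_into_urllib3()
    http = urllib3.PoolManager()
    r = http.request("GET", "https://example.com")
    print(r.status)
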
venv/lib/python3.11/site-packages/urllib3/contrib/socks.py (new file, 228 lines)
@ -0,0 +1,228 @@
"""
|
||||
This module contains provisional support for SOCKS proxies from within
|
||||
urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and
|
||||
SOCKS5. To enable its functionality, either install PySocks or install this
|
||||
module with the ``socks`` extra.
|
||||
|
||||
The SOCKS implementation supports the full range of urllib3 features. It also
|
||||
supports the following SOCKS features:
|
||||
|
||||
- SOCKS4A (``proxy_url='socks4a://...``)
|
||||
- SOCKS4 (``proxy_url='socks4://...``)
|
||||
- SOCKS5 with remote DNS (``proxy_url='socks5h://...``)
|
||||
- SOCKS5 with local DNS (``proxy_url='socks5://...``)
|
||||
- Usernames and passwords for the SOCKS proxy
|
||||
|
||||
.. note::
|
||||
It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in
|
||||
your ``proxy_url`` to ensure that DNS resolution is done from the remote
|
||||
server instead of client-side when connecting to a domain name.
|
||||
|
||||
SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5
|
||||
supports IPv4, IPv6, and domain names.
|
||||
|
||||
When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url``
|
||||
will be sent as the ``userid`` section of the SOCKS request:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
proxy_url="socks4a://<userid>@proxy-host"
|
||||
|
||||
When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion
|
||||
of the ``proxy_url`` will be sent as the username/password to authenticate
|
||||
with the proxy:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
proxy_url="socks5h://<username>:<password>@proxy-host"
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations

try:
    import socks  # type: ignore[import-not-found]
except ImportError:
    import warnings

    from ..exceptions import DependencyWarning

    warnings.warn(
        (
            "SOCKS support in urllib3 requires the installation of optional "
            "dependencies: specifically, PySocks. For more information, see "
            "https://urllib3.readthedocs.io/en/latest/advanced-usage.html#socks-proxies"
        ),
        DependencyWarning,
    )
    raise

import typing
from socket import timeout as SocketTimeout

from ..connection import HTTPConnection, HTTPSConnection
from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool
from ..exceptions import ConnectTimeoutError, NewConnectionError
from ..poolmanager import PoolManager
from ..util.url import parse_url

try:
    import ssl
except ImportError:
    ssl = None  # type: ignore[assignment]

class _TYPE_SOCKS_OPTIONS(typing.TypedDict):
    socks_version: int
    proxy_host: str | None
    proxy_port: str | None
    username: str | None
    password: str | None
    rdns: bool

class SOCKSConnection(HTTPConnection):
    """
    A plain-text HTTP connection that connects via a SOCKS proxy.
    """

    def __init__(
        self,
        _socks_options: _TYPE_SOCKS_OPTIONS,
        *args: typing.Any,
        **kwargs: typing.Any,
    ) -> None:
        self._socks_options = _socks_options
        super().__init__(*args, **kwargs)

    def _new_conn(self) -> socks.socksocket:
        """
        Establish a new connection via the SOCKS proxy.
        """
        extra_kw: dict[str, typing.Any] = {}
        if self.source_address:
            extra_kw["source_address"] = self.source_address

        if self.socket_options:
            extra_kw["socket_options"] = self.socket_options

        try:
            conn = socks.create_connection(
                (self.host, self.port),
                proxy_type=self._socks_options["socks_version"],
                proxy_addr=self._socks_options["proxy_host"],
                proxy_port=self._socks_options["proxy_port"],
                proxy_username=self._socks_options["username"],
                proxy_password=self._socks_options["password"],
                proxy_rdns=self._socks_options["rdns"],
                timeout=self.timeout,
                **extra_kw,
            )

        except SocketTimeout as e:
            raise ConnectTimeoutError(
                self,
                f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
            ) from e

        except socks.ProxyError as e:
            # This is fragile as hell, but it seems to be the only way to raise
            # useful errors here.
            if e.socket_err:
                error = e.socket_err
                if isinstance(error, SocketTimeout):
                    raise ConnectTimeoutError(
                        self,
                        f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
                    ) from e
                else:
                    # Adding `from e` messes with coverage somehow, so it's omitted.
                    # See #2386.
                    raise NewConnectionError(
                        self, f"Failed to establish a new connection: {error}"
                    )
            else:
                raise NewConnectionError(
                    self, f"Failed to establish a new connection: {e}"
                ) from e

        except OSError as e:  # Defensive: PySocks should catch all these.
            raise NewConnectionError(
                self, f"Failed to establish a new connection: {e}"
            ) from e

        return conn

# We don't need to duplicate the Verified/Unverified distinction from
# urllib3/connection.py here because the HTTPSConnection will already have been
# correctly set to either the Verified or Unverified form by that module. This
# means the SOCKSHTTPSConnection will automatically be the correct type.
class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection):
    pass


class SOCKSHTTPConnectionPool(HTTPConnectionPool):
    ConnectionCls = SOCKSConnection


class SOCKSHTTPSConnectionPool(HTTPSConnectionPool):
    ConnectionCls = SOCKSHTTPSConnection

class SOCKSProxyManager(PoolManager):
    """
    A version of the urllib3 ProxyManager that routes connections via the
    defined SOCKS proxy.
    """

    pool_classes_by_scheme = {
        "http": SOCKSHTTPConnectionPool,
        "https": SOCKSHTTPSConnectionPool,
    }

    def __init__(
        self,
        proxy_url: str,
        username: str | None = None,
        password: str | None = None,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ):
        parsed = parse_url(proxy_url)

        if username is None and password is None and parsed.auth is not None:
            split = parsed.auth.split(":")
            if len(split) == 2:
                username, password = split
        if parsed.scheme == "socks5":
            socks_version = socks.PROXY_TYPE_SOCKS5
            rdns = False
        elif parsed.scheme == "socks5h":
            socks_version = socks.PROXY_TYPE_SOCKS5
            rdns = True
        elif parsed.scheme == "socks4":
            socks_version = socks.PROXY_TYPE_SOCKS4
            rdns = False
        elif parsed.scheme == "socks4a":
            socks_version = socks.PROXY_TYPE_SOCKS4
            rdns = True
        else:
            raise ValueError(f"Unable to determine SOCKS version from {proxy_url}")

        self.proxy_url = proxy_url

        socks_options = {
            "socks_version": socks_version,
            "proxy_host": parsed.host,
            "proxy_port": parsed.port,
            "username": username,
            "password": password,
            "rdns": rdns,
        }
        connection_pool_kw["_socks_options"] = socks_options

        super().__init__(num_pools, headers, **connection_pool_kw)

        self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme