Update 2025-04-13_16:25:39

commit 4c711360d3 by root, 2025-04-13 16:25:41 +02:00
2979 changed files with 666585 additions and 0 deletions


@@ -0,0 +1,62 @@
# A highish-level implementation of the HTTP/1.1 wire protocol (RFC 7230),
# containing no networking code at all, loosely modelled on hyper-h2's generic
# implementation of HTTP/2 (and in particular the h2.connection.H2Connection
# class). There's still a bunch of subtle details you need to get right if you
# want to make this actually useful, because it doesn't implement all the
# semantics to check that what you're asking to write to the wire is sensible,
# but at least it gets you out of dealing with the wire itself.
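#
# A minimal client-side sketch of how this package is meant to be driven
# (assumes `sock` is a connected, blocking socket; error handling omitted):
#
#     import h11
#     conn = h11.Connection(our_role=h11.CLIENT)
#     sock.sendall(conn.send(h11.Request(
#         method="GET", target="/", headers=[("Host", "example.com")])))
#     sock.sendall(conn.send(h11.EndOfMessage()))
#     while True:
#         event = conn.next_event()
#         if event is h11.NEED_DATA:
#             conn.receive_data(sock.recv(4096))
#         elif type(event) is h11.EndOfMessage:
#             break
#         # else: handle the h11.Response / h11.Data events as they arrive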
from h11._connection import Connection, NEED_DATA, PAUSED
from h11._events import (
ConnectionClosed,
Data,
EndOfMessage,
Event,
InformationalResponse,
Request,
Response,
)
from h11._state import (
CLIENT,
CLOSED,
DONE,
ERROR,
IDLE,
MIGHT_SWITCH_PROTOCOL,
MUST_CLOSE,
SEND_BODY,
SEND_RESPONSE,
SERVER,
SWITCHED_PROTOCOL,
)
from h11._util import LocalProtocolError, ProtocolError, RemoteProtocolError
from h11._version import __version__
PRODUCT_ID = "python-h11/" + __version__
__all__ = (
"Connection",
"NEED_DATA",
"PAUSED",
"ConnectionClosed",
"Data",
"EndOfMessage",
"Event",
"InformationalResponse",
"Request",
"Response",
"CLIENT",
"CLOSED",
"DONE",
"ERROR",
"IDLE",
"MUST_CLOSE",
"SEND_BODY",
"SEND_RESPONSE",
"SERVER",
"SWITCHED_PROTOCOL",
"ProtocolError",
"LocalProtocolError",
"RemoteProtocolError",
)


@@ -0,0 +1,132 @@
# We use native strings for all the re patterns, to take advantage of string
# formatting, and then convert to bytestrings when compiling the final re
# objects.
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#whitespace
# OWS = *( SP / HTAB )
# ; optional whitespace
OWS = r"[ \t]*"
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.token.separators
# token = 1*tchar
#
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
# / DIGIT / ALPHA
# ; any VCHAR, except delimiters
token = r"[-!#$%&'*+.^_`|~0-9a-zA-Z]+"
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#header.fields
# field-name = token
field_name = token
# The standard says:
#
# field-value = *( field-content / obs-fold )
# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar = VCHAR / obs-text
# obs-fold = CRLF 1*( SP / HTAB )
# ; obsolete line folding
# ; see Section 3.2.4
#
# https://tools.ietf.org/html/rfc5234#appendix-B.1
#
# VCHAR = %x21-7E
# ; visible (printing) characters
#
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#rule.quoted-string
# obs-text = %x80-FF
#
# However, the standard definition of field-content is WRONG! It disallows
# fields containing a single visible character surrounded by whitespace,
# e.g. "foo a bar".
#
# See: https://www.rfc-editor.org/errata_search.php?rfc=7230&eid=4189
#
# So our definition of field_content attempts to fix it up...
#
# Also, we allow lots of control characters, because apparently people assume
# that they're legal in practice (e.g., google analytics makes cookies with
# \x01 in them!):
# https://github.com/python-hyper/h11/issues/57
# We still don't allow NUL or whitespace, because those are often treated as
# meta-characters and letting them through can lead to nasty issues like SSRF.
vchar = r"[\x21-\x7e]"
vchar_or_obs_text = r"[^\x00\s]"
field_vchar = vchar_or_obs_text
field_content = r"{field_vchar}+(?:[ \t]+{field_vchar}+)*".format(**globals())
# We handle obs-fold at a different level, and our fixed-up field_content
# already grows to swallow the whole value, so ? instead of *
field_value = r"({field_content})?".format(**globals())
# header-field = field-name ":" OWS field-value OWS
header_field = (
r"(?P<field_name>{field_name})"
r":"
r"{OWS}"
r"(?P<field_value>{field_value})"
r"{OWS}".format(**globals())
)
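# For example, once compiled to a bytes regex (as _readers.py does), a sketch:
#
#     import re
#     header_field_re = re.compile(header_field.encode("ascii"))
#     m = header_field_re.fullmatch(b"Content-Type: text/html")
#     assert m.group("field_name") == b"Content-Type"
#     assert m.group("field_value") == b"text/html"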
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#request.line
#
# request-line = method SP request-target SP HTTP-version CRLF
# method = token
# HTTP-version = HTTP-name "/" DIGIT "." DIGIT
# HTTP-name = %x48.54.54.50 ; "HTTP", case-sensitive
#
# request-target is complicated (see RFC 7230 sec 5.3) -- could be path, full
# URL, host+port (for connect), or even "*", but in any case we are guaranteed
# that it consists of the visible printing characters.
method = token
request_target = r"{vchar}+".format(**globals())
http_version = r"HTTP/(?P<http_version>[0-9]\.[0-9])"
request_line = (
r"(?P<method>{method})"
r" "
r"(?P<target>{request_target})"
r" "
r"{http_version}".format(**globals())
)
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#status.line
#
# status-line = HTTP-version SP status-code SP reason-phrase CRLF
# status-code = 3DIGIT
# reason-phrase = *( HTAB / SP / VCHAR / obs-text )
status_code = r"[0-9]{3}"
reason_phrase = r"([ \t]|{vchar_or_obs_text})*".format(**globals())
status_line = (
r"{http_version}"
r" "
r"(?P<status_code>{status_code})"
# However, there are apparently a few too many servers out there that just
# leave out the reason phrase:
# https://github.com/scrapy/scrapy/issues/345#issuecomment-281756036
# https://github.com/seanmonstar/httparse/issues/29
# so make it optional. ?: is a non-capturing group.
r"(?: (?P<reason>{reason_phrase}))?".format(**globals())
)
HEXDIG = r"[0-9A-Fa-f]"
# Actually
#
# chunk-size = 1*HEXDIG
#
# but we impose an upper-limit to avoid ridiculosity. len(str(2**64)) == 20
chunk_size = r"({HEXDIG}){{1,20}}".format(**globals())
# Actually
#
# chunk-ext = *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
#
# but we aren't parsing the things so we don't really care.
chunk_ext = r";.*"
chunk_header = (
r"(?P<chunk_size>{chunk_size})"
r"(?P<chunk_ext>{chunk_ext})?"
r"{OWS}\r\n".format(
**globals()
) # Even though the specification does not allow for extra whitespace,
# we are lenient with trailing whitespace because some servers in the wild use it.
)
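# For example, a sketch of matching a chunk header against the compiled form:
#
#     import re
#     chunk_header_re = re.compile(chunk_header.encode("ascii"))
#     m = chunk_header_re.fullmatch(b"5;name=value\r\n")
#     assert m.group("chunk_size") == b"5"
#     assert m.group("chunk_ext") == b";name=value"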


@@ -0,0 +1,633 @@
# This contains the main Connection class. Everything in h11 revolves around
# this.
from typing import Any, Callable, cast, Dict, List, Optional, Tuple, Type, Union
from ._events import (
ConnectionClosed,
Data,
EndOfMessage,
Event,
InformationalResponse,
Request,
Response,
)
from ._headers import get_comma_header, has_expect_100_continue, set_comma_header
from ._readers import READERS, ReadersType
from ._receivebuffer import ReceiveBuffer
from ._state import (
_SWITCH_CONNECT,
_SWITCH_UPGRADE,
CLIENT,
ConnectionState,
DONE,
ERROR,
MIGHT_SWITCH_PROTOCOL,
SEND_BODY,
SERVER,
SWITCHED_PROTOCOL,
)
from ._util import ( # Import the internal things we need
LocalProtocolError,
RemoteProtocolError,
Sentinel,
)
from ._writers import WRITERS, WritersType
# Everything in __all__ gets re-exported as part of the h11 public API.
__all__ = ["Connection", "NEED_DATA", "PAUSED"]
class NEED_DATA(Sentinel, metaclass=Sentinel):
pass
class PAUSED(Sentinel, metaclass=Sentinel):
pass
# If we ever have this much buffered without it making a complete parseable
# event, we error out. The only time we really buffer is when reading the
# request/response line + headers together, so this is effectively the limit on
# the size of that.
#
# Some precedents for defaults:
# - node.js: 80 * 1024
# - tomcat: 8 * 1024
# - IIS: 16 * 1024
# - Apache: <8 KiB per line>
DEFAULT_MAX_INCOMPLETE_EVENT_SIZE = 16 * 1024
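# For example, a server that wants to be stricter than the default could pass
# (a sketch, using the Connection class defined below):
#
#     conn = Connection(our_role=SERVER, max_incomplete_event_size=8 * 1024)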
# RFC 7230's rules for connection lifecycles:
# - If either side says they want to close the connection, then the connection
# must close.
# - HTTP/1.1 defaults to keep-alive unless someone says Connection: close
# - HTTP/1.0 defaults to close unless both sides say Connection: keep-alive
# (and even this is a mess -- e.g. if you're implementing a proxy then
# sending Connection: keep-alive is forbidden).
#
# We simplify life by simply not supporting keep-alive with HTTP/1.0 peers. So
# our rule is:
# - If someone says Connection: close, we will close
# - If someone uses HTTP/1.0, we will close.
def _keep_alive(event: Union[Request, Response]) -> bool:
connection = get_comma_header(event.headers, b"connection")
if b"close" in connection:
return False
if getattr(event, "http_version", b"1.1") < b"1.1":
return False
return True
def _body_framing(
request_method: bytes, event: Union[Request, Response]
) -> Tuple[str, Union[Tuple[()], Tuple[int]]]:
# Called when we enter SEND_BODY to figure out framing information for
# this body.
#
# These are the only two events that can trigger a SEND_BODY state:
assert type(event) in (Request, Response)
# Returns one of:
#
# ("content-length", count)
# ("chunked", ())
# ("http/1.0", ())
#
# which are (lookup key, *args) for constructing body reader/writer
# objects.
#
# Reference: https://tools.ietf.org/html/rfc7230#section-3.3.3
#
# Step 1: some responses always have an empty body, regardless of what the
# headers say.
if type(event) is Response:
if (
event.status_code in (204, 304)
or request_method == b"HEAD"
or (request_method == b"CONNECT" and 200 <= event.status_code < 300)
):
return ("content-length", (0,))
# Section 3.3.3 also lists another case -- responses with status_code
# < 200. For us these are InformationalResponses, not Responses, so
# they can't get into this function in the first place.
assert event.status_code >= 200
# Step 2: check for Transfer-Encoding (T-E beats C-L):
transfer_encodings = get_comma_header(event.headers, b"transfer-encoding")
if transfer_encodings:
assert transfer_encodings == [b"chunked"]
return ("chunked", ())
# Step 3: check for Content-Length
content_lengths = get_comma_header(event.headers, b"content-length")
if content_lengths:
return ("content-length", (int(content_lengths[0]),))
# Step 4: no applicable headers; fallback/default depends on type
if type(event) is Request:
return ("content-length", (0,))
else:
return ("http/1.0", ())
################################################################
#
# The main Connection class
#
################################################################
class Connection:
"""An object encapsulating the state of an HTTP connection.
Args:
our_role: If you're implementing a client, pass :data:`h11.CLIENT`. If
you're implementing a server, pass :data:`h11.SERVER`.
max_incomplete_event_size (int):
The maximum number of bytes we're willing to buffer of an
incomplete event. In practice this mostly sets a limit on the
maximum size of the request/response line + headers. If this is
exceeded, then :meth:`next_event` will raise
:exc:`RemoteProtocolError`.
"""
def __init__(
self,
our_role: Type[Sentinel],
max_incomplete_event_size: int = DEFAULT_MAX_INCOMPLETE_EVENT_SIZE,
) -> None:
self._max_incomplete_event_size = max_incomplete_event_size
# State and role tracking
if our_role not in (CLIENT, SERVER):
raise ValueError("expected CLIENT or SERVER, not {!r}".format(our_role))
self.our_role = our_role
self.their_role: Type[Sentinel]
if our_role is CLIENT:
self.their_role = SERVER
else:
self.their_role = CLIENT
self._cstate = ConnectionState()
# Callables for converting data->events or vice-versa given the
# current state
self._writer = self._get_io_object(self.our_role, None, WRITERS)
self._reader = self._get_io_object(self.their_role, None, READERS)
# Holds any unprocessed received data
self._receive_buffer = ReceiveBuffer()
# If this is true, then it indicates that the incoming connection was
# closed *after* the end of whatever's in self._receive_buffer:
self._receive_buffer_closed = False
# Extra bits of state that don't fit into the state machine.
#
# These two are only used to interpret framing headers for figuring
# out how to read/write response bodies. their_http_version is also
# made available as a convenient public API.
self.their_http_version: Optional[bytes] = None
self._request_method: Optional[bytes] = None
# This is pure flow-control and doesn't at all affect the set of legal
# transitions, so no need to bother ConnectionState with it:
self.client_is_waiting_for_100_continue = False
@property
def states(self) -> Dict[Type[Sentinel], Type[Sentinel]]:
"""A dictionary like::
{CLIENT: <client state>, SERVER: <server state>}
See :ref:`state-machine` for details.
"""
return dict(self._cstate.states)
@property
def our_state(self) -> Type[Sentinel]:
"""The current state of whichever role we are playing. See
:ref:`state-machine` for details.
"""
return self._cstate.states[self.our_role]
@property
def their_state(self) -> Type[Sentinel]:
"""The current state of whichever role we are NOT playing. See
:ref:`state-machine` for details.
"""
return self._cstate.states[self.their_role]
@property
def they_are_waiting_for_100_continue(self) -> bool:
return self.their_role is CLIENT and self.client_is_waiting_for_100_continue
def start_next_cycle(self) -> None:
"""Attempt to reset our connection state for a new request/response
cycle.
If both client and server are in :data:`DONE` state, then resets them
both to :data:`IDLE` state in preparation for a new request/response
cycle on this same connection. Otherwise, raises a
:exc:`LocalProtocolError`.
See :ref:`keepalive-and-pipelining`.
"""
old_states = dict(self._cstate.states)
self._cstate.start_next_cycle()
self._request_method = None
# self.their_http_version gets left alone, since it presumably lasts
# beyond a single request/response cycle
assert not self.client_is_waiting_for_100_continue
self._respond_to_state_changes(old_states)
def _process_error(self, role: Type[Sentinel]) -> None:
old_states = dict(self._cstate.states)
self._cstate.process_error(role)
self._respond_to_state_changes(old_states)
def _server_switch_event(self, event: Event) -> Optional[Type[Sentinel]]:
if type(event) is InformationalResponse and event.status_code == 101:
return _SWITCH_UPGRADE
if type(event) is Response:
if (
_SWITCH_CONNECT in self._cstate.pending_switch_proposals
and 200 <= event.status_code < 300
):
return _SWITCH_CONNECT
return None
# All events go through here
def _process_event(self, role: Type[Sentinel], event: Event) -> None:
# First, pass the event through the state machine to make sure it
# succeeds.
old_states = dict(self._cstate.states)
if role is CLIENT and type(event) is Request:
if event.method == b"CONNECT":
self._cstate.process_client_switch_proposal(_SWITCH_CONNECT)
if get_comma_header(event.headers, b"upgrade"):
self._cstate.process_client_switch_proposal(_SWITCH_UPGRADE)
server_switch_event = None
if role is SERVER:
server_switch_event = self._server_switch_event(event)
self._cstate.process_event(role, type(event), server_switch_event)
# Then perform the updates triggered by it.
if type(event) is Request:
self._request_method = event.method
if role is self.their_role and type(event) in (
Request,
Response,
InformationalResponse,
):
event = cast(Union[Request, Response, InformationalResponse], event)
self.their_http_version = event.http_version
# Keep alive handling
#
# RFC 7230 doesn't really say what one should do if Connection: close
# shows up on a 1xx InformationalResponse. I think the idea is that
# this is not supposed to happen. In any case, if it does happen, we
# ignore it.
if type(event) in (Request, Response) and not _keep_alive(
cast(Union[Request, Response], event)
):
self._cstate.process_keep_alive_disabled()
# 100-continue
if type(event) is Request and has_expect_100_continue(event):
self.client_is_waiting_for_100_continue = True
if type(event) in (InformationalResponse, Response):
self.client_is_waiting_for_100_continue = False
if role is CLIENT and type(event) in (Data, EndOfMessage):
self.client_is_waiting_for_100_continue = False
self._respond_to_state_changes(old_states, event)
def _get_io_object(
self,
role: Type[Sentinel],
event: Optional[Event],
io_dict: Union[ReadersType, WritersType],
) -> Optional[Callable[..., Any]]:
# event may be None; it's only used when entering SEND_BODY
state = self._cstate.states[role]
if state is SEND_BODY:
# Special case: the io_dict has a dict of reader/writer factories
# that depend on the request/response framing.
framing_type, args = _body_framing(
cast(bytes, self._request_method), cast(Union[Request, Response], event)
)
return io_dict[SEND_BODY][framing_type](*args) # type: ignore[index]
else:
# General case: the io_dict just has the appropriate reader/writer
# for this state
return io_dict.get((role, state)) # type: ignore[return-value]
# This must be called after any action that might have caused
# self._cstate.states to change.
def _respond_to_state_changes(
self,
old_states: Dict[Type[Sentinel], Type[Sentinel]],
event: Optional[Event] = None,
) -> None:
# Update reader/writer
if self.our_state != old_states[self.our_role]:
self._writer = self._get_io_object(self.our_role, event, WRITERS)
if self.their_state != old_states[self.their_role]:
self._reader = self._get_io_object(self.their_role, event, READERS)
@property
def trailing_data(self) -> Tuple[bytes, bool]:
"""Data that has been received, but not yet processed, represented as
a tuple with two elements, where the first is a byte-string containing
the unprocessed data itself, and the second is a bool that is True if
the receive connection was closed.
See :ref:`switching-protocols` for discussion of why you'd want this.
"""
return (bytes(self._receive_buffer), self._receive_buffer_closed)
def receive_data(self, data: bytes) -> None:
"""Add data to our internal receive buffer.
This does not actually do any processing on the data, just stores
it. To trigger processing, you have to call :meth:`next_event`.
Args:
data (:term:`bytes-like object`):
The new data that was just received.
Special case: If *data* is an empty byte-string like ``b""``,
then this indicates that the remote side has closed the
connection (end of file). Normally this is convenient, because
standard Python APIs like :meth:`file.read` or
:meth:`socket.recv` use ``b""`` to indicate end-of-file, while
other failures to read are indicated using other mechanisms
like raising :exc:`TimeoutError`. When using such an API you
can just blindly pass through whatever you get from ``read``
to :meth:`receive_data`, and everything will work.
But, if you have an API where reading an empty string is a
valid non-EOF condition, then you need to be aware of this and
make sure to check for such strings and avoid passing them to
:meth:`receive_data`.
Returns:
Nothing, but after calling this you should call :meth:`next_event`
to parse the newly received data.
Raises:
RuntimeError:
Raised if you pass an empty *data*, indicating EOF, and then
pass a non-empty *data*, indicating more data that somehow
arrived after the EOF.
(Calling ``receive_data(b"")`` multiple times is fine,
and equivalent to calling it once.)
"""
if data:
if self._receive_buffer_closed:
raise RuntimeError("received close, then received more data?")
self._receive_buffer += data
else:
self._receive_buffer_closed = True
def _extract_next_receive_event(
self,
) -> Union[Event, Type[NEED_DATA], Type[PAUSED]]:
state = self.their_state
# We don't pause immediately when they enter DONE, because even in
# DONE state we can still process a ConnectionClosed() event. But
# if we have data in our buffer, then we definitely aren't getting
# a ConnectionClosed() immediately and we need to pause.
if state is DONE and self._receive_buffer:
return PAUSED
if state is MIGHT_SWITCH_PROTOCOL or state is SWITCHED_PROTOCOL:
return PAUSED
assert self._reader is not None
event = self._reader(self._receive_buffer)
if event is None:
if not self._receive_buffer and self._receive_buffer_closed:
# In some unusual cases (basically just HTTP/1.0 bodies), EOF
# triggers an actual protocol event; in that case, we want to
# return that event, and then the state will change and we'll
# get called again to generate the actual ConnectionClosed().
if hasattr(self._reader, "read_eof"):
event = self._reader.read_eof() # type: ignore[attr-defined]
else:
event = ConnectionClosed()
if event is None:
event = NEED_DATA
return event # type: ignore[no-any-return]
def next_event(self) -> Union[Event, Type[NEED_DATA], Type[PAUSED]]:
"""Parse the next event out of our receive buffer, update our internal
state, and return it.
This is a mutating operation -- think of it like calling :func:`next`
on an iterator.
Returns:
: One of three things:
1) An event object -- see :ref:`events`.
2) The special constant :data:`NEED_DATA`, which indicates that
you need to read more data from your socket and pass it to
:meth:`receive_data` before this method will be able to return
any more events.
3) The special constant :data:`PAUSED`, which indicates that we
are not in a state where we can process incoming data (usually
because the peer has finished their part of the current
request/response cycle, and you have not yet called
:meth:`start_next_cycle`). See :ref:`flow-control` for details.
Raises:
RemoteProtocolError:
The peer has misbehaved. You should close the connection
(possibly after sending some kind of 4xx response).
Once this method returns :class:`ConnectionClosed` once, then all
subsequent calls will also return :class:`ConnectionClosed`.
If this method raises any exception besides :exc:`RemoteProtocolError`
then that's a bug -- if it happens please file a bug report!
If this method raises any exception then it also sets
:attr:`Connection.their_state` to :data:`ERROR` -- see
:ref:`error-handling` for discussion.
"""
if self.their_state is ERROR:
raise RemoteProtocolError("Can't receive data when peer state is ERROR")
try:
event = self._extract_next_receive_event()
if event not in [NEED_DATA, PAUSED]:
self._process_event(self.their_role, cast(Event, event))
if event is NEED_DATA:
if len(self._receive_buffer) > self._max_incomplete_event_size:
# 431 is "Request header fields too large" which is pretty
# much the only situation where we can get here
raise RemoteProtocolError(
"Receive buffer too long", error_status_hint=431
)
if self._receive_buffer_closed:
# We're still trying to complete some event, but that's
# never going to happen because no more data is coming
raise RemoteProtocolError("peer unexpectedly closed connection")
return event
except BaseException as exc:
self._process_error(self.their_role)
if isinstance(exc, LocalProtocolError):
exc._reraise_as_remote_protocol_error()
else:
raise
def send(self, event: Event) -> Optional[bytes]:
"""Convert a high-level event into bytes that can be sent to the peer,
while updating our internal state machine.
Args:
event: The :ref:`event <events>` to send.
Returns:
If ``type(event) is ConnectionClosed``, then returns
``None``. Otherwise, returns a :term:`bytes-like object`.
Raises:
LocalProtocolError:
Sending this event at this time would violate our
understanding of the HTTP/1.1 protocol.
If this method raises any exception then it also sets
:attr:`Connection.our_state` to :data:`ERROR` -- see
:ref:`error-handling` for discussion.
"""
data_list = self.send_with_data_passthrough(event)
if data_list is None:
return None
else:
return b"".join(data_list)
def send_with_data_passthrough(self, event: Event) -> Optional[List[bytes]]:
"""Identical to :meth:`send`, except that in situations where
:meth:`send` returns a single :term:`bytes-like object`, this instead
returns a list of them -- and when sending a :class:`Data` event, this
list is guaranteed to contain the exact object you passed in as
:attr:`Data.data`. See :ref:`sendfile` for discussion.
"""
if self.our_state is ERROR:
raise LocalProtocolError("Can't send data when our state is ERROR")
try:
if type(event) is Response:
event = self._clean_up_response_headers_for_sending(event)
# We want to call _process_event before calling the writer,
# because if someone tries to do something invalid then this will
# give a sensible error message, while our writers all just assume
# they will only receive valid events. But, _process_event might
# change self._writer. So we have to do a little dance:
writer = self._writer
self._process_event(self.our_role, event)
if type(event) is ConnectionClosed:
return None
else:
# In any situation where writer is None, process_event should
# have raised ProtocolError
assert writer is not None
data_list: List[bytes] = []
writer(event, data_list.append)
return data_list
except:
self._process_error(self.our_role)
raise
def send_failed(self) -> None:
"""Notify the state machine that we failed to send the data it gave
us.
This causes :attr:`Connection.our_state` to immediately become
:data:`ERROR` -- see :ref:`error-handling` for discussion.
"""
self._process_error(self.our_role)
# When sending a Response, we take responsibility for a few things:
#
# - Sometimes you MUST set Connection: close. We take care of those
# times. (You can also set it yourself if you want, and if you do then
# we'll respect that and close the connection at the right time. But you
# don't have to worry about that unless you want to.)
#
# - The user has to set Content-Length if they want it. Otherwise, for
# responses that have bodies (e.g. not HEAD), we will automatically
# select the right mechanism for streaming a body of unknown length,
# which depends on the peer's HTTP version.
#
# This function's *only* responsibility is making sure headers are set up
# right -- everything downstream just looks at the headers. There are no
# side channels.
def _clean_up_response_headers_for_sending(self, response: Response) -> Response:
assert type(response) is Response
headers = response.headers
need_close = False
# HEAD requests need some special handling: they always act like they
# have Content-Length: 0, and that's how _body_framing treats
# them. But their headers are supposed to match what we would send if
# the request was a GET. (Technically there is one deviation allowed:
# we're allowed to leave out the framing headers -- see
# https://tools.ietf.org/html/rfc7231#section-4.3.2 . But it's just as
# easy to get them right.)
method_for_choosing_headers = cast(bytes, self._request_method)
if method_for_choosing_headers == b"HEAD":
method_for_choosing_headers = b"GET"
framing_type, _ = _body_framing(method_for_choosing_headers, response)
if framing_type in ("chunked", "http/1.0"):
# This response has a body of unknown length.
# If our peer is HTTP/1.1, we use Transfer-Encoding: chunked
# If our peer is HTTP/1.0, we use no framing headers, and close the
# connection afterwards.
#
# Make sure to clear Content-Length (in principle user could have
# set both and then we ignored Content-Length b/c
# Transfer-Encoding overwrote it -- this would be naughty of them,
# but the HTTP spec says that if our peer does this then we have
# to fix it instead of erroring out, so we'll accord the user the
# same respect).
headers = set_comma_header(headers, b"content-length", [])
if self.their_http_version is None or self.their_http_version < b"1.1":
# Either we never got a valid request and are sending back an
# error (their_http_version is None), so we assume the worst;
# or else we did get a valid HTTP/1.0 request, so we know that
# they don't understand chunked encoding.
headers = set_comma_header(headers, b"transfer-encoding", [])
# This is actually redundant ATM, since currently we
# unconditionally disable keep-alive when talking to HTTP/1.0
# peers. But let's be defensive just in case we add
# Connection: keep-alive support later:
if self._request_method != b"HEAD":
need_close = True
else:
headers = set_comma_header(headers, b"transfer-encoding", [b"chunked"])
if not self._cstate.keep_alive or need_close:
# Make sure Connection: close is set
connection = set(get_comma_header(headers, b"connection"))
connection.discard(b"keep-alive")
connection.add(b"close")
headers = set_comma_header(headers, b"connection", sorted(connection))
return Response(
headers=headers,
status_code=response.status_code,
http_version=response.http_version,
reason=response.reason,
)
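# A minimal server-side sketch of driving a Connection (assumes `sock` is an
# accepted, blocking socket; error handling and keep-alive omitted):
#
#     conn = Connection(our_role=SERVER)
#     while True:
#         event = conn.next_event()
#         if event is NEED_DATA:
#             conn.receive_data(sock.recv(4096))
#         elif type(event) is Request:
#             sock.sendall(conn.send(Response(
#                 status_code=200, headers=[("Content-Length", "0")])))
#             sock.sendall(conn.send(EndOfMessage()))
#             break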


@@ -0,0 +1,369 @@
# High level events that make up HTTP/1.1 conversations. Loosely inspired by
# the corresponding events in hyper-h2:
#
# http://python-hyper.org/h2/en/stable/api.html#events
#
# Don't subclass these. Stuff will break.
import re
from abc import ABC
from dataclasses import dataclass, field
from typing import Any, cast, Dict, List, Tuple, Union
from ._abnf import method, request_target
from ._headers import Headers, normalize_and_validate
from ._util import bytesify, LocalProtocolError, validate
# Everything in __all__ gets re-exported as part of the h11 public API.
__all__ = [
"Event",
"Request",
"InformationalResponse",
"Response",
"Data",
"EndOfMessage",
"ConnectionClosed",
]
method_re = re.compile(method.encode("ascii"))
request_target_re = re.compile(request_target.encode("ascii"))
class Event(ABC):
"""
Base class for h11 events.
"""
__slots__ = ()
@dataclass(init=False, frozen=True)
class Request(Event):
"""The beginning of an HTTP request.
Fields:
.. attribute:: method
An HTTP method, e.g. ``b"GET"`` or ``b"POST"``. Always a byte
string. :term:`Bytes-like objects <bytes-like object>` and native
strings containing only ascii characters will be automatically
converted to byte strings.
.. attribute:: target
The target of an HTTP request, e.g. ``b"/index.html"``, or one of the
more exotic formats described in `RFC 7230, section 5.3
<https://tools.ietf.org/html/rfc7230#section-5.3>`_. Always a byte
string. :term:`Bytes-like objects <bytes-like object>` and native
strings containing only ascii characters will be automatically
converted to byte strings.
.. attribute:: headers
Request headers, represented as a list of (name, value) pairs. See
:ref:`the header normalization rules <headers-format>` for details.
.. attribute:: http_version
The HTTP protocol version, represented as a byte string like
``b"1.1"``. See :ref:`the HTTP version normalization rules
<http_version-format>` for details.
"""
__slots__ = ("method", "headers", "target", "http_version")
method: bytes
headers: Headers
target: bytes
http_version: bytes
def __init__(
self,
*,
method: Union[bytes, str],
headers: Union[Headers, List[Tuple[bytes, bytes]], List[Tuple[str, str]]],
target: Union[bytes, str],
http_version: Union[bytes, str] = b"1.1",
_parsed: bool = False,
) -> None:
super().__init__()
if isinstance(headers, Headers):
object.__setattr__(self, "headers", headers)
else:
object.__setattr__(
self, "headers", normalize_and_validate(headers, _parsed=_parsed)
)
if not _parsed:
object.__setattr__(self, "method", bytesify(method))
object.__setattr__(self, "target", bytesify(target))
object.__setattr__(self, "http_version", bytesify(http_version))
else:
object.__setattr__(self, "method", method)
object.__setattr__(self, "target", target)
object.__setattr__(self, "http_version", http_version)
# "A server MUST respond with a 400 (Bad Request) status code to any
# HTTP/1.1 request message that lacks a Host header field and to any
# request message that contains more than one Host header field or a
# Host header field with an invalid field-value."
# -- https://tools.ietf.org/html/rfc7230#section-5.4
host_count = 0
for name, value in self.headers:
if name == b"host":
host_count += 1
if self.http_version == b"1.1" and host_count == 0:
raise LocalProtocolError("Missing mandatory Host: header")
if host_count > 1:
raise LocalProtocolError("Found multiple Host: headers")
validate(method_re, self.method, "Illegal method characters")
validate(request_target_re, self.target, "Illegal target characters")
# This is an unhashable type.
__hash__ = None # type: ignore
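# For example, a sketch (construction validates as it goes; omitting the
# mandatory Host header on an HTTP/1.1 request raises LocalProtocolError):
#
#     req = Request(method="GET", target="/", headers=[("Host", "example.com")])
#     assert req.method == b"GET"
#     assert req.headers == [(b"host", b"example.com")]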
@dataclass(init=False, frozen=True)
class _ResponseBase(Event):
__slots__ = ("headers", "http_version", "reason", "status_code")
headers: Headers
http_version: bytes
reason: bytes
status_code: int
def __init__(
self,
*,
headers: Union[Headers, List[Tuple[bytes, bytes]], List[Tuple[str, str]]],
status_code: int,
http_version: Union[bytes, str] = b"1.1",
reason: Union[bytes, str] = b"",
_parsed: bool = False,
) -> None:
super().__init__()
if isinstance(headers, Headers):
object.__setattr__(self, "headers", headers)
else:
object.__setattr__(
self, "headers", normalize_and_validate(headers, _parsed=_parsed)
)
if not _parsed:
object.__setattr__(self, "reason", bytesify(reason))
object.__setattr__(self, "http_version", bytesify(http_version))
if not isinstance(status_code, int):
raise LocalProtocolError("status code must be integer")
# Because IntEnum objects are instances of int, but aren't
# duck-compatible (sigh), see gh-72.
object.__setattr__(self, "status_code", int(status_code))
else:
object.__setattr__(self, "reason", reason)
object.__setattr__(self, "http_version", http_version)
object.__setattr__(self, "status_code", status_code)
self.__post_init__()
def __post_init__(self) -> None:
pass
# This is an unhashable type.
__hash__ = None # type: ignore
@dataclass(init=False, frozen=True)
class InformationalResponse(_ResponseBase):
"""An HTTP informational response.
Fields:
.. attribute:: status_code
The status code of this response, as an integer. For an
:class:`InformationalResponse`, this is always in the range [100,
200).
.. attribute:: headers
Request headers, represented as a list of (name, value) pairs. See
:ref:`the header normalization rules <headers-format>` for
details.
.. attribute:: http_version
The HTTP protocol version, represented as a byte string like
``b"1.1"``. See :ref:`the HTTP version normalization rules
<http_version-format>` for details.
.. attribute:: reason
The reason phrase of this response, as a byte string. For example:
``b"OK"``, or ``b"Not Found"``.
"""
def __post_init__(self) -> None:
if not (100 <= self.status_code < 200):
raise LocalProtocolError(
"InformationalResponse status_code should be in range "
"[100, 200), not {}".format(self.status_code)
)
# This is an unhashable type.
__hash__ = None # type: ignore
@dataclass(init=False, frozen=True)
class Response(_ResponseBase):
"""The beginning of an HTTP response.
Fields:
.. attribute:: status_code
The status code of this response, as an integer. For an
:class:`Response`, this is always in the range [200,
1000).
.. attribute:: headers
Request headers, represented as a list of (name, value) pairs. See
:ref:`the header normalization rules <headers-format>` for details.
.. attribute:: http_version
The HTTP protocol version, represented as a byte string like
``b"1.1"``. See :ref:`the HTTP version normalization rules
<http_version-format>` for details.
.. attribute:: reason
The reason phrase of this response, as a byte string. For example:
``b"OK"``, or ``b"Not Found"``.
"""
def __post_init__(self) -> None:
if not (200 <= self.status_code < 1000):
raise LocalProtocolError(
"Response status_code should be in range [200, 1000), not {}".format(
self.status_code
)
)
# This is an unhashable type.
__hash__ = None # type: ignore
@dataclass(init=False, frozen=True)
class Data(Event):
"""Part of an HTTP message body.
Fields:
.. attribute:: data
A :term:`bytes-like object` containing part of a message body. Or, if
using the ``combine=False`` argument to :meth:`Connection.send`, then
any object that your socket writing code knows what to do with, and for
which calling :func:`len` returns the number of bytes that will be
written -- see :ref:`sendfile` for details.
.. attribute:: chunk_start
A marker that indicates whether this data object is from the start of a
chunked transfer encoding chunk. This field is ignored when a Data
event is provided to :meth:`Connection.send`: it is only valid on
events emitted from :meth:`Connection.next_event`. You probably
shouldn't use this attribute at all; see
:ref:`chunk-delimiters-are-bad` for details.
.. attribute:: chunk_end
A marker that indicates whether this data object is the last for a
given chunked transfer encoding chunk. This field is ignored when
a Data event is provided to :meth:`Connection.send`: it is only valid
on events emitted from :meth:`Connection.next_event`. You probably
shouldn't use this attribute at all; see
:ref:`chunk-delimiters-are-bad` for details.
"""
__slots__ = ("data", "chunk_start", "chunk_end")
data: bytes
chunk_start: bool
chunk_end: bool
def __init__(
self, data: bytes, chunk_start: bool = False, chunk_end: bool = False
) -> None:
object.__setattr__(self, "data", data)
object.__setattr__(self, "chunk_start", chunk_start)
object.__setattr__(self, "chunk_end", chunk_end)
# This is an unhashable type.
__hash__ = None # type: ignore
# XX FIXME: "A recipient MUST ignore (or consider as an error) any fields that
# are forbidden to be sent in a trailer, since processing them as if they were
# present in the header section might bypass external security filters."
# https://svn.tools.ietf.org/svn/wg/httpbis/specs/rfc7230.html#chunked.trailer.part
# Unfortunately, the list of forbidden fields is long and vague :-/
@dataclass(init=False, frozen=True)
class EndOfMessage(Event):
"""The end of an HTTP message.
Fields:
.. attribute:: headers
Default value: ``[]``
Any trailing headers attached to this message, represented as a list of
(name, value) pairs. See :ref:`the header normalization rules
<headers-format>` for details.
Must be empty unless ``Transfer-Encoding: chunked`` is in use.
"""
__slots__ = ("headers",)
headers: Headers
def __init__(
self,
*,
headers: Union[
Headers, List[Tuple[bytes, bytes]], List[Tuple[str, str]], None
] = None,
_parsed: bool = False,
) -> None:
super().__init__()
if headers is None:
headers = Headers([])
elif not isinstance(headers, Headers):
headers = normalize_and_validate(headers, _parsed=_parsed)
object.__setattr__(self, "headers", headers)
# This is an unhashable type.
__hash__ = None # type: ignore
@dataclass(frozen=True)
class ConnectionClosed(Event):
"""This event indicates that the sender has closed their outgoing
connection.
Note that this does not necessarily mean that they can't *receive* further
data, because TCP connections are composed of two one-way channels which
can be closed independently. See :ref:`closing` for details.
No fields.
"""
pass


@@ -0,0 +1,278 @@
import re
from typing import AnyStr, cast, List, overload, Sequence, Tuple, TYPE_CHECKING, Union
from ._abnf import field_name, field_value
from ._util import bytesify, LocalProtocolError, validate
if TYPE_CHECKING:
from ._events import Request
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal # type: ignore
# Facts
# -----
#
# Headers are:
# keys: case-insensitive ascii
# values: mixture of ascii and raw bytes
#
# "Historically, HTTP has allowed field content with text in the ISO-8859-1
# charset [ISO-8859-1], supporting other charsets only through use of
# [RFC2047] encoding. In practice, most HTTP header field values use only a
# subset of the US-ASCII charset [USASCII]. Newly defined header fields SHOULD
# limit their field values to US-ASCII octets. A recipient SHOULD treat other
# octets in field content (obs-text) as opaque data."
# And it deprecates all non-ascii values
#
# Leading/trailing whitespace in header names is forbidden
#
# Values get leading/trailing whitespace stripped
#
# Content-Disposition actually needs to contain unicode semantically; to
# accomplish this it has a terrifically weird way of encoding the filename
# itself as ascii (and even this still has lots of cross-browser
# incompatibilities)
#
# Order is important:
# "a proxy MUST NOT change the order of these field values when forwarding a
# message"
# (and there are several headers where the order indicates a preference)
#
# Multiple occurrences of the same header:
# "A sender MUST NOT generate multiple header fields with the same field name
# in a message unless either the entire field value for that header field is
# defined as a comma-separated list [or the header is Set-Cookie which gets a
# special exception]" - RFC 7230. (cookies are in RFC 6265)
#
# So every header aside from Set-Cookie can be merged by b", ".join if it
# occurs repeatedly. But, of course, they can't necessarily be split by
# .split(b","), because quoting.
#
# Given all this mess (case insensitive, duplicates allowed, order is
# important, ...), there doesn't appear to be any standard way to handle
# headers in Python -- they're almost like dicts, but... actually just
# aren't. For now we punt and just use a super simple representation: headers
# are a list of pairs
#
# [(name1, value1), (name2, value2), ...]
#
# where all entries are bytestrings, names are lowercase and have no
# leading/trailing whitespace, and values are bytestrings with no
# leading/trailing whitespace. Searching and updating are done via naive O(n)
# methods.
#
# Maybe a dict-of-lists would be better?
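# For example, a sketch using the helpers defined below:
#
#     headers = normalize_and_validate([("Host", "example.com"),
#                                       ("Connection", "keep-alive, Upgrade")])
#     assert list(headers) == [(b"host", b"example.com"),
#                              (b"connection", b"keep-alive, Upgrade")]
#     assert headers.raw_items() == [(b"Host", b"example.com"),
#                                    (b"Connection", b"keep-alive, Upgrade")]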
_content_length_re = re.compile(rb"[0-9]+")
_field_name_re = re.compile(field_name.encode("ascii"))
_field_value_re = re.compile(field_value.encode("ascii"))
class Headers(Sequence[Tuple[bytes, bytes]]):
"""
A list-like interface that allows iterating over headers as byte-pairs
of (lowercased-name, value).
Internally we actually store the representation as three-tuples,
including both the raw original casing, in order to preserve casing
over-the-wire, and the lowercased name, for case-insensitive comparisons.
r = Request(
method="GET",
target="/",
headers=[("Host", "example.org"), ("Connection", "keep-alive")],
http_version="1.1",
)
assert r.headers == [
(b"host", b"example.org"),
(b"connection", b"keep-alive")
]
assert r.headers.raw_items() == [
(b"Host", b"example.org"),
(b"Connection", b"keep-alive")
]
"""
__slots__ = "_full_items"
def __init__(self, full_items: List[Tuple[bytes, bytes, bytes]]) -> None:
self._full_items = full_items
def __bool__(self) -> bool:
return bool(self._full_items)
def __eq__(self, other: object) -> bool:
return list(self) == list(other) # type: ignore
def __len__(self) -> int:
return len(self._full_items)
def __repr__(self) -> str:
return "<Headers(%s)>" % repr(list(self))
def __getitem__(self, idx: int) -> Tuple[bytes, bytes]: # type: ignore[override]
_, name, value = self._full_items[idx]
return (name, value)
def raw_items(self) -> List[Tuple[bytes, bytes]]:
return [(raw_name, value) for raw_name, _, value in self._full_items]
HeaderTypes = Union[
List[Tuple[bytes, bytes]],
List[Tuple[bytes, str]],
List[Tuple[str, bytes]],
List[Tuple[str, str]],
]
@overload
def normalize_and_validate(headers: Headers, _parsed: Literal[True]) -> Headers:
...
@overload
def normalize_and_validate(headers: HeaderTypes, _parsed: Literal[False]) -> Headers:
...
@overload
def normalize_and_validate(
headers: Union[Headers, HeaderTypes], _parsed: bool = False
) -> Headers:
...
def normalize_and_validate(
headers: Union[Headers, HeaderTypes], _parsed: bool = False
) -> Headers:
new_headers = []
seen_content_length = None
saw_transfer_encoding = False
for name, value in headers:
# For headers coming out of the parser, we can safely skip some steps,
# because it always returns bytes and has already run these regexes
# over the data:
if not _parsed:
name = bytesify(name)
value = bytesify(value)
validate(_field_name_re, name, "Illegal header name {!r}", name)
validate(_field_value_re, value, "Illegal header value {!r}", value)
assert isinstance(name, bytes)
assert isinstance(value, bytes)
raw_name = name
name = name.lower()
if name == b"content-length":
lengths = {length.strip() for length in value.split(b",")}
if len(lengths) != 1:
raise LocalProtocolError("conflicting Content-Length headers")
value = lengths.pop()
validate(_content_length_re, value, "bad Content-Length")
if seen_content_length is None:
seen_content_length = value
new_headers.append((raw_name, name, value))
elif seen_content_length != value:
raise LocalProtocolError("conflicting Content-Length headers")
elif name == b"transfer-encoding":
# "A server that receives a request message with a transfer coding
# it does not understand SHOULD respond with 501 (Not
# Implemented)."
# https://tools.ietf.org/html/rfc7230#section-3.3.1
if saw_transfer_encoding:
raise LocalProtocolError(
"multiple Transfer-Encoding headers", error_status_hint=501
)
# "All transfer-coding names are case-insensitive"
# -- https://tools.ietf.org/html/rfc7230#section-4
value = value.lower()
if value != b"chunked":
raise LocalProtocolError(
"Only Transfer-Encoding: chunked is supported",
error_status_hint=501,
)
saw_transfer_encoding = True
new_headers.append((raw_name, name, value))
else:
new_headers.append((raw_name, name, value))
return Headers(new_headers)
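# For example, a sketch of the Content-Length handling above:
#
#     normalize_and_validate([("Content-Length", "100"),
#                             ("Content-Length", "100")])  # duplicates collapse
#     normalize_and_validate([("Content-Length", "100"),
#                             ("Content-Length", "200")])  # raises LocalProtocolError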
def get_comma_header(headers: Headers, name: bytes) -> List[bytes]:
# Should only be used for headers whose value is a list of
# comma-separated, case-insensitive values.
#
# The header name `name` is expected to be lower-case bytes.
#
# Connection: meets these criteria (including case insensitivity).
#
# Content-Length: technically is just a single value (1*DIGIT), but the
# standard makes reference to implementations that do multiple values, and
# using this doesn't hurt. Ditto, case insensitivity doesn't hurt either
# way.
#
# Transfer-Encoding: is more complex (allows for quoted strings), so
# splitting on , is actually wrong. For example, this is legal:
#
# Transfer-Encoding: foo; options="1,2", chunked
#
# and should be parsed as
#
# foo; options="1,2"
# chunked
#
# but this naive function will parse it as
#
# foo; options="1
# 2"
# chunked
#
# However, this is okay because the only thing we are going to do with
# any Transfer-Encoding is reject ones that aren't just "chunked", so
# both of these will be treated the same anyway.
#
# Expect: the only legal value is the literal string
# "100-continue". Splitting on commas is harmless. Case insensitive.
#
out: List[bytes] = []
for _, found_name, found_raw_value in headers._full_items:
if found_name == name:
found_raw_value = found_raw_value.lower()
for found_split_value in found_raw_value.split(b","):
found_split_value = found_split_value.strip()
if found_split_value:
out.append(found_split_value)
return out
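# For example, a sketch (values come back lowercased, split, and stripped):
#
#     headers = normalize_and_validate([("Connection", "keep-alive, Upgrade")])
#     assert get_comma_header(headers, b"connection") == [b"keep-alive", b"upgrade"]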
def set_comma_header(headers: Headers, name: bytes, new_values: List[bytes]) -> Headers:
# The header name `name` is expected to be lower-case bytes.
#
# Note that when we store the header we use title casing for the header
# names, in order to match the conventional HTTP header style.
#
# Simply calling `.title()` is a blunt approach, but it's correct
# here given the cases where we're using `set_comma_header`...
#
# Connection, Content-Length, Transfer-Encoding.
new_headers: List[Tuple[bytes, bytes]] = []
for found_raw_name, found_name, found_raw_value in headers._full_items:
if found_name != name:
new_headers.append((found_raw_name, found_raw_value))
for new_value in new_values:
new_headers.append((name.title(), new_value))
return normalize_and_validate(new_headers)
def has_expect_100_continue(request: "Request") -> bool:
# https://tools.ietf.org/html/rfc7231#section-5.1.1
# "A server that receives a 100-continue expectation in an HTTP/1.0 request
# MUST ignore that expectation."
if request.http_version < b"1.1":
return False
expect = get_comma_header(request.headers, b"expect")
return b"100-continue" in expect


@@ -0,0 +1,247 @@
# Code to read HTTP data
#
# Strategy: each reader is a callable which takes a ReceiveBuffer object, and
# either:
# 1) consumes some of it and returns an Event
# 2) raises a LocalProtocolError (for consistency -- e.g. we call validate()
# and it might raise a LocalProtocolError, so simpler just to always use
# this)
# 3) returns None, meaning "I need more data"
#
# If they have a .read_eof attribute, then this will be called if an EOF is
# received -- but this is optional. Either way, the actual ConnectionClosed
# event will be generated afterwards.
#
# READERS is a dict describing how to pick a reader. It maps states to either:
# - a reader
# - or, for body readers, a dict of per-framing reader factories
import re
from typing import Any, Callable, Dict, Iterable, NoReturn, Optional, Tuple, Type, Union
from ._abnf import chunk_header, header_field, request_line, status_line
from ._events import Data, EndOfMessage, InformationalResponse, Request, Response
from ._receivebuffer import ReceiveBuffer
from ._state import (
CLIENT,
CLOSED,
DONE,
IDLE,
MUST_CLOSE,
SEND_BODY,
SEND_RESPONSE,
SERVER,
)
from ._util import LocalProtocolError, RemoteProtocolError, Sentinel, validate
__all__ = ["READERS"]
header_field_re = re.compile(header_field.encode("ascii"))
obs_fold_re = re.compile(rb"[ \t]+")
def _obsolete_line_fold(lines: Iterable[bytes]) -> Iterable[bytes]:
it = iter(lines)
last: Optional[bytes] = None
for line in it:
match = obs_fold_re.match(line)
if match:
if last is None:
raise LocalProtocolError("continuation line at start of headers")
if not isinstance(last, bytearray):
# Cast to a mutable type, avoiding copy on append to ensure O(n) time
last = bytearray(last)
last += b" "
last += line[match.end() :]
else:
if last is not None:
yield last
last = line
if last is not None:
yield last
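# For example, a sketch (note that folded lines may come back as bytearrays):
#
#     list(_obsolete_line_fold([b"foo: bar", b"\t baz", b"quux: 1"]))
#     # -> [bytearray(b"foo: bar baz"), b"quux: 1"]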
def _decode_header_lines(
lines: Iterable[bytes],
) -> Iterable[Tuple[bytes, bytes]]:
for line in _obsolete_line_fold(lines):
matches = validate(header_field_re, line, "illegal header line: {!r}", line)
yield (matches["field_name"], matches["field_value"])
request_line_re = re.compile(request_line.encode("ascii"))
def maybe_read_from_IDLE_client(buf: ReceiveBuffer) -> Optional[Request]:
lines = buf.maybe_extract_lines()
if lines is None:
if buf.is_next_line_obviously_invalid_request_line():
raise LocalProtocolError("illegal request line")
return None
if not lines:
raise LocalProtocolError("no request line received")
matches = validate(
request_line_re, lines[0], "illegal request line: {!r}", lines[0]
)
return Request(
headers=list(_decode_header_lines(lines[1:])), _parsed=True, **matches
)
status_line_re = re.compile(status_line.encode("ascii"))
def maybe_read_from_SEND_RESPONSE_server(
buf: ReceiveBuffer,
) -> Union[InformationalResponse, Response, None]:
lines = buf.maybe_extract_lines()
if lines is None:
if buf.is_next_line_obviously_invalid_request_line():
raise LocalProtocolError("illegal request line")
return None
if not lines:
raise LocalProtocolError("no response line received")
matches = validate(status_line_re, lines[0], "illegal status line: {!r}", lines[0])
http_version = (
b"1.1" if matches["http_version"] is None else matches["http_version"]
)
reason = b"" if matches["reason"] is None else matches["reason"]
status_code = int(matches["status_code"])
class_: Union[Type[InformationalResponse], Type[Response]] = (
InformationalResponse if status_code < 200 else Response
)
return class_(
headers=list(_decode_header_lines(lines[1:])),
_parsed=True,
status_code=status_code,
reason=reason,
http_version=http_version,
)
class ContentLengthReader:
def __init__(self, length: int) -> None:
self._length = length
self._remaining = length
def __call__(self, buf: ReceiveBuffer) -> Union[Data, EndOfMessage, None]:
if self._remaining == 0:
return EndOfMessage()
data = buf.maybe_extract_at_most(self._remaining)
if data is None:
return None
self._remaining -= len(data)
return Data(data=data)
def read_eof(self) -> NoReturn:
raise RemoteProtocolError(
"peer closed connection without sending complete message body "
"(received {} bytes, expected {})".format(
self._length - self._remaining, self._length
)
)
chunk_header_re = re.compile(chunk_header.encode("ascii"))
class ChunkedReader:
def __init__(self) -> None:
self._bytes_in_chunk = 0
# After reading a chunk, we have to throw away the trailing \r\n; if
# this is >0 then we discard that many bytes before resuming regular
# de-chunkification.
self._bytes_to_discard = 0
self._reading_trailer = False
def __call__(self, buf: ReceiveBuffer) -> Union[Data, EndOfMessage, None]:
if self._reading_trailer:
lines = buf.maybe_extract_lines()
if lines is None:
return None
return EndOfMessage(headers=list(_decode_header_lines(lines)))
if self._bytes_to_discard > 0:
data = buf.maybe_extract_at_most(self._bytes_to_discard)
if data is None:
return None
self._bytes_to_discard -= len(data)
if self._bytes_to_discard > 0:
return None
# else, fall through and read some more
assert self._bytes_to_discard == 0
if self._bytes_in_chunk == 0:
# We need to refill our chunk count
chunk_header = buf.maybe_extract_next_line()
if chunk_header is None:
return None
matches = validate(
chunk_header_re,
chunk_header,
"illegal chunk header: {!r}",
chunk_header,
)
# XX FIXME: we discard chunk extensions. Does anyone care?
self._bytes_in_chunk = int(matches["chunk_size"], base=16)
if self._bytes_in_chunk == 0:
self._reading_trailer = True
return self(buf)
chunk_start = True
else:
chunk_start = False
assert self._bytes_in_chunk > 0
data = buf.maybe_extract_at_most(self._bytes_in_chunk)
if data is None:
return None
self._bytes_in_chunk -= len(data)
if self._bytes_in_chunk == 0:
self._bytes_to_discard = 2
chunk_end = True
else:
chunk_end = False
return Data(data=data, chunk_start=chunk_start, chunk_end=chunk_end)
def read_eof(self) -> NoReturn:
raise RemoteProtocolError(
"peer closed connection without sending complete message body "
"(incomplete chunked read)"
)
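# For example, a sketch of de-chunking a one-chunk body plus terminator:
#
#     reader = ChunkedReader()
#     buf = ReceiveBuffer()
#     buf += b"5\r\nhello\r\n0\r\n\r\n"
#     reader(buf)  # -> Data(data=bytearray(b"hello"), chunk_start=True, chunk_end=True)
#     reader(buf)  # -> EndOfMessage(headers=[])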
class Http10Reader:
def __call__(self, buf: ReceiveBuffer) -> Optional[Data]:
data = buf.maybe_extract_at_most(999999999)
if data is None:
return None
return Data(data=data)
def read_eof(self) -> EndOfMessage:
return EndOfMessage()
def expect_nothing(buf: ReceiveBuffer) -> None:
if buf:
raise LocalProtocolError("Got data when expecting EOF")
return None
ReadersType = Dict[
Union[Type[Sentinel], Tuple[Type[Sentinel], Type[Sentinel]]],
Union[Callable[..., Any], Dict[str, Callable[..., Any]]],
]
READERS: ReadersType = {
(CLIENT, IDLE): maybe_read_from_IDLE_client,
(SERVER, IDLE): maybe_read_from_SEND_RESPONSE_server,
(SERVER, SEND_RESPONSE): maybe_read_from_SEND_RESPONSE_server,
(CLIENT, DONE): expect_nothing,
(CLIENT, MUST_CLOSE): expect_nothing,
(CLIENT, CLOSED): expect_nothing,
(SERVER, DONE): expect_nothing,
(SERVER, MUST_CLOSE): expect_nothing,
(SERVER, CLOSED): expect_nothing,
SEND_BODY: {
"chunked": ChunkedReader,
"content-length": ContentLengthReader,
"http/1.0": Http10Reader,
},
}


@@ -0,0 +1,153 @@
import re
import sys
from typing import List, Optional, Union
__all__ = ["ReceiveBuffer"]
# Operations we want to support:
# - find next \r\n or \r\n\r\n (\n or \n\n are also acceptable),
# or wait until there is one
# - read at-most-N bytes
# Goals:
# - on average, do this fast
# - worst case, do this in O(n) where n is the number of bytes processed
# Plan:
# - store bytearray, offset, how far we've searched for a separator token
# - use the how-far-we've-searched data to avoid rescanning
# - while doing a stream of uninterrupted processing, advance offset instead
# of constantly copying
# WARNING:
# - I haven't benchmarked or profiled any of this yet.
#
# Note that starting in Python 3.4, deleting the initial n bytes from a
# bytearray is amortized O(n), thanks to some excellent work by Antoine
# Martin:
#
# https://bugs.python.org/issue19087
#
# This means that if we only supported 3.4+, we could get rid of the code here
# involving self._start and self.compress, because it's doing exactly the same
# thing that bytearray now does internally.
#
# BUT unfortunately, we still support 2.7, and reading short segments out of a
# long buffer MUST be O(bytes read) to avoid DoS issues, so we can't actually
# delete this code. Yet:
#
# https://pythonclock.org/
#
# (Two things to double-check first though: make sure PyPy also has the
# optimization, and benchmark to make sure it's a win, since we do have a
# slightly clever thing where we delay calling compress() until we've
# processed a whole event, which could in theory be slightly more efficient
# than the internal bytearray support.)
blank_line_regex = re.compile(b"\n\r?\n", re.MULTILINE)
class ReceiveBuffer:
def __init__(self) -> None:
self._data = bytearray()
self._next_line_search = 0
self._multiple_lines_search = 0
def __iadd__(self, byteslike: Union[bytes, bytearray]) -> "ReceiveBuffer":
self._data += byteslike
return self
def __bool__(self) -> bool:
return bool(len(self))
def __len__(self) -> int:
return len(self._data)
# for @property unprocessed_data
def __bytes__(self) -> bytes:
return bytes(self._data)
def _extract(self, count: int) -> bytearray:
# Extract an initial slice of the data buffer and return it.
out = self._data[:count]
del self._data[:count]
self._next_line_search = 0
self._multiple_lines_search = 0
return out
def maybe_extract_at_most(self, count: int) -> Optional[bytearray]:
"""
Extract a fixed number of bytes from the buffer.
"""
out = self._data[:count]
if not out:
return None
return self._extract(count)
def maybe_extract_next_line(self) -> Optional[bytearray]:
"""
Extract the first line, if it is completed in the buffer.
"""
# Only search in buffer space that we've not already looked at.
search_start_index = max(0, self._next_line_search - 1)
partial_idx = self._data.find(b"\r\n", search_start_index)
if partial_idx == -1:
self._next_line_search = len(self._data)
return None
# + 2 is to compensate len(b"\r\n")
idx = partial_idx + 2
return self._extract(idx)
def maybe_extract_lines(self) -> Optional[List[bytearray]]:
"""
Extract everything up to the first blank line, and return a list of lines.
"""
# Handle the case where we have an immediate empty line.
if self._data[:1] == b"\n":
self._extract(1)
return []
if self._data[:2] == b"\r\n":
self._extract(2)
return []
# Only search in buffer space that we've not already looked at.
match = blank_line_regex.search(self._data, self._multiple_lines_search)
if match is None:
self._multiple_lines_search = max(0, len(self._data) - 2)
return None
# Truncate the buffer and return it.
idx = match.span(0)[-1]
out = self._extract(idx)
lines = out.split(b"\n")
for line in lines:
if line.endswith(b"\r"):
del line[-1]
assert lines[-2] == lines[-1] == b""
del lines[-2:]
return lines
# In theory we should wait until `\r\n` before starting to validate
# incoming data. However, it's useful to detect (very) invalid data
# early, since such data might not contain `\r\n` at all (in which case
# only a timeout would get rid of it).
# This is not a 100% effective detection, but rather a cheap sanity
# check that allows an early abort in some useful cases.
# It is especially helpful when a peer confuses HTTP with HTTPS and
# sends us a TLS stream where we were expecting plain HTTP: all versions
# of TLS so far start the handshake with a 0x16 message type byte.
def is_next_line_obviously_invalid_request_line(self) -> bool:
try:
# An HTTP request line must not contain non-printable characters
# and must not start with a space
return self._data[0] < 0x21
except IndexError:
return False
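# A minimal usage sketch (illustrative only, not exercised by the library
# itself): feed bytes in with +=, then pull complete header blocks back out.
def _example_usage() -> None:
    buf = ReceiveBuffer()
    buf += b"HTTP/1.1 200 OK\r\nContent-Length: 4\r\n\r\nbody"
    # Everything up to and including the blank line comes out as a list of
    # (bytearray) lines with the line endings stripped...
    assert buf.maybe_extract_lines() == [b"HTTP/1.1 200 OK", b"Content-Length: 4"]
    # ...and whatever follows stays buffered for the body readers.
    assert bytes(buf) == b"body"
    # A leading 0x16 byte (the TLS handshake record type) is caught by the
    # cheap sanity check above, since 0x16 < 0x21.
    tls = ReceiveBuffer()
    tls += b"\x16\x03\x01\x00\x05hello"
    assert tls.is_next_line_obviously_invalid_request_line()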

View File

@ -0,0 +1,367 @@
################################################################
# The core state machine
################################################################
#
# Rule 1: everything that affects the state machine and state transitions must
# live here in this file. As much as possible goes into the table-based
# representation, but for the bits that don't quite fit, the actual code and
# state must nonetheless live here.
#
# Rule 2: this file does not know about what role we're playing; it only knows
# about HTTP request/response cycles in the abstract. This ensures that we
# don't cheat and apply different rules to local and remote parties.
#
#
# Theory of operation
# ===================
#
# Possibly the simplest way to think about this is that we actually have 5
# different state machines here. Yes, 5. These are:
#
# 1) The client state, with its complicated automaton (see the docs)
# 2) The server state, with its complicated automaton (see the docs)
# 3) The keep-alive state, with possible states {True, False}
# 4) The SWITCH_CONNECT state, with possible states {False, True}
# 5) The SWITCH_UPGRADE state, with possible states {False, True}
#
# For (3)-(5), the first state listed is the initial state.
#
# (1)-(3) are stored explicitly in member variables. The last
# two are stored implicitly in the pending_switch_proposals set as:
# (state of 4) == (_SWITCH_CONNECT in pending_switch_proposals)
# (state of 5) == (_SWITCH_UPGRADE in pending_switch_proposals)
#
# And each of these machines has two different kinds of transitions:
#
# a) Event-triggered
# b) State-triggered
#
# Event triggered is the obvious thing that you'd think it is: some event
# happens, and if it's the right event at the right time then a transition
# happens. But there are somewhat complicated rules for which machines can
# "see" which events. (As a rule of thumb, if a machine "sees" an event, this
# means two things: the event can affect the machine, and if the machine is
# not in a state where it expects that event then it's an error.) These rules
# are:
#
# 1) The client machine sees all h11.events objects emitted by the client.
#
# 2) The server machine sees all h11.events objects emitted by the server.
#
# It also sees the client's Request event.
#
# And sometimes, server events are annotated with a _SWITCH_* event. For
# example, we can have a (Response, _SWITCH_CONNECT) event, which is
# different from a regular Response event.
#
# 3) The keep-alive machine sees the process_keep_alive_disabled() event
# (which is derived from Request/Response events), and this event
# transitions it from True -> False, or from False -> False. There's no way
# to transition back.
#
# 4&5) The _SWITCH_* machines transition from False->True when we get a
# Request that proposes the relevant type of switch (via
# process_client_switch_proposals), and they go from True->False when we
# get a Response that has no _SWITCH_* annotation.
#
# So that's event-triggered transitions.
#
# State-triggered transitions are less standard. What they do here is couple
# the machines together. The way this works is, when certain *joint*
# configurations of states are achieved, then we automatically transition to a
# new *joint* state. So, for example, if we're ever in a joint state with
#
# client: DONE
# keep-alive: False
#
# then the client state immediately transitions to:
#
# client: MUST_CLOSE
#
# This is fundamentally different from an event-based transition, because it
# doesn't matter how we arrived at the {client: DONE, keep-alive: False} state
# -- maybe the client transitioned SEND_BODY -> DONE, or keep-alive
# transitioned True -> False. Either way, once this precondition is satisfied,
# this transition is immediately triggered.
#
# What if two conflicting state-based transitions get enabled at the same
# time? In practice there's only one case where this arises (client DONE ->
# MIGHT_SWITCH_PROTOCOL versus DONE -> MUST_CLOSE), and we resolve it by
# explicitly prioritizing the DONE -> MIGHT_SWITCH_PROTOCOL transition.
#
# Implementation
# --------------
#
# The event-triggered transitions for the server and client machines are all
# stored explicitly in a table. Ditto for the state-triggered transitions that
# involve just the server and client state.
#
# The transitions for the other machines, and the state-triggered transitions
# that involve the other machines, are written out as explicit Python code.
#
# It'd be nice if there were some cleaner way to do all this. This isn't
# *too* terrible, but I feel like it could probably be better.
#
# WARNING
# -------
#
# The script that generates the state machine diagrams for the docs knows how
# to read out the EVENT_TRIGGERED_TRANSITIONS and STATE_TRIGGERED_TRANSITIONS
# tables. But it can't automatically read the transitions that are written
# directly in Python code. So if you touch those, you need to also update the
# script to keep it in sync!
from typing import cast, Dict, Optional, Set, Tuple, Type, Union
from ._events import *
from ._util import LocalProtocolError, Sentinel
# Everything in __all__ gets re-exported as part of the h11 public API.
__all__ = [
"CLIENT",
"SERVER",
"IDLE",
"SEND_RESPONSE",
"SEND_BODY",
"DONE",
"MUST_CLOSE",
"CLOSED",
"MIGHT_SWITCH_PROTOCOL",
"SWITCHED_PROTOCOL",
"ERROR",
]
class CLIENT(Sentinel, metaclass=Sentinel):
pass
class SERVER(Sentinel, metaclass=Sentinel):
pass
# States
class IDLE(Sentinel, metaclass=Sentinel):
pass
class SEND_RESPONSE(Sentinel, metaclass=Sentinel):
pass
class SEND_BODY(Sentinel, metaclass=Sentinel):
pass
class DONE(Sentinel, metaclass=Sentinel):
pass
class MUST_CLOSE(Sentinel, metaclass=Sentinel):
pass
class CLOSED(Sentinel, metaclass=Sentinel):
pass
class ERROR(Sentinel, metaclass=Sentinel):
pass
# Switch types
class MIGHT_SWITCH_PROTOCOL(Sentinel, metaclass=Sentinel):
pass
class SWITCHED_PROTOCOL(Sentinel, metaclass=Sentinel):
pass
class _SWITCH_UPGRADE(Sentinel, metaclass=Sentinel):
pass
class _SWITCH_CONNECT(Sentinel, metaclass=Sentinel):
pass
EventTransitionType = Dict[
Type[Sentinel],
Dict[
Type[Sentinel],
Dict[Union[Type[Event], Tuple[Type[Event], Type[Sentinel]]], Type[Sentinel]],
],
]
EVENT_TRIGGERED_TRANSITIONS: EventTransitionType = {
CLIENT: {
IDLE: {Request: SEND_BODY, ConnectionClosed: CLOSED},
SEND_BODY: {Data: SEND_BODY, EndOfMessage: DONE},
DONE: {ConnectionClosed: CLOSED},
MUST_CLOSE: {ConnectionClosed: CLOSED},
CLOSED: {ConnectionClosed: CLOSED},
MIGHT_SWITCH_PROTOCOL: {},
SWITCHED_PROTOCOL: {},
ERROR: {},
},
SERVER: {
IDLE: {
ConnectionClosed: CLOSED,
Response: SEND_BODY,
# Special case: server sees client Request events, in this form
(Request, CLIENT): SEND_RESPONSE,
},
SEND_RESPONSE: {
InformationalResponse: SEND_RESPONSE,
Response: SEND_BODY,
(InformationalResponse, _SWITCH_UPGRADE): SWITCHED_PROTOCOL,
(Response, _SWITCH_CONNECT): SWITCHED_PROTOCOL,
},
SEND_BODY: {Data: SEND_BODY, EndOfMessage: DONE},
DONE: {ConnectionClosed: CLOSED},
MUST_CLOSE: {ConnectionClosed: CLOSED},
CLOSED: {ConnectionClosed: CLOSED},
SWITCHED_PROTOCOL: {},
ERROR: {},
},
}
StateTransitionType = Dict[
Tuple[Type[Sentinel], Type[Sentinel]], Dict[Type[Sentinel], Type[Sentinel]]
]
# NB: there are also some special-case state-triggered transitions hard-coded
# into _fire_state_triggered_transitions below.
STATE_TRIGGERED_TRANSITIONS: StateTransitionType = {
# (Client state, Server state) -> new states
# Protocol negotiation
(MIGHT_SWITCH_PROTOCOL, SWITCHED_PROTOCOL): {CLIENT: SWITCHED_PROTOCOL},
# Socket shutdown
(CLOSED, DONE): {SERVER: MUST_CLOSE},
(CLOSED, IDLE): {SERVER: MUST_CLOSE},
(ERROR, DONE): {SERVER: MUST_CLOSE},
(DONE, CLOSED): {CLIENT: MUST_CLOSE},
(IDLE, CLOSED): {CLIENT: MUST_CLOSE},
(DONE, ERROR): {CLIENT: MUST_CLOSE},
}
class ConnectionState:
def __init__(self) -> None:
# Extra bits of state that don't quite fit into the state model.
# If this is False then it enables the automatic DONE -> MUST_CLOSE
# transition. Don't set this directly; call .process_keep_alive_disabled()
self.keep_alive = True
# This is a subset of {UPGRADE, CONNECT}, containing the proposals
# made by the client for switching protocols.
self.pending_switch_proposals: Set[Type[Sentinel]] = set()
self.states: Dict[Type[Sentinel], Type[Sentinel]] = {CLIENT: IDLE, SERVER: IDLE}
def process_error(self, role: Type[Sentinel]) -> None:
self.states[role] = ERROR
self._fire_state_triggered_transitions()
def process_keep_alive_disabled(self) -> None:
self.keep_alive = False
self._fire_state_triggered_transitions()
def process_client_switch_proposal(self, switch_event: Type[Sentinel]) -> None:
self.pending_switch_proposals.add(switch_event)
self._fire_state_triggered_transitions()
def process_event(
self,
role: Type[Sentinel],
event_type: Type[Event],
server_switch_event: Optional[Type[Sentinel]] = None,
) -> None:
_event_type: Union[Type[Event], Tuple[Type[Event], Type[Sentinel]]] = event_type
if server_switch_event is not None:
assert role is SERVER
if server_switch_event not in self.pending_switch_proposals:
raise LocalProtocolError(
"Received server {} event without a pending proposal".format(
server_switch_event
)
)
_event_type = (event_type, server_switch_event)
if server_switch_event is None and _event_type is Response:
self.pending_switch_proposals = set()
self._fire_event_triggered_transitions(role, _event_type)
# Special case: the server state does get to see Request
# events.
if _event_type is Request:
assert role is CLIENT
self._fire_event_triggered_transitions(SERVER, (Request, CLIENT))
self._fire_state_triggered_transitions()
def _fire_event_triggered_transitions(
self,
role: Type[Sentinel],
event_type: Union[Type[Event], Tuple[Type[Event], Type[Sentinel]]],
) -> None:
state = self.states[role]
try:
new_state = EVENT_TRIGGERED_TRANSITIONS[role][state][event_type]
except KeyError:
event_type = cast(Type[Event], event_type)
raise LocalProtocolError(
"can't handle event type {} when role={} and state={}".format(
event_type.__name__, role, self.states[role]
)
) from None
self.states[role] = new_state
def _fire_state_triggered_transitions(self) -> None:
# We apply these rules repeatedly until converging on a fixed point
while True:
start_states = dict(self.states)
# It could happen that both these special-case transitions are
# enabled at the same time:
#
# DONE -> MIGHT_SWITCH_PROTOCOL
# DONE -> MUST_CLOSE
#
# For example, this will always be true of an HTTP/1.0 client
# requesting CONNECT. If this happens, the protocol switch takes
# priority. From there the client will either go to
# SWITCHED_PROTOCOL, in which case it's none of our business when
# they close the connection, or else the server will deny the
# request, in which case the client will go back to DONE and then
# from there to MUST_CLOSE.
if self.pending_switch_proposals:
if self.states[CLIENT] is DONE:
self.states[CLIENT] = MIGHT_SWITCH_PROTOCOL
if not self.pending_switch_proposals:
if self.states[CLIENT] is MIGHT_SWITCH_PROTOCOL:
self.states[CLIENT] = DONE
if not self.keep_alive:
for role in (CLIENT, SERVER):
if self.states[role] is DONE:
self.states[role] = MUST_CLOSE
# Tabular state-triggered transitions
joint_state = (self.states[CLIENT], self.states[SERVER])
changes = STATE_TRIGGERED_TRANSITIONS.get(joint_state, {})
self.states.update(changes)
if self.states == start_states:
# Fixed point reached
return
def start_next_cycle(self) -> None:
if self.states != {CLIENT: DONE, SERVER: DONE}:
raise LocalProtocolError(
"not in a reusable state. self.states={}".format(self.states)
)
# Can't reach DONE/DONE with any of these active, but still, let's be
# sure.
assert self.keep_alive
assert not self.pending_switch_proposals
self.states = {CLIENT: IDLE, SERVER: IDLE}
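# A minimal sketch of driving the machine directly (illustrative only; real
# code goes through h11's Connection class, which feeds events in for you):
def _example_cycle() -> None:
    cs = ConnectionState()
    cs.process_event(CLIENT, Request)  # also fires (Request, CLIENT) at SERVER
    assert cs.states == {CLIENT: SEND_BODY, SERVER: SEND_RESPONSE}
    cs.process_event(CLIENT, EndOfMessage)
    cs.process_event(SERVER, Response)
    cs.process_event(SERVER, EndOfMessage)
    assert cs.states == {CLIENT: DONE, SERVER: DONE}
    cs.start_next_cycle()  # back to {CLIENT: IDLE, SERVER: IDLE}
    # A state-triggered transition: a close on one side drags the other side
    # from IDLE to MUST_CLOSE via the (IDLE, CLOSED) table entry.
    cs.process_event(SERVER, ConnectionClosed)
    assert cs.states == {CLIENT: MUST_CLOSE, SERVER: CLOSED}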

View File

@ -0,0 +1,135 @@
from typing import Any, Dict, NoReturn, Pattern, Tuple, Type, TypeVar, Union
__all__ = [
"ProtocolError",
"LocalProtocolError",
"RemoteProtocolError",
"validate",
"bytesify",
]
class ProtocolError(Exception):
"""Exception indicating a violation of the HTTP/1.1 protocol.
This is an abstract base class, with two concrete subclasses:
:exc:`LocalProtocolError`, which indicates that you tried to do something
that HTTP/1.1 says is illegal, and :exc:`RemoteProtocolError`, which
indicates that the remote peer tried to do something that HTTP/1.1 says is
illegal. See :ref:`error-handling` for details.
In addition to the normal :exc:`Exception` features, it has one attribute:
.. attribute:: error_status_hint
This gives a suggestion as to what status code a server might use if
this error occurred as part of a request.
For a :exc:`RemoteProtocolError`, this is useful as a suggestion for
how you might want to respond to a misbehaving peer, if you're
implementing a server.
For a :exc:`LocalProtocolError`, this can be taken as a suggestion for
how your peer might have responded to *you* if h11 had allowed you to
continue.
The default is 400 Bad Request, a generic catch-all for protocol
violations.
"""
def __init__(self, msg: str, error_status_hint: int = 400) -> None:
if type(self) is ProtocolError:
raise TypeError("tried to directly instantiate ProtocolError")
Exception.__init__(self, msg)
self.error_status_hint = error_status_hint
# Strategy: there are a number of public APIs where a LocalProtocolError can
# be raised (send(), all the different event constructors, ...), and only one
# public API where RemoteProtocolError can be raised
# (receive_data()). Therefore we always raise LocalProtocolError internally,
# and then receive_data will translate this into a RemoteProtocolError.
#
# Internally:
# LocalProtocolError is the generic "ProtocolError".
# Externally:
# LocalProtocolError is for local errors and RemoteProtocolError is for
# remote errors.
class LocalProtocolError(ProtocolError):
def _reraise_as_remote_protocol_error(self) -> NoReturn:
# After catching a LocalProtocolError, use this method to re-raise it
# as a RemoteProtocolError. This method must be called from inside an
# except: block.
#
# An easy way to get an equivalent RemoteProtocolError is just to
# modify 'self' in place.
self.__class__ = RemoteProtocolError # type: ignore
# But the re-raising is somewhat non-trivial -- you might think that
# now that we've modified the in-flight exception object, that just
# doing 'raise' to re-raise it would be enough. But it turns out that
# this doesn't work, because Python tracks the exception type
# (exc_info[0]) separately from the exception object (exc_info[1]),
# and we only modified the latter. So we really do need to re-raise
# the new type explicitly.
# On py3, the traceback is part of the exception object, so our
# in-place modification preserved it and we can just re-raise:
raise self
class RemoteProtocolError(ProtocolError):
pass
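# A minimal sketch of the translation described above (hypothetical caller,
# not h11's actual receive_data implementation):
def _example_translate(parse_from_peer):
    # parse_from_peer is a hypothetical callable that raises
    # LocalProtocolError on bad input, as h11 internals do.
    try:
        return parse_from_peer()
    except LocalProtocolError as exc:
        exc._reraise_as_remote_protocol_error()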
def validate(
regex: Pattern[bytes], data: bytes, msg: str = "malformed data", *format_args: Any
) -> Dict[str, bytes]:
match = regex.fullmatch(data)
if not match:
if format_args:
msg = msg.format(*format_args)
raise LocalProtocolError(msg)
return match.groupdict()
# Sentinel values
#
# - Inherit identity-based comparison and hashing from object
# - Have a nice repr
# - Have a *bonus property*: type(sentinel) is sentinel
#
# The bonus property is useful if you want to take the return value from
# next_event() and do some sort of dispatch based on type(event).
_T_Sentinel = TypeVar("_T_Sentinel", bound="Sentinel")
class Sentinel(type):
def __new__(
cls: Type[_T_Sentinel],
name: str,
bases: Tuple[type, ...],
namespace: Dict[str, Any],
**kwds: Any
) -> _T_Sentinel:
assert bases == (Sentinel,)
v = super().__new__(cls, name, bases, namespace, **kwds)
v.__class__ = v # type: ignore
return v
def __repr__(self) -> str:
return self.__name__
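# A quick illustration of the bonus property (_EXAMPLE is a throwaway name,
# not part of h11's API):
class _EXAMPLE(Sentinel, metaclass=Sentinel):
    pass
assert type(_EXAMPLE) is _EXAMPLE  # a sentinel is an instance of itself
del _EXAMPLE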
# Used for methods, request targets, HTTP versions, header names, and header
# values. Accepts ascii-strings, or bytes/bytearray/memoryview/..., and always
# returns bytes.
def bytesify(s: Union[bytes, bytearray, memoryview, int, str]) -> bytes:
# Fast-path:
if type(s) is bytes:
return s
if isinstance(s, str):
s = s.encode("ascii")
if isinstance(s, int):
raise TypeError("expected bytes-like object, not int")
return bytes(s)
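# For example:
#
#     >>> bytesify("GET")
#     b'GET'
#     >>> bytesify(bytearray(b"Host"))
#     b'Host'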

View File

@ -0,0 +1,16 @@
# This file must be kept very simple, because it is consumed from several
# places -- it is imported by h11/__init__.py, execfile'd by setup.py, etc.
# We use a simple scheme:
# 1.0.0 -> 1.0.0+dev -> 1.1.0 -> 1.1.0+dev
# where the +dev versions are never released into the wild; they're just what
# we stick into the VCS in between releases.
#
# This is compatible with PEP 440:
# http://legacy.python.org/dev/peps/pep-0440/
# via the use of the "local suffix" "+dev", which is disallowed on index
# servers and causes 1.0.0+dev to sort after plain 1.0.0, which is what we
# want. (Contrast with the special suffix 1.0.0.dev, which sorts *before*
# 1.0.0.)
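# A quick check of that ordering claim (doctest-style; assumes the
# third-party "packaging" project, whose Version class implements PEP 440):
#
#     >>> from packaging.version import Version
#     >>> Version("1.0.0") < Version("1.0.0+dev") < Version("1.1.0")
#     True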
__version__ = "0.14.0"

View File

@ -0,0 +1,145 @@
# Code to write HTTP data
#
# Strategy: each writer takes an event + a write-some-bytes function, which it
# calls as needed.
#
# WRITERS is a dict describing how to pick a writer. It maps states to either:
# - a writer
# - or, for body writers, a dict of framing-dependent writer factories
from typing import Any, Callable, Dict, List, Tuple, Type, Union
from ._events import Data, EndOfMessage, Event, InformationalResponse, Request, Response
from ._headers import Headers
from ._state import CLIENT, IDLE, SEND_BODY, SEND_RESPONSE, SERVER
from ._util import LocalProtocolError, Sentinel
__all__ = ["WRITERS"]
Writer = Callable[[bytes], Any]
def write_headers(headers: Headers, write: Writer) -> None:
# "Since the Host field-value is critical information for handling a
# request, a user agent SHOULD generate Host as the first header field
# following the request-line." - RFC 7230
raw_items = headers._full_items
for raw_name, name, value in raw_items:
if name == b"host":
write(b"%s: %s\r\n" % (raw_name, value))
for raw_name, name, value in raw_items:
if name != b"host":
write(b"%s: %s\r\n" % (raw_name, value))
write(b"\r\n")
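# A minimal sketch of the Host-first behavior (illustrative; the Headers
# object is built with ._headers.normalize_and_validate, which is how the
# test suite constructs them):
#
#     chunks = []
#     write_headers(
#         normalize_and_validate([("foo", "bar"), ("Host", "example.com")]),
#         chunks.append,
#     )
#     assert b"".join(chunks) == b"Host: example.com\r\nfoo: bar\r\n\r\n"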
def write_request(request: Request, write: Writer) -> None:
if request.http_version != b"1.1":
raise LocalProtocolError("I only send HTTP/1.1")
write(b"%s %s HTTP/1.1\r\n" % (request.method, request.target))
write_headers(request.headers, write)
# Shared between InformationalResponse and Response
def write_any_response(
response: Union[InformationalResponse, Response], write: Writer
) -> None:
if response.http_version != b"1.1":
raise LocalProtocolError("I only send HTTP/1.1")
status_bytes = str(response.status_code).encode("ascii")
# We don't bother sending ascii status messages like "OK"; they're
# optional and ignored by the protocol. (But the space after the numeric
# status code is mandatory.)
#
# XX FIXME: could at least make an effort to pull out the status message
# from stdlib's http.HTTPStatus table. Or maybe just steal their enums
# (either by import or copy/paste). We already accept them as status codes
# since they're of type IntEnum < int.
write(b"HTTP/1.1 %s %s\r\n" % (status_bytes, response.reason))
write_headers(response.headers, write)
class BodyWriter:
def __call__(self, event: Event, write: Writer) -> None:
if type(event) is Data:
self.send_data(event.data, write)
elif type(event) is EndOfMessage:
self.send_eom(event.headers, write)
else: # pragma: no cover
assert False
def send_data(self, data: bytes, write: Writer) -> None:
pass
def send_eom(self, headers: Headers, write: Writer) -> None:
pass
#
# These are all careful not to do anything to 'data' except call len(data) and
# write(data). This allows us to transparently pass-through funny objects,
# like placeholder objects referring to files on disk that will be sent via
# sendfile(2).
#
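# For instance, a hypothetical placeholder like this (not part of h11) could
# flow through send_data() untouched, since only len(data) and write(data)
# are ever applied to it:
#
#     class FileSlicePlaceholder:
#         def __init__(self, fd: int, offset: int, length: int) -> None:
#             self.fd, self.offset, self.length = fd, offset, length
#         def __len__(self) -> int:
#             return self.length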
class ContentLengthWriter(BodyWriter):
def __init__(self, length: int) -> None:
self._length = length
def send_data(self, data: bytes, write: Writer) -> None:
self._length -= len(data)
if self._length < 0:
raise LocalProtocolError("Too much data for declared Content-Length")
write(data)
def send_eom(self, headers: Headers, write: Writer) -> None:
if self._length != 0:
raise LocalProtocolError("Too little data for declared Content-Length")
if headers:
raise LocalProtocolError("Content-Length and trailers don't mix")
class ChunkedWriter(BodyWriter):
def send_data(self, data: bytes, write: Writer) -> None:
# if we encoded 0-length data in the naive way, it would look like an
# end-of-message.
if not data:
return
write(b"%x\r\n" % len(data))
write(data)
write(b"\r\n")
def send_eom(self, headers: Headers, write: Writer) -> None:
write(b"0\r\n")
write_headers(headers, write)
class Http10Writer(BodyWriter):
def send_data(self, data: bytes, write: Writer) -> None:
write(data)
def send_eom(self, headers: Headers, write: Writer) -> None:
if headers:
raise LocalProtocolError("can't send trailers to HTTP/1.0 client")
# no need to close the socket ourselves, that will be taken care of by
# Connection: close machinery
WritersType = Dict[
Union[Tuple[Type[Sentinel], Type[Sentinel]], Type[Sentinel]],
Union[
Dict[str, Type[BodyWriter]],
Callable[[Union[InformationalResponse, Response], Writer], None],
Callable[[Request, Writer], None],
],
]
WRITERS: WritersType = {
(CLIENT, IDLE): write_request,
(SERVER, IDLE): write_any_response,
(SERVER, SEND_RESPONSE): write_any_response,
SEND_BODY: {
"chunked": ChunkedWriter,
"content-length": ContentLengthWriter,
"http/1.0": Http10Writer,
},
}

View File

@ -0,0 +1 @@
Marker

View File

@ -0,0 +1 @@
92b12bc045050b55b848d37167a1a63947c364579889ce1d39788e45e9fac9e5

View File

@ -0,0 +1,101 @@
from typing import cast, List, Type, Union, ValuesView
from .._connection import Connection, NEED_DATA, PAUSED
from .._events import (
ConnectionClosed,
Data,
EndOfMessage,
Event,
InformationalResponse,
Request,
Response,
)
from .._state import CLIENT, CLOSED, DONE, MUST_CLOSE, SERVER
from .._util import Sentinel
try:
from typing import Literal
except ImportError:
from typing_extensions import Literal # type: ignore
def get_all_events(conn: Connection) -> List[Event]:
got_events = []
while True:
event = conn.next_event()
if event in (NEED_DATA, PAUSED):
break
event = cast(Event, event)
got_events.append(event)
if type(event) is ConnectionClosed:
break
return got_events
def receive_and_get(conn: Connection, data: bytes) -> List[Event]:
conn.receive_data(data)
return get_all_events(conn)
# Merges adjacent Data events, converts payloads to bytestrings, and removes
# chunk boundaries.
def normalize_data_events(in_events: List[Event]) -> List[Event]:
out_events: List[Event] = []
for event in in_events:
if type(event) is Data:
event = Data(data=bytes(event.data), chunk_start=False, chunk_end=False)
if out_events and type(out_events[-1]) is type(event) is Data:
out_events[-1] = Data(
data=out_events[-1].data + event.data,
chunk_start=out_events[-1].chunk_start,
chunk_end=out_events[-1].chunk_end,
)
else:
out_events.append(event)
return out_events
# Given that we want to write tests that push some events through a Connection
# and check that its state updates appropriately... we might as well make a habit
# of pushing them through two Connections with a fake network link in
# between.
class ConnectionPair:
def __init__(self) -> None:
self.conn = {CLIENT: Connection(CLIENT), SERVER: Connection(SERVER)}
self.other = {CLIENT: SERVER, SERVER: CLIENT}
@property
def conns(self) -> ValuesView[Connection]:
return self.conn.values()
# expect="match" means the received events should equal send_events;
# expect=[...] says explicitly what the other side should receive
def send(
self,
role: Type[Sentinel],
send_events: Union[List[Event], Event],
expect: Union[List[Event], Event, Literal["match"]] = "match",
) -> bytes:
if not isinstance(send_events, list):
send_events = [send_events]
data = b""
closed = False
for send_event in send_events:
new_data = self.conn[role].send(send_event)
if new_data is None:
closed = True
else:
data += new_data
# send uses b"" to mean b"", and None to mean closed
# receive uses b"" to mean closed, and None to mean "try again"
# so we have to translate between the two conventions
if data:
self.conn[self.other[role]].receive_data(data)
if closed:
self.conn[self.other[role]].receive_data(b"")
got_events = get_all_events(self.conn[self.other[role]])
if expect == "match":
expect = send_events
if not isinstance(expect, list):
expect = [expect]
assert got_events == expect
return data
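# A minimal sketch of the habit described above (illustrative only; not used
# by the tests directly): push a complete request through a ConnectionPair
# and return the bytes that crossed the fake wire. Note that the receiving
# side emits EndOfMessage as soon as the final body byte arrives, so the last
# two sends spell out their expected events explicitly.
def _example_pair_roundtrip() -> bytes:
    p = ConnectionPair()
    wire = p.send(
        CLIENT,
        Request(
            method="GET",
            target="/",
            headers=[("Host", "example.com"), ("Content-Length", "5")],
        ),
    )
    wire += p.send(
        CLIENT, Data(data=b"12345"), expect=[Data(data=b"12345"), EndOfMessage()]
    )
    wire += p.send(CLIENT, EndOfMessage(), expect=[])
    return wire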

View File

@ -0,0 +1,115 @@
import json
import os.path
import socket
import socketserver
import threading
from contextlib import closing, contextmanager
from http.server import SimpleHTTPRequestHandler
from typing import Callable, Generator
from urllib.request import urlopen
import h11
@contextmanager
def socket_server(
handler: Callable[..., socketserver.BaseRequestHandler]
) -> Generator[socketserver.TCPServer, None, None]:
httpd = socketserver.TCPServer(("127.0.0.1", 0), handler)
thread = threading.Thread(
target=httpd.serve_forever, kwargs={"poll_interval": 0.01}
)
thread.daemon = True
try:
thread.start()
yield httpd
finally:
httpd.shutdown()
test_file_path = os.path.join(os.path.dirname(__file__), "data/test-file")
with open(test_file_path, "rb") as f:
test_file_data = f.read()
class SingleMindedRequestHandler(SimpleHTTPRequestHandler):
def translate_path(self, path: str) -> str:
return test_file_path
def test_h11_as_client() -> None:
with socket_server(SingleMindedRequestHandler) as httpd:
with closing(socket.create_connection(httpd.server_address)) as s:
c = h11.Connection(h11.CLIENT)
s.sendall(
c.send( # type: ignore[arg-type]
h11.Request(
method="GET", target="/foo", headers=[("Host", "localhost")]
)
)
)
s.sendall(c.send(h11.EndOfMessage())) # type: ignore[arg-type]
data = bytearray()
while True:
event = c.next_event()
print(event)
if event is h11.NEED_DATA:
# Use a small read buffer to make things more challenging
# and exercise more paths :-)
c.receive_data(s.recv(10))
continue
if type(event) is h11.Response:
assert event.status_code == 200
if type(event) is h11.Data:
data += event.data
if type(event) is h11.EndOfMessage:
break
assert bytes(data) == test_file_data
class H11RequestHandler(socketserver.BaseRequestHandler):
def handle(self) -> None:
with closing(self.request) as s:
c = h11.Connection(h11.SERVER)
request = None
while True:
event = c.next_event()
if event is h11.NEED_DATA:
# Use a small read buffer to make things more challenging
# and exercise more paths :-)
c.receive_data(s.recv(10))
continue
if type(event) is h11.Request:
request = event
if type(event) is h11.EndOfMessage:
break
assert request is not None
info = json.dumps(
{
"method": request.method.decode("ascii"),
"target": request.target.decode("ascii"),
"headers": {
name.decode("ascii"): value.decode("ascii")
for (name, value) in request.headers
},
}
)
s.sendall(c.send(h11.Response(status_code=200, headers=[]))) # type: ignore[arg-type]
s.sendall(c.send(h11.Data(data=info.encode("ascii"))))
s.sendall(c.send(h11.EndOfMessage()))
def test_h11_as_server() -> None:
with socket_server(H11RequestHandler) as httpd:
host, port = httpd.server_address
url = "http://{}:{}/some-path".format(host, port)
with closing(urlopen(url)) as f:
assert f.getcode() == 200
data = f.read()
info = json.loads(data.decode("ascii"))
print(info)
assert info["method"] == "GET"
assert info["target"] == "/some-path"
assert "urllib" in info["headers"]["user-agent"]

File diff suppressed because it is too large

View File

@ -0,0 +1,150 @@
from http import HTTPStatus
import pytest
from .. import _events
from .._events import (
ConnectionClosed,
Data,
EndOfMessage,
Event,
InformationalResponse,
Request,
Response,
)
from .._util import LocalProtocolError
def test_events() -> None:
with pytest.raises(LocalProtocolError):
# Missing Host:
req = Request(
method="GET", target="/", headers=[("a", "b")], http_version="1.1"
)
# But this is okay (HTTP/1.0)
req = Request(method="GET", target="/", headers=[("a", "b")], http_version="1.0")
# fields are normalized
assert req.method == b"GET"
assert req.target == b"/"
assert req.headers == [(b"a", b"b")]
assert req.http_version == b"1.0"
# This is also okay -- has a Host (with weird capitalization, which is ok)
req = Request(
method="GET",
target="/",
headers=[("a", "b"), ("hOSt", "example.com")],
http_version="1.1",
)
# we normalize header capitalization
assert req.headers == [(b"a", b"b"), (b"host", b"example.com")]
# Multiple host is bad too
with pytest.raises(LocalProtocolError):
req = Request(
method="GET",
target="/",
headers=[("Host", "a"), ("Host", "a")],
http_version="1.1",
)
# Even for HTTP/1.0
with pytest.raises(LocalProtocolError):
req = Request(
method="GET",
target="/",
headers=[("Host", "a"), ("Host", "a")],
http_version="1.0",
)
# Header values are validated
for bad_char in "\x00\r\n\f\v":
with pytest.raises(LocalProtocolError):
req = Request(
method="GET",
target="/",
headers=[("Host", "a"), ("Foo", "asd" + bad_char)],
http_version="1.0",
)
# But for compatibility we allow non-whitespace control characters, even
# though they're forbidden by the spec.
Request(
method="GET",
target="/",
headers=[("Host", "a"), ("Foo", "asd\x01\x02\x7f")],
http_version="1.0",
)
# Request target is validated
for bad_byte in b"\x00\x20\x7f\xee":
target = bytearray(b"/")
target.append(bad_byte)
with pytest.raises(LocalProtocolError):
Request(
method="GET", target=target, headers=[("Host", "a")], http_version="1.1"
)
# Request method is validated
with pytest.raises(LocalProtocolError):
Request(
method="GET / HTTP/1.1",
target=target,
headers=[("Host", "a")],
http_version="1.1",
)
ir = InformationalResponse(status_code=100, headers=[("Host", "a")])
assert ir.status_code == 100
assert ir.headers == [(b"host", b"a")]
assert ir.http_version == b"1.1"
with pytest.raises(LocalProtocolError):
InformationalResponse(status_code=200, headers=[("Host", "a")])
resp = Response(status_code=204, headers=[], http_version="1.0") # type: ignore[arg-type]
assert resp.status_code == 204
assert resp.headers == []
assert resp.http_version == b"1.0"
with pytest.raises(LocalProtocolError):
resp = Response(status_code=100, headers=[], http_version="1.0") # type: ignore[arg-type]
with pytest.raises(LocalProtocolError):
Response(status_code="100", headers=[], http_version="1.0") # type: ignore[arg-type]
with pytest.raises(LocalProtocolError):
InformationalResponse(status_code=b"100", headers=[], http_version="1.0") # type: ignore[arg-type]
d = Data(data=b"asdf")
assert d.data == b"asdf"
eom = EndOfMessage()
assert eom.headers == []
cc = ConnectionClosed()
assert repr(cc) == "ConnectionClosed()"
def test_intenum_status_code() -> None:
# https://github.com/python-hyper/h11/issues/72
r = Response(status_code=HTTPStatus.OK, headers=[], http_version="1.0") # type: ignore[arg-type]
assert r.status_code == HTTPStatus.OK
assert type(r.status_code) is not type(HTTPStatus.OK)
assert type(r.status_code) is int
def test_header_casing() -> None:
r = Request(
method="GET",
target="/",
headers=[("Host", "example.org"), ("Connection", "keep-alive")],
http_version="1.1",
)
assert len(r.headers) == 2
assert r.headers[0] == (b"host", b"example.org")
assert r.headers == [(b"host", b"example.org"), (b"connection", b"keep-alive")]
assert r.headers.raw_items() == [
(b"Host", b"example.org"),
(b"Connection", b"keep-alive"),
]

View File

@ -0,0 +1,157 @@
import pytest
from .._events import Request
from .._headers import (
get_comma_header,
has_expect_100_continue,
Headers,
normalize_and_validate,
set_comma_header,
)
from .._util import LocalProtocolError
def test_normalize_and_validate() -> None:
assert normalize_and_validate([("foo", "bar")]) == [(b"foo", b"bar")]
assert normalize_and_validate([(b"foo", b"bar")]) == [(b"foo", b"bar")]
# no leading/trailing whitespace in names
with pytest.raises(LocalProtocolError):
normalize_and_validate([(b"foo ", "bar")])
with pytest.raises(LocalProtocolError):
normalize_and_validate([(b" foo", "bar")])
# no weird characters in names
with pytest.raises(LocalProtocolError) as excinfo:
normalize_and_validate([(b"foo bar", b"baz")])
assert "foo bar" in str(excinfo.value)
with pytest.raises(LocalProtocolError):
normalize_and_validate([(b"foo\x00bar", b"baz")])
# Not even 8-bit characters:
with pytest.raises(LocalProtocolError):
normalize_and_validate([(b"foo\xffbar", b"baz")])
# And not even the control characters we allow in values:
with pytest.raises(LocalProtocolError):
normalize_and_validate([(b"foo\x01bar", b"baz")])
# no return or NUL characters in values
with pytest.raises(LocalProtocolError) as excinfo:
normalize_and_validate([("foo", "bar\rbaz")])
assert "bar\\rbaz" in str(excinfo.value)
with pytest.raises(LocalProtocolError):
normalize_and_validate([("foo", "bar\nbaz")])
with pytest.raises(LocalProtocolError):
normalize_and_validate([("foo", "bar\x00baz")])
# no leading/trailing whitespace
with pytest.raises(LocalProtocolError):
normalize_and_validate([("foo", "barbaz ")])
with pytest.raises(LocalProtocolError):
normalize_and_validate([("foo", " barbaz")])
with pytest.raises(LocalProtocolError):
normalize_and_validate([("foo", "barbaz\t")])
with pytest.raises(LocalProtocolError):
normalize_and_validate([("foo", "\tbarbaz")])
# content-length
assert normalize_and_validate([("Content-Length", "1")]) == [
(b"content-length", b"1")
]
with pytest.raises(LocalProtocolError):
normalize_and_validate([("Content-Length", "asdf")])
with pytest.raises(LocalProtocolError):
normalize_and_validate([("Content-Length", "1x")])
with pytest.raises(LocalProtocolError):
normalize_and_validate([("Content-Length", "1"), ("Content-Length", "2")])
assert normalize_and_validate(
[("Content-Length", "0"), ("Content-Length", "0")]
) == [(b"content-length", b"0")]
assert normalize_and_validate([("Content-Length", "0 , 0")]) == [
(b"content-length", b"0")
]
with pytest.raises(LocalProtocolError):
normalize_and_validate(
[("Content-Length", "1"), ("Content-Length", "1"), ("Content-Length", "2")]
)
with pytest.raises(LocalProtocolError):
normalize_and_validate([("Content-Length", "1 , 1,2")])
# transfer-encoding
assert normalize_and_validate([("Transfer-Encoding", "chunked")]) == [
(b"transfer-encoding", b"chunked")
]
assert normalize_and_validate([("Transfer-Encoding", "cHuNkEd")]) == [
(b"transfer-encoding", b"chunked")
]
with pytest.raises(LocalProtocolError) as excinfo:
normalize_and_validate([("Transfer-Encoding", "gzip")])
assert excinfo.value.error_status_hint == 501 # Not Implemented
with pytest.raises(LocalProtocolError) as excinfo:
normalize_and_validate(
[("Transfer-Encoding", "chunked"), ("Transfer-Encoding", "gzip")]
)
assert excinfo.value.error_status_hint == 501 # Not Implemented
def test_get_set_comma_header() -> None:
headers = normalize_and_validate(
[
("Connection", "close"),
("whatever", "something"),
("connectiON", "fOo,, , BAR"),
]
)
assert get_comma_header(headers, b"connection") == [b"close", b"foo", b"bar"]
headers = set_comma_header(headers, b"newthing", ["a", "b"]) # type: ignore
with pytest.raises(LocalProtocolError):
set_comma_header(headers, b"newthing", [" a", "b"]) # type: ignore
assert headers == [
(b"connection", b"close"),
(b"whatever", b"something"),
(b"connection", b"fOo,, , BAR"),
(b"newthing", b"a"),
(b"newthing", b"b"),
]
headers = set_comma_header(headers, b"whatever", ["different thing"]) # type: ignore
assert headers == [
(b"connection", b"close"),
(b"connection", b"fOo,, , BAR"),
(b"newthing", b"a"),
(b"newthing", b"b"),
(b"whatever", b"different thing"),
]
def test_has_100_continue() -> None:
assert has_expect_100_continue(
Request(
method="GET",
target="/",
headers=[("Host", "example.com"), ("Expect", "100-continue")],
)
)
assert not has_expect_100_continue(
Request(method="GET", target="/", headers=[("Host", "example.com")])
)
# Case insensitive
assert has_expect_100_continue(
Request(
method="GET",
target="/",
headers=[("Host", "example.com"), ("Expect", "100-Continue")],
)
)
# Doesn't work in HTTP/1.0
assert not has_expect_100_continue(
Request(
method="GET",
target="/",
headers=[("Host", "example.com"), ("Expect", "100-continue")],
http_version="1.0",
)
)

View File

@ -0,0 +1,32 @@
from .._events import (
ConnectionClosed,
Data,
EndOfMessage,
Event,
InformationalResponse,
Request,
Response,
)
from .helpers import normalize_data_events
def test_normalize_data_events() -> None:
assert normalize_data_events(
[
Data(data=bytearray(b"1")),
Data(data=b"2"),
Response(status_code=200, headers=[]), # type: ignore[arg-type]
Data(data=b"3"),
Data(data=b"4"),
EndOfMessage(),
Data(data=b"5"),
Data(data=b"6"),
Data(data=b"7"),
]
) == [
Data(data=b"12"),
Response(status_code=200, headers=[]), # type: ignore[arg-type]
Data(data=b"34"),
EndOfMessage(),
Data(data=b"567"),
]

View File

@ -0,0 +1,572 @@
from typing import Any, Callable, Generator, List
import pytest
from .._events import (
ConnectionClosed,
Data,
EndOfMessage,
Event,
InformationalResponse,
Request,
Response,
)
from .._headers import Headers, normalize_and_validate
from .._readers import (
_obsolete_line_fold,
ChunkedReader,
ContentLengthReader,
Http10Reader,
READERS,
)
from .._receivebuffer import ReceiveBuffer
from .._state import (
CLIENT,
CLOSED,
DONE,
IDLE,
MIGHT_SWITCH_PROTOCOL,
MUST_CLOSE,
SEND_BODY,
SEND_RESPONSE,
SERVER,
SWITCHED_PROTOCOL,
)
from .._util import LocalProtocolError
from .._writers import (
ChunkedWriter,
ContentLengthWriter,
Http10Writer,
write_any_response,
write_headers,
write_request,
WRITERS,
)
from .helpers import normalize_data_events
SIMPLE_CASES = [
(
(CLIENT, IDLE),
Request(
method="GET",
target="/a",
headers=[("Host", "foo"), ("Connection", "close")],
),
b"GET /a HTTP/1.1\r\nHost: foo\r\nConnection: close\r\n\r\n",
),
(
(SERVER, SEND_RESPONSE),
Response(status_code=200, headers=[("Connection", "close")], reason=b"OK"),
b"HTTP/1.1 200 OK\r\nConnection: close\r\n\r\n",
),
(
(SERVER, SEND_RESPONSE),
Response(status_code=200, headers=[], reason=b"OK"), # type: ignore[arg-type]
b"HTTP/1.1 200 OK\r\n\r\n",
),
(
(SERVER, SEND_RESPONSE),
InformationalResponse(
status_code=101, headers=[("Upgrade", "websocket")], reason=b"Upgrade"
),
b"HTTP/1.1 101 Upgrade\r\nUpgrade: websocket\r\n\r\n",
),
(
(SERVER, SEND_RESPONSE),
InformationalResponse(status_code=101, headers=[], reason=b"Upgrade"), # type: ignore[arg-type]
b"HTTP/1.1 101 Upgrade\r\n\r\n",
),
]
def dowrite(writer: Callable[..., None], obj: Any) -> bytes:
got_list: List[bytes] = []
writer(obj, got_list.append)
return b"".join(got_list)
def tw(writer: Any, obj: Any, expected: Any) -> None:
got = dowrite(writer, obj)
assert got == expected
def makebuf(data: bytes) -> ReceiveBuffer:
buf = ReceiveBuffer()
buf += data
return buf
def tr(reader: Any, data: bytes, expected: Any) -> None:
def check(got: Any) -> None:
assert got == expected
# Headers should always be returned as bytes, not e.g. bytearray
# https://github.com/python-hyper/wsproto/pull/54#issuecomment-377709478
for name, value in getattr(got, "headers", []):
assert type(name) is bytes
assert type(value) is bytes
# Simple: consume whole thing
buf = makebuf(data)
check(reader(buf))
assert not buf
# Incrementally growing buffer
buf = ReceiveBuffer()
for i in range(len(data)):
assert reader(buf) is None
buf += data[i : i + 1]
check(reader(buf))
# Trailing data
buf = makebuf(data)
buf += b"trailing"
check(reader(buf))
assert bytes(buf) == b"trailing"
def test_writers_simple() -> None:
for ((role, state), event, binary) in SIMPLE_CASES:
tw(WRITERS[role, state], event, binary)
def test_readers_simple() -> None:
for ((role, state), event, binary) in SIMPLE_CASES:
tr(READERS[role, state], binary, event)
def test_writers_unusual() -> None:
# Simple test of the write_headers utility routine
tw(
write_headers,
normalize_and_validate([("foo", "bar"), ("baz", "quux")]),
b"foo: bar\r\nbaz: quux\r\n\r\n",
)
tw(write_headers, Headers([]), b"\r\n")
# We understand HTTP/1.0, but we don't speak it
with pytest.raises(LocalProtocolError):
tw(
write_request,
Request(
method="GET",
target="/",
headers=[("Host", "foo"), ("Connection", "close")],
http_version="1.0",
),
None,
)
with pytest.raises(LocalProtocolError):
tw(
write_any_response,
Response(
status_code=200, headers=[("Connection", "close")], http_version="1.0"
),
None,
)
def test_readers_unusual() -> None:
# Reading HTTP/1.0
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.0\r\nSome: header\r\n\r\n",
Request(
method="HEAD",
target="/foo",
headers=[("Some", "header")],
http_version="1.0",
),
)
# check no-headers, since it's only legal with HTTP/1.0
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.0\r\n\r\n",
Request(method="HEAD", target="/foo", headers=[], http_version="1.0"), # type: ignore[arg-type]
)
tr(
READERS[SERVER, SEND_RESPONSE],
b"HTTP/1.0 200 OK\r\nSome: header\r\n\r\n",
Response(
status_code=200,
headers=[("Some", "header")],
http_version="1.0",
reason=b"OK",
),
)
# single-character header values (actually disallowed by the ABNF in RFC
# 7230 -- this is a bug in the standard that we originally copied...)
tr(
READERS[SERVER, SEND_RESPONSE],
b"HTTP/1.0 200 OK\r\n" b"Foo: a a a a a \r\n\r\n",
Response(
status_code=200,
headers=[("Foo", "a a a a a")],
http_version="1.0",
reason=b"OK",
),
)
# Empty headers -- also legal
tr(
READERS[SERVER, SEND_RESPONSE],
b"HTTP/1.0 200 OK\r\n" b"Foo:\r\n\r\n",
Response(
status_code=200, headers=[("Foo", "")], http_version="1.0", reason=b"OK"
),
)
tr(
READERS[SERVER, SEND_RESPONSE],
b"HTTP/1.0 200 OK\r\n" b"Foo: \t \t \r\n\r\n",
Response(
status_code=200, headers=[("Foo", "")], http_version="1.0", reason=b"OK"
),
)
# Tolerate broken servers that leave off the reason phrase
tr(
READERS[SERVER, SEND_RESPONSE],
b"HTTP/1.0 200\r\n" b"Foo: bar\r\n\r\n",
Response(
status_code=200, headers=[("Foo", "bar")], http_version="1.0", reason=b""
),
)
# Tolerate header line endings (\r\n and \n)
# \n\r\n between headers and body
tr(
READERS[SERVER, SEND_RESPONSE],
b"HTTP/1.1 200 OK\r\nSomeHeader: val\n\r\n",
Response(
status_code=200,
headers=[("SomeHeader", "val")],
http_version="1.1",
reason="OK",
),
)
# delimited only with \n
tr(
READERS[SERVER, SEND_RESPONSE],
b"HTTP/1.1 200 OK\nSomeHeader1: val1\nSomeHeader2: val2\n\n",
Response(
status_code=200,
headers=[("SomeHeader1", "val1"), ("SomeHeader2", "val2")],
http_version="1.1",
reason="OK",
),
)
# mixed \r\n and \n
tr(
READERS[SERVER, SEND_RESPONSE],
b"HTTP/1.1 200 OK\r\nSomeHeader1: val1\nSomeHeader2: val2\n\r\n",
Response(
status_code=200,
headers=[("SomeHeader1", "val1"), ("SomeHeader2", "val2")],
http_version="1.1",
reason="OK",
),
)
# obsolete line folding
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.1\r\n"
b"Host: example.com\r\n"
b"Some: multi-line\r\n"
b" header\r\n"
b"\tnonsense\r\n"
b" \t \t\tI guess\r\n"
b"Connection: close\r\n"
b"More-nonsense: in the\r\n"
b" last header \r\n\r\n",
Request(
method="HEAD",
target="/foo",
headers=[
("Host", "example.com"),
("Some", "multi-line header nonsense I guess"),
("Connection", "close"),
("More-nonsense", "in the last header"),
],
),
)
with pytest.raises(LocalProtocolError):
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.1\r\n" b" folded: line\r\n\r\n",
None,
)
with pytest.raises(LocalProtocolError):
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.1\r\n" b"foo : line\r\n\r\n",
None,
)
with pytest.raises(LocalProtocolError):
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.1\r\n" b"foo\t: line\r\n\r\n",
None,
)
with pytest.raises(LocalProtocolError):
tr(READERS[CLIENT, IDLE], b"HEAD /foo HTTP/1.1\r\n" b": line\r\n\r\n", None)
def test__obsolete_line_fold_bytes() -> None:
# _obsolete_line_fold has a defensive cast to bytearray, which is
# necessary to protect against O(n^2) behavior in case anyone ever passes
# in regular bytestrings... but right now we never pass in regular
# bytestrings. so this test just exists to get some coverage on that
# defensive cast.
assert list(_obsolete_line_fold([b"aaa", b"bbb", b" ccc", b"ddd"])) == [
b"aaa",
bytearray(b"bbb ccc"),
b"ddd",
]
def _run_reader_iter(
reader: Any, buf: bytes, do_eof: bool
) -> Generator[Any, None, None]:
while True:
event = reader(buf)
if event is None:
break
yield event
# body readers have undefined behavior after returning EndOfMessage,
# because this changes the state so they don't get called again
if type(event) is EndOfMessage:
break
if do_eof:
assert not buf
yield reader.read_eof()
def _run_reader(*args: Any) -> List[Event]:
events = list(_run_reader_iter(*args))
return normalize_data_events(events)
def t_body_reader(thunk: Any, data: bytes, expected: Any, do_eof: bool = False) -> None:
# Simple: consume whole thing
print("Test 1")
buf = makebuf(data)
assert _run_reader(thunk(), buf, do_eof) == expected
# Incrementally growing buffer
print("Test 2")
reader = thunk()
buf = ReceiveBuffer()
events = []
for i in range(len(data)):
events += _run_reader(reader, buf, False)
buf += data[i : i + 1]
events += _run_reader(reader, buf, do_eof)
assert normalize_data_events(events) == expected
is_complete = any(type(event) is EndOfMessage for event in expected)
if is_complete and not do_eof:
buf = makebuf(data + b"trailing")
assert _run_reader(thunk(), buf, False) == expected
def test_ContentLengthReader() -> None:
t_body_reader(lambda: ContentLengthReader(0), b"", [EndOfMessage()])
t_body_reader(
lambda: ContentLengthReader(10),
b"0123456789",
[Data(data=b"0123456789"), EndOfMessage()],
)
def test_Http10Reader() -> None:
t_body_reader(Http10Reader, b"", [EndOfMessage()], do_eof=True)
t_body_reader(Http10Reader, b"asdf", [Data(data=b"asdf")], do_eof=False)
t_body_reader(
Http10Reader, b"asdf", [Data(data=b"asdf"), EndOfMessage()], do_eof=True
)
def test_ChunkedReader() -> None:
t_body_reader(ChunkedReader, b"0\r\n\r\n", [EndOfMessage()])
t_body_reader(
ChunkedReader,
b"0\r\nSome: header\r\n\r\n",
[EndOfMessage(headers=[("Some", "header")])],
)
t_body_reader(
ChunkedReader,
b"5\r\n01234\r\n"
+ b"10\r\n0123456789abcdef\r\n"
+ b"0\r\n"
+ b"Some: header\r\n\r\n",
[
Data(data=b"012340123456789abcdef"),
EndOfMessage(headers=[("Some", "header")]),
],
)
t_body_reader(
ChunkedReader,
b"5\r\n01234\r\n" + b"10\r\n0123456789abcdef\r\n" + b"0\r\n\r\n",
[Data(data=b"012340123456789abcdef"), EndOfMessage()],
)
# handles upper and lowercase hex
t_body_reader(
ChunkedReader,
b"aA\r\n" + b"x" * 0xAA + b"\r\n" + b"0\r\n\r\n",
[Data(data=b"x" * 0xAA), EndOfMessage()],
)
# refuses arbitrarily long chunk integers
with pytest.raises(LocalProtocolError):
# Technically this is legal HTTP/1.1, but we refuse to process chunk
# sizes that don't fit into 20 characters of hex
t_body_reader(ChunkedReader, b"9" * 100 + b"\r\nxxx", [Data(data=b"xxx")])
# refuses garbage in the chunk count
with pytest.raises(LocalProtocolError):
t_body_reader(ChunkedReader, b"10\x00\r\nxxx", None)
# handles (and discards) "chunk extensions" omg wtf
t_body_reader(
ChunkedReader,
b"5; hello=there\r\n"
+ b"xxxxx"
+ b"\r\n"
+ b'0; random="junk"; some=more; canbe=lonnnnngg\r\n\r\n',
[Data(data=b"xxxxx"), EndOfMessage()],
)
t_body_reader(
ChunkedReader,
b"5 \r\n01234\r\n" + b"0\r\n\r\n",
[Data(data=b"01234"), EndOfMessage()],
)
def test_ContentLengthWriter() -> None:
w = ContentLengthWriter(5)
assert dowrite(w, Data(data=b"123")) == b"123"
assert dowrite(w, Data(data=b"45")) == b"45"
assert dowrite(w, EndOfMessage()) == b""
w = ContentLengthWriter(5)
with pytest.raises(LocalProtocolError):
dowrite(w, Data(data=b"123456"))
w = ContentLengthWriter(5)
dowrite(w, Data(data=b"123"))
with pytest.raises(LocalProtocolError):
dowrite(w, Data(data=b"456"))
w = ContentLengthWriter(5)
dowrite(w, Data(data=b"123"))
with pytest.raises(LocalProtocolError):
dowrite(w, EndOfMessage())
w = ContentLengthWriter(5)
assert dowrite(w, Data(data=b"123")) == b"123"
assert dowrite(w, Data(data=b"45")) == b"45"
with pytest.raises(LocalProtocolError):
dowrite(w, EndOfMessage(headers=[("Etag", "asdf")]))
def test_ChunkedWriter() -> None:
w = ChunkedWriter()
assert dowrite(w, Data(data=b"aaa")) == b"3\r\naaa\r\n"
assert dowrite(w, Data(data=b"a" * 20)) == b"14\r\n" + b"a" * 20 + b"\r\n"
assert dowrite(w, Data(data=b"")) == b""
assert dowrite(w, EndOfMessage()) == b"0\r\n\r\n"
assert (
dowrite(w, EndOfMessage(headers=[("Etag", "asdf"), ("a", "b")]))
== b"0\r\nEtag: asdf\r\na: b\r\n\r\n"
)
def test_Http10Writer() -> None:
w = Http10Writer()
assert dowrite(w, Data(data=b"1234")) == b"1234"
assert dowrite(w, EndOfMessage()) == b""
with pytest.raises(LocalProtocolError):
dowrite(w, EndOfMessage(headers=[("Etag", "asdf")]))
def test_reject_garbage_after_request_line() -> None:
with pytest.raises(LocalProtocolError):
tr(READERS[SERVER, SEND_RESPONSE], b"HTTP/1.0 200 OK\x00xxxx\r\n\r\n", None)
def test_reject_garbage_after_response_line() -> None:
with pytest.raises(LocalProtocolError):
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.1 xxxxxx\r\n" b"Host: a\r\n\r\n",
None,
)
def test_reject_garbage_in_header_line() -> None:
with pytest.raises(LocalProtocolError):
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.1\r\n" b"Host: foo\x00bar\r\n\r\n",
None,
)
def test_reject_non_vchar_in_path() -> None:
for bad_char in b"\x00\x20\x7f\xee":
message = bytearray(b"HEAD /")
message.append(bad_char)
message.extend(b" HTTP/1.1\r\nHost: foobar\r\n\r\n")
with pytest.raises(LocalProtocolError):
tr(READERS[CLIENT, IDLE], message, None)
# https://github.com/python-hyper/h11/issues/57
def test_allow_some_garbage_in_cookies() -> None:
tr(
READERS[CLIENT, IDLE],
b"HEAD /foo HTTP/1.1\r\n"
b"Host: foo\r\n"
b"Set-Cookie: ___utmvafIumyLc=kUd\x01UpAt; path=/; Max-Age=900\r\n"
b"\r\n",
Request(
method="HEAD",
target="/foo",
headers=[
("Host", "foo"),
("Set-Cookie", "___utmvafIumyLc=kUd\x01UpAt; path=/; Max-Age=900"),
],
),
)
def test_host_comes_first() -> None:
tw(
write_headers,
normalize_and_validate([("foo", "bar"), ("Host", "example.com")]),
b"Host: example.com\r\nfoo: bar\r\n\r\n",
)

View File

@ -0,0 +1,135 @@
import re
from typing import Tuple
import pytest
from .._receivebuffer import ReceiveBuffer
def test_receivebuffer() -> None:
b = ReceiveBuffer()
assert not b
assert len(b) == 0
assert bytes(b) == b""
b += b"123"
assert b
assert len(b) == 3
assert bytes(b) == b"123"
assert bytes(b) == b"123"
assert b.maybe_extract_at_most(2) == b"12"
assert b
assert len(b) == 1
assert bytes(b) == b"3"
assert bytes(b) == b"3"
assert b.maybe_extract_at_most(10) == b"3"
assert bytes(b) == b""
assert b.maybe_extract_at_most(10) is None
assert not b
################################################################
# maybe_extract_next_line
################################################################
b += b"123\n456\r\n789\r\n"
assert b.maybe_extract_next_line() == b"123\n456\r\n"
assert bytes(b) == b"789\r\n"
assert b.maybe_extract_next_line() == b"789\r\n"
assert bytes(b) == b""
b += b"12\r"
assert b.maybe_extract_next_line() is None
assert bytes(b) == b"12\r"
b += b"345\n\r"
assert b.maybe_extract_next_line() is None
assert bytes(b) == b"12\r345\n\r"
# here we stopped in the middle of the b"\r\n" delimiter
b += b"\n6789aaa123\r\n"
assert b.maybe_extract_next_line() == b"12\r345\n\r\n"
assert b.maybe_extract_next_line() == b"6789aaa123\r\n"
assert b.maybe_extract_next_line() is None
assert bytes(b) == b""
################################################################
# maybe_extract_lines
################################################################
b += b"123\r\na: b\r\nfoo:bar\r\n\r\ntrailing"
lines = b.maybe_extract_lines()
assert lines == [b"123", b"a: b", b"foo:bar"]
assert bytes(b) == b"trailing"
assert b.maybe_extract_lines() is None
b += b"\r\n\r"
assert b.maybe_extract_lines() is None
assert b.maybe_extract_at_most(100) == b"trailing\r\n\r"
assert not b
# Empty body case (as happens at the end of chunked encoding if there are
# no trailing headers, e.g.)
b += b"\r\ntrailing"
assert b.maybe_extract_lines() == []
assert bytes(b) == b"trailing"
@pytest.mark.parametrize(
"data",
[
pytest.param(
(
b"HTTP/1.1 200 OK\r\n",
b"Content-type: text/plain\r\n",
b"Connection: close\r\n",
b"\r\n",
b"Some body",
),
id="with_crlf_delimiter",
),
pytest.param(
(
b"HTTP/1.1 200 OK\n",
b"Content-type: text/plain\n",
b"Connection: close\n",
b"\n",
b"Some body",
),
id="with_lf_only_delimiter",
),
pytest.param(
(
b"HTTP/1.1 200 OK\n",
b"Content-type: text/plain\r\n",
b"Connection: close\n",
b"\n",
b"Some body",
),
id="with_mixed_crlf_and_lf",
),
],
)
def test_receivebuffer_for_invalid_delimiter(data: Tuple[bytes]) -> None:
b = ReceiveBuffer()
for line in data:
b += line
lines = b.maybe_extract_lines()
assert lines == [
b"HTTP/1.1 200 OK",
b"Content-type: text/plain",
b"Connection: close",
]
assert bytes(b) == b"Some body"

View File

@ -0,0 +1,271 @@
import pytest
from .._events import (
ConnectionClosed,
Data,
EndOfMessage,
Event,
InformationalResponse,
Request,
Response,
)
from .._state import (
_SWITCH_CONNECT,
_SWITCH_UPGRADE,
CLIENT,
CLOSED,
ConnectionState,
DONE,
IDLE,
MIGHT_SWITCH_PROTOCOL,
MUST_CLOSE,
SEND_BODY,
SEND_RESPONSE,
SERVER,
SWITCHED_PROTOCOL,
)
from .._util import LocalProtocolError
def test_ConnectionState() -> None:
cs = ConnectionState()
# Basic event-triggered transitions
assert cs.states == {CLIENT: IDLE, SERVER: IDLE}
cs.process_event(CLIENT, Request)
# The SERVER-Request special case:
assert cs.states == {CLIENT: SEND_BODY, SERVER: SEND_RESPONSE}
# Illegal transitions raise an error and nothing happens
with pytest.raises(LocalProtocolError):
cs.process_event(CLIENT, Request)
assert cs.states == {CLIENT: SEND_BODY, SERVER: SEND_RESPONSE}
cs.process_event(SERVER, InformationalResponse)
assert cs.states == {CLIENT: SEND_BODY, SERVER: SEND_RESPONSE}
cs.process_event(SERVER, Response)
assert cs.states == {CLIENT: SEND_BODY, SERVER: SEND_BODY}
cs.process_event(CLIENT, EndOfMessage)
cs.process_event(SERVER, EndOfMessage)
assert cs.states == {CLIENT: DONE, SERVER: DONE}
# State-triggered transition
cs.process_event(SERVER, ConnectionClosed)
assert cs.states == {CLIENT: MUST_CLOSE, SERVER: CLOSED}
def test_ConnectionState_keep_alive() -> None:
# keep_alive = False
cs = ConnectionState()
cs.process_event(CLIENT, Request)
cs.process_keep_alive_disabled()
cs.process_event(CLIENT, EndOfMessage)
assert cs.states == {CLIENT: MUST_CLOSE, SERVER: SEND_RESPONSE}
cs.process_event(SERVER, Response)
cs.process_event(SERVER, EndOfMessage)
assert cs.states == {CLIENT: MUST_CLOSE, SERVER: MUST_CLOSE}
def test_ConnectionState_keep_alive_in_DONE() -> None:
# Check that if keep_alive is disabled when the CLIENT is already in DONE,
# then this is sufficient to immediately trigger the DONE -> MUST_CLOSE
# transition
cs = ConnectionState()
cs.process_event(CLIENT, Request)
cs.process_event(CLIENT, EndOfMessage)
assert cs.states[CLIENT] is DONE
cs.process_keep_alive_disabled()
assert cs.states[CLIENT] is MUST_CLOSE
def test_ConnectionState_switch_denied() -> None:
for switch_type in (_SWITCH_CONNECT, _SWITCH_UPGRADE):
for deny_early in (True, False):
cs = ConnectionState()
cs.process_client_switch_proposal(switch_type)
cs.process_event(CLIENT, Request)
cs.process_event(CLIENT, Data)
assert cs.states == {CLIENT: SEND_BODY, SERVER: SEND_RESPONSE}
assert switch_type in cs.pending_switch_proposals
if deny_early:
# before client reaches DONE
cs.process_event(SERVER, Response)
assert not cs.pending_switch_proposals
cs.process_event(CLIENT, EndOfMessage)
if deny_early:
assert cs.states == {CLIENT: DONE, SERVER: SEND_BODY}
else:
assert cs.states == {
CLIENT: MIGHT_SWITCH_PROTOCOL,
SERVER: SEND_RESPONSE,
}
cs.process_event(SERVER, InformationalResponse)
assert cs.states == {
CLIENT: MIGHT_SWITCH_PROTOCOL,
SERVER: SEND_RESPONSE,
}
cs.process_event(SERVER, Response)
assert cs.states == {CLIENT: DONE, SERVER: SEND_BODY}
assert not cs.pending_switch_proposals
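

# Maps each switch type to the event class a server uses to accept it: an
# Upgrade is accepted with a 101 (InformationalResponse), a CONNECT with a
# successful Response, and None stands for "no switch", i.e. a plain Response.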
_response_type_for_switch = {
_SWITCH_UPGRADE: InformationalResponse,
_SWITCH_CONNECT: Response,
None: Response,
}


def test_ConnectionState_protocol_switch_accepted() -> None:
for switch_event in [_SWITCH_UPGRADE, _SWITCH_CONNECT]:
cs = ConnectionState()
cs.process_client_switch_proposal(switch_event)
cs.process_event(CLIENT, Request)
cs.process_event(CLIENT, Data)
assert cs.states == {CLIENT: SEND_BODY, SERVER: SEND_RESPONSE}
cs.process_event(CLIENT, EndOfMessage)
assert cs.states == {CLIENT: MIGHT_SWITCH_PROTOCOL, SERVER: SEND_RESPONSE}
cs.process_event(SERVER, InformationalResponse)
assert cs.states == {CLIENT: MIGHT_SWITCH_PROTOCOL, SERVER: SEND_RESPONSE}
cs.process_event(SERVER, _response_type_for_switch[switch_event], switch_event)
assert cs.states == {CLIENT: SWITCHED_PROTOCOL, SERVER: SWITCHED_PROTOCOL}


def test_ConnectionState_double_protocol_switch() -> None:
# CONNECT + Upgrade is legal! Very silly, but legal. So we support
# it. Because sometimes doing the silly thing is easier than not.
for server_switch in [None, _SWITCH_UPGRADE, _SWITCH_CONNECT]:
cs = ConnectionState()
cs.process_client_switch_proposal(_SWITCH_UPGRADE)
cs.process_client_switch_proposal(_SWITCH_CONNECT)
cs.process_event(CLIENT, Request)
cs.process_event(CLIENT, EndOfMessage)
assert cs.states == {CLIENT: MIGHT_SWITCH_PROTOCOL, SERVER: SEND_RESPONSE}
cs.process_event(
SERVER, _response_type_for_switch[server_switch], server_switch
)
if server_switch is None:
assert cs.states == {CLIENT: DONE, SERVER: SEND_BODY}
else:
assert cs.states == {CLIENT: SWITCHED_PROTOCOL, SERVER: SWITCHED_PROTOCOL}


def test_ConnectionState_inconsistent_protocol_switch() -> None:
for client_switches, server_switch in [
([], _SWITCH_CONNECT),
([], _SWITCH_UPGRADE),
([_SWITCH_UPGRADE], _SWITCH_CONNECT),
([_SWITCH_CONNECT], _SWITCH_UPGRADE),
]:
cs = ConnectionState()
for client_switch in client_switches: # type: ignore[attr-defined]
cs.process_client_switch_proposal(client_switch)
cs.process_event(CLIENT, Request)
with pytest.raises(LocalProtocolError):
cs.process_event(SERVER, Response, server_switch)
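

# The rule pinned down above: the server may only perform a protocol switch
# that the client actually proposed, so an unsolicited or mismatched switch
# is rejected at the moment the server's Response is processed.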


def test_ConnectionState_keepalive_protocol_switch_interaction() -> None:
# keep_alive=False + pending_switch_proposals
cs = ConnectionState()
cs.process_client_switch_proposal(_SWITCH_UPGRADE)
cs.process_event(CLIENT, Request)
cs.process_keep_alive_disabled()
cs.process_event(CLIENT, Data)
assert cs.states == {CLIENT: SEND_BODY, SERVER: SEND_RESPONSE}
# the protocol switch "wins"
cs.process_event(CLIENT, EndOfMessage)
assert cs.states == {CLIENT: MIGHT_SWITCH_PROTOCOL, SERVER: SEND_RESPONSE}
# but when the server denies the request, keep_alive comes back into play
cs.process_event(SERVER, Response)
assert cs.states == {CLIENT: MUST_CLOSE, SERVER: SEND_BODY}


def test_ConnectionState_reuse() -> None:
cs = ConnectionState()
with pytest.raises(LocalProtocolError):
cs.start_next_cycle()
cs.process_event(CLIENT, Request)
cs.process_event(CLIENT, EndOfMessage)
with pytest.raises(LocalProtocolError):
cs.start_next_cycle()
cs.process_event(SERVER, Response)
cs.process_event(SERVER, EndOfMessage)
cs.start_next_cycle()
assert cs.states == {CLIENT: IDLE, SERVER: IDLE}
# No keepalive
cs.process_event(CLIENT, Request)
cs.process_keep_alive_disabled()
cs.process_event(CLIENT, EndOfMessage)
cs.process_event(SERVER, Response)
cs.process_event(SERVER, EndOfMessage)
with pytest.raises(LocalProtocolError):
cs.start_next_cycle()
# One side closed
cs = ConnectionState()
cs.process_event(CLIENT, Request)
cs.process_event(CLIENT, EndOfMessage)
cs.process_event(CLIENT, ConnectionClosed)
cs.process_event(SERVER, Response)
cs.process_event(SERVER, EndOfMessage)
with pytest.raises(LocalProtocolError):
cs.start_next_cycle()
# Successful protocol switch
cs = ConnectionState()
cs.process_client_switch_proposal(_SWITCH_UPGRADE)
cs.process_event(CLIENT, Request)
cs.process_event(CLIENT, EndOfMessage)
cs.process_event(SERVER, InformationalResponse, _SWITCH_UPGRADE)
with pytest.raises(LocalProtocolError):
cs.start_next_cycle()
# Failed protocol switch
cs = ConnectionState()
cs.process_client_switch_proposal(_SWITCH_UPGRADE)
cs.process_event(CLIENT, Request)
cs.process_event(CLIENT, EndOfMessage)
cs.process_event(SERVER, Response)
cs.process_event(SERVER, EndOfMessage)
cs.start_next_cycle()
assert cs.states == {CLIENT: IDLE, SERVER: IDLE}
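

# Taken together: start_next_cycle() is legal only from the one reusable
# configuration -- both sides in DONE with keep-alive intact -- and anything
# else (mid-request, keep-alive disabled, a closed side, a completed protocol
# switch) raises LocalProtocolError.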


def test_server_request_is_illegal() -> None:
# There used to be a bug in how we handled the Request special case that
# made this allowed...
cs = ConnectionState()
with pytest.raises(LocalProtocolError):
cs.process_event(SERVER, Request)

View File

@ -0,0 +1,112 @@
import re
import sys
import traceback
from typing import NoReturn
import pytest
from .._util import (
bytesify,
LocalProtocolError,
ProtocolError,
RemoteProtocolError,
Sentinel,
validate,
)


def test_ProtocolError() -> None:
with pytest.raises(TypeError):
ProtocolError("abstract base class")


def test_LocalProtocolError() -> None:
try:
raise LocalProtocolError("foo")
except LocalProtocolError as e:
assert str(e) == "foo"
assert e.error_status_hint == 400
try:
raise LocalProtocolError("foo", error_status_hint=418)
except LocalProtocolError as e:
assert str(e) == "foo"
assert e.error_status_hint == 418
def thunk() -> NoReturn:
raise LocalProtocolError("a", error_status_hint=420)
try:
try:
thunk()
except LocalProtocolError as exc1:
orig_traceback = "".join(traceback.format_tb(sys.exc_info()[2]))
exc1._reraise_as_remote_protocol_error()
except RemoteProtocolError as exc2:
assert type(exc2) is RemoteProtocolError
assert exc2.args == ("a",)
assert exc2.error_status_hint == 420
new_traceback = "".join(traceback.format_tb(sys.exc_info()[2]))
assert new_traceback.endswith(orig_traceback)
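

# A hedged sketch of how this reraise hook gets used in practice (modelled on,
# but not copied from, h11._connection): shared validation helpers raise
# LocalProtocolError, and a caller that was checking data from the *remote*
# peer re-brands the exception while keeping the original traceback.
def _blame_the_peer_sketch(data: bytes) -> None:
    try:
        validate(re.compile(rb"[0-9]+"), data, "not all digits: {!r}", data)
    except LocalProtocolError as exc:
        exc._reraise_as_remote_protocol_error()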


def test_validate() -> None:
my_re = re.compile(rb"(?P<group1>[0-9]+)\.(?P<group2>[0-9]+)")
with pytest.raises(LocalProtocolError):
validate(my_re, b"0.")
groups = validate(my_re, b"0.1")
assert groups == {"group1": b"0", "group2": b"1"}
# successful partial matches are an error - must match whole string
with pytest.raises(LocalProtocolError):
validate(my_re, b"0.1xx")
with pytest.raises(LocalProtocolError):
validate(my_re, b"0.1\n")


def test_validate_formatting() -> None:
my_re = re.compile(rb"foo")
with pytest.raises(LocalProtocolError) as excinfo:
validate(my_re, b"", "oops")
assert "oops" in str(excinfo.value)
with pytest.raises(LocalProtocolError) as excinfo:
validate(my_re, b"", "oops {}")
assert "oops {}" in str(excinfo.value)
with pytest.raises(LocalProtocolError) as excinfo:
validate(my_re, b"", "oops {} xx", 10)
assert "oops 10 xx" in str(excinfo.value)


def test_make_sentinel() -> None:
class S(Sentinel, metaclass=Sentinel):
pass
assert repr(S) == "S"
assert S == S
assert type(S).__name__ == "S"
assert S in {S}
assert type(S) is S
class S2(Sentinel, metaclass=Sentinel):
pass
assert repr(S2) == "S2"
assert S != S2
assert S not in {S2}
assert type(S) is not type(S2)
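

# A hedged sketch of minting a new sentinel with this machinery -- the same
# shape h11 itself appears to use for public sentinels like NEED_DATA. The
# class becomes its own type, so it works with both `is` and isinstance().
class _MY_SENTINEL(Sentinel, metaclass=Sentinel):
    pass


def _sentinel_sketch() -> None:
    assert repr(_MY_SENTINEL) == "_MY_SENTINEL"
    assert type(_MY_SENTINEL) is _MY_SENTINEL
    assert isinstance(_MY_SENTINEL, _MY_SENTINEL)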


def test_bytesify() -> None:
assert bytesify(b"123") == b"123"
assert bytesify(bytearray(b"123")) == b"123"
assert bytesify("123") == b"123"
with pytest.raises(UnicodeEncodeError):
bytesify("\u1234")
with pytest.raises(TypeError):
bytesify(10)
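

# A hedged restatement of the contract the test above pins down (an
# illustrative sketch, not h11's actual implementation): bytes-likes pass
# through, str must be pure ASCII, and everything else is a TypeError.
def _bytesify_contract_sketch(value: object) -> bytes:
    if isinstance(value, str):
        return value.encode("ascii")  # non-ASCII text -> UnicodeEncodeError
    if isinstance(value, (bytes, bytearray, memoryview)):  # memoryview: assumption
        return bytes(value)
    raise TypeError(
        "expected bytes-like or ASCII str, not {}".format(type(value).__name__)
    )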