Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new NotHttpProtocol exception to differentiate invalid methods and gibberish #10067

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/10067.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a new ``NotHttpProtocol`` internal exception to better differentiate invalid HTTP methods and gibberish on the wire -- by :user:`bdraco`.
3 changes: 3 additions & 0 deletions aiohttp/_http_parser.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ from .http_exceptions import (
InvalidHeader,
InvalidURLError,
LineTooLong,
NotHttpProtocol,
PayloadEncodingError,
TransferEncodingError,
)
Expand Down Expand Up @@ -833,6 +834,8 @@ cdef parser_error_from_errno(cparser.llhttp_t* parser, data, pointer):
cparser.HPE_INVALID_TRANSFER_ENCODING}:
return BadHttpMessage(err_msg)
elif errno == cparser.HPE_INVALID_METHOD:
if b" " not in data:
return NotHttpProtocol(err_msg)
Comment on lines +837 to +838
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems unreliable since a binary protocol can easily include b" " by accident.

A TLS handshake always begins with b"\x16" (RFC 8446 §5.1); we could use that to reliably detect HTTPS at least.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah its a bit messy to guess here... maybe we should just leave it as-is.

Copy link
Member Author

@bdraco bdraco Nov 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think its better to leave it as-is and not try to differentiate as llhttp is always going to be able to do a better job than a heuristic as otherwise we end up with a whole list of possible exceptions for TLS or other garbage

return BadHttpMethod(error=err_msg)
elif errno in {cparser.HPE_INVALID_STATUS,
cparser.HPE_INVALID_VERSION}:
Expand Down
6 changes: 6 additions & 0 deletions aiohttp/http_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,5 +104,11 @@ def __init__(self, line: str = "", error: Optional[str] = None) -> None:
super().__init__(line, error or f"Bad HTTP method in status line {line!r}")


class NotHttpProtocol(BadStatusLine):

def __init__(self, line: str = "", error: Optional[str] = None) -> None:
super().__init__(line, error or f"Not HTTP protocol {line!r}")


class InvalidURLError(BadHttpMessage):
pass
3 changes: 2 additions & 1 deletion aiohttp/http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
InvalidHeader,
InvalidURLError,
LineTooLong,
NotHttpProtocol,
TransferEncodingError,
)
from .http_writer import HttpVersion, HttpVersion10
Expand Down Expand Up @@ -565,7 +566,7 @@ def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
try:
method, path, version = line.split(" ", maxsplit=2)
except ValueError:
raise BadHttpMethod(line) from None
raise NotHttpProtocol(line) from None

if len(path) > self.max_line_size:
raise LineTooLong(
Expand Down
6 changes: 3 additions & 3 deletions aiohttp/web_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
RawRequestMessage,
StreamWriter,
)
from .http_exceptions import BadHttpMethod
from .http_exceptions import NotHttpProtocol
from .log import access_logger, server_logger
from .streams import EMPTY_PAYLOAD, StreamReader
from .tcp_helpers import tcp_keepalive
Expand Down Expand Up @@ -716,9 +716,9 @@ def handle_error(
if (
self._manager
and self._manager.requests_count == 1
and isinstance(exc, BadHttpMethod)
and isinstance(exc, NotHttpProtocol)
):
# BadHttpMethod is common when a client sends non-HTTP
# NotHttpProtocol is common when a client sends non-HTTP
# or encrypted traffic to an HTTP port. This is expected
# to happen when connected to the public internet so we log
# it at the debug level as to not fill logs with noise.
Expand Down
19 changes: 19 additions & 0 deletions tests/test_http_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,25 @@ def test_http_request_parser_bad_method(
parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n')


@pytest.mark.parametrize(
"not_http_protocol_data",
[
b"\x16\x03\x03\x01F\x01",
b"\x16\x03\x01",
b"\x16\x03\x01\x02",
b"\x16",
],
)
def test_http_request_parser_not_http_protocol(
parser: HttpRequestParser, not_http_protocol_data: bytes
) -> None:
with pytest.raises(http_exceptions.NotHttpProtocol):
# The \r\n\r\n is to ensure that the py parser gets to the
# point where it tries to parse the protocol as it differs
# from the C parser.
parser.feed_data(not_http_protocol_data + b"\r\n\r\n")


def test_http_request_parser_bad_version(parser: HttpRequestParser) -> None:
with pytest.raises(http_exceptions.BadHttpMessage):
parser.feed_data(b"GET //get HT/11\r\n\r\n")
Expand Down
14 changes: 7 additions & 7 deletions tests/test_web_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest

from aiohttp import client, web
from aiohttp.http_exceptions import BadHttpMethod, BadStatusLine
from aiohttp.http_exceptions import BadStatusLine, NotHttpProtocol
from aiohttp.pytest_plugin import AiohttpClient, AiohttpRawServer


Expand Down Expand Up @@ -70,12 +70,12 @@ async def handler(request: web.BaseRequest) -> NoReturn:
logger.exception.assert_called_with("Error handling request", exc_info=exc)


async def test_raw_server_logs_invalid_method_with_loop_debug(
async def test_raw_server_logs_non_http_protocol_with_loop_debug(
aiohttp_raw_server: AiohttpRawServer,
aiohttp_client: AiohttpClient,
loop: asyncio.AbstractEventLoop,
) -> None:
exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error")
exc = NotHttpProtocol(b"\x16\x03\x03\x01F\x01".decode(), "error")

async def handler(request: web.BaseRequest) -> NoReturn:
raise exc
Expand All @@ -99,12 +99,12 @@ async def handler(request: web.BaseRequest) -> NoReturn:
logger.debug.assert_called_with("Error handling request", exc_info=exc)


async def test_raw_server_logs_invalid_method_without_loop_debug(
async def test_raw_server_logs_non_http_protocol_without_loop_debug(
aiohttp_raw_server: AiohttpRawServer,
aiohttp_client: AiohttpClient,
loop: asyncio.AbstractEventLoop,
) -> None:
exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error")
exc = NotHttpProtocol(b"\x16\x03\x03\x01F\x01".decode(), "error")

async def handler(request: web.BaseRequest) -> NoReturn:
raise exc
Expand All @@ -128,12 +128,12 @@ async def handler(request: web.BaseRequest) -> NoReturn:
logger.debug.assert_called_with("Error handling request", exc_info=exc)


async def test_raw_server_logs_invalid_method_second_request(
async def test_raw_server_logs_non_http_protocol_second_request(
aiohttp_raw_server: AiohttpRawServer,
aiohttp_client: AiohttpClient,
loop: asyncio.AbstractEventLoop,
) -> None:
exc = BadHttpMethod(b"\x16\x03\x03\x01F\x01".decode(), "error")
exc = NotHttpProtocol(b"\x16\x03\x03\x01F\x01".decode(), "error")
request_count = 0

async def handler(request: web.BaseRequest) -> web.Response:
Expand Down
Loading