From c315886d0a999611c261896582f6a786245056dd Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 09:05:02 -0600 Subject: [PATCH 1/7] Restore 304 performance after fixing FileResponse replace race fixes #10112 --- aiohttp/web_fileresponse.py | 140 ++++++++++++++++++++++-------------- 1 file changed, 86 insertions(+), 54 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index 1177eaf7af8..cfa283558d4 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -4,6 +4,7 @@ import pathlib import sys from contextlib import suppress +from enum import Enum, auto from mimetypes import MimeTypes from stat import S_ISREG from types import MappingProxyType @@ -66,6 +67,16 @@ } ) + +class _FileResponseResult(Enum): + """The state of the file response.""" + + SEND_FILE = auto() # Ie a regular file to send + NOT_ACCEPTABLE = auto() # Ie a socket, or non-regular file + PRE_CONDITION_FAILED = auto() # Ie If-Match or If-None-Match failed + NOT_MODIFIED = auto() # 304 Not Modified + + # Add custom pairs and clear the encodings map so guess_type ignores them. CONTENT_TYPES.encodings_map.clear() for content_type, extension in ADDITIONAL_CONTENT_TYPES.items(): @@ -163,10 +174,12 @@ async def _precondition_failed( self.content_length = 0 return await super().prepare(request) - def _open_file_path_stat_encoding( - self, accept_encoding: str - ) -> Tuple[Optional[io.BufferedReader], os.stat_result, Optional[str]]: - """Return the io object, stat result, and encoding. + def _make_response( + self, request: "BaseRequest", accept_encoding: str + ) -> Tuple[ + _FileResponseResult, Optional[io.BufferedReader], os.stat_result, Optional[str] + ]: + """Return the response state, io object, stat result, and encoding. If an uncompressed file is returned, the encoding is set to :py:data:`None`. @@ -174,6 +187,56 @@ def _open_file_path_stat_encoding( This method should be called from a thread executor since it calls os.stat which may block. """ + file_path, st, file_encoding = self._get_file_path_stat_encoding( + accept_encoding + ) + if not file_path: + return _FileResponseResult.NOT_ACCEPTABLE, None, st, None + + etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" + + # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 + ifmatch = request.if_match + if ifmatch is not None and not self._etag_match( + etag_value, ifmatch, weak=False + ): + return _FileResponseResult.PRE_CONDITION_FAILED, None, st, file_encoding + + unmodsince = request.if_unmodified_since + if ( + unmodsince is not None + and ifmatch is None + and st.st_mtime > unmodsince.timestamp() + ): + return _FileResponseResult.PRE_CONDITION_FAILED, None, st, file_encoding + + # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2 + ifnonematch = request.if_none_match + if ifnonematch is not None and self._etag_match( + etag_value, ifnonematch, weak=True + ): + return _FileResponseResult.NOT_MODIFIED, None, st, file_encoding + + modsince = request.if_modified_since + if ( + modsince is not None + and ifnonematch is None + and st.st_mtime <= modsince.timestamp() + ): + return _FileResponseResult.NOT_MODIFIED, None, st, file_encoding + + fobj = file_path.open("rb") + with suppress(OSError): + # fstat() may not be available on all platforms + # Once we open the file, we want the fstat() to ensure + # the file has not changed between the first stat() + # and the open(). + st = os.stat(fobj.fileno()) + return _FileResponseResult.SEND_FILE, fobj, st, file_encoding + + def _get_file_path_stat_encoding( + self, accept_encoding: str + ) -> Tuple[Optional[pathlib.Path], os.stat_result, Optional[str]]: file_path = self._path for file_extension, file_encoding in ENCODING_EXTENSIONS.items(): if file_encoding not in accept_encoding: @@ -184,27 +247,13 @@ def _open_file_path_stat_encoding( # Do not follow symlinks and ignore any non-regular files. st = compressed_path.lstat() if S_ISREG(st.st_mode): - fobj = compressed_path.open("rb") - with suppress(OSError): - # fstat() may not be available on all platforms - # Once we open the file, we want the fstat() to ensure - # the file has not changed between the first stat() - # and the open(). - st = os.stat(fobj.fileno()) - return fobj, st, file_encoding + return compressed_path, st, file_encoding # Fallback to the uncompressed file st = file_path.stat() if not S_ISREG(st.st_mode): return None, st, None - fobj = file_path.open("rb") - with suppress(OSError): - # fstat() may not be available on all platforms - # Once we open the file, we want the fstat() to ensure - # the file has not changed between the first stat() - # and the open(). - st = os.stat(fobj.fileno()) - return fobj, st, None + return file_path, st, None async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter]: loop = asyncio.get_running_loop() @@ -212,8 +261,8 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # https://www.rfc-editor.org/rfc/rfc9110#section-8.4.1 accept_encoding = request.headers.get(hdrs.ACCEPT_ENCODING, "").lower() try: - fobj, st, file_encoding = await loop.run_in_executor( - None, self._open_file_path_stat_encoding, accept_encoding + response_state, fobj, st, file_encoding = await loop.run_in_executor( + None, self._make_response, request, accept_encoding ) except PermissionError: self.set_status(HTTPForbidden.status_code) @@ -226,11 +275,15 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter try: # Forbid special files like sockets, pipes, devices, etc. - if not fobj or not S_ISREG(st.st_mode): + if response_state is _FileResponseResult.NOT_ACCEPTABLE or not S_ISREG( + st.st_mode + ): self.set_status(HTTPForbidden.status_code) return await super().prepare(request) - return await self._prepare_open_file(request, fobj, st, file_encoding) + return await self._prepare_response( + request, response_state, fobj, st, file_encoding + ) finally: if fobj: # We do not await here because we do not want to wait @@ -243,46 +296,25 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter _CLOSE_FUTURES.add(close_future) close_future.add_done_callback(_CLOSE_FUTURES.remove) - async def _prepare_open_file( + async def _prepare_response( self, request: "BaseRequest", - fobj: io.BufferedReader, + response_state: _FileResponseResult, + fobj: Optional[io.BufferedReader], st: os.stat_result, file_encoding: Optional[str], ) -> Optional[AbstractStreamWriter]: - etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" - last_modified = st.st_mtime - # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 - ifmatch = request.if_match - if ifmatch is not None and not self._etag_match( - etag_value, ifmatch, weak=False - ): + if response_state is _FileResponseResult.PRE_CONDITION_FAILED: return await self._precondition_failed(request) - unmodsince = request.if_unmodified_since - if ( - unmodsince is not None - and ifmatch is None - and st.st_mtime > unmodsince.timestamp() - ): - return await self._precondition_failed(request) - - # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2 - ifnonematch = request.if_none_match - if ifnonematch is not None and self._etag_match( - etag_value, ifnonematch, weak=True - ): - return await self._not_modified(request, etag_value, last_modified) + etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" + last_modified = st.st_mtime - modsince = request.if_modified_since - if ( - modsince is not None - and ifnonematch is None - and st.st_mtime <= modsince.timestamp() - ): + if response_state is _FileResponseResult.NOT_MODIFIED: return await self._not_modified(request, etag_value, last_modified) + assert fobj is not None status = self._status file_size = st.st_size count = file_size @@ -376,7 +408,7 @@ async def _prepare_open_file( self._compression = False self.etag = etag_value # type: ignore[assignment] - self.last_modified = st.st_mtime # type: ignore[assignment] + self.last_modified = last_modified # type: ignore[assignment] self.content_length = count self._headers[hdrs.ACCEPT_RANGES] = "bytes" From bbe858767384d36f7f195e21f612ed2308d0b52b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 09:05:48 -0600 Subject: [PATCH 2/7] changelog --- CHANGES/10113.bugfix.rst | 1 + 1 file changed, 1 insertion(+) create mode 120000 CHANGES/10113.bugfix.rst diff --git a/CHANGES/10113.bugfix.rst b/CHANGES/10113.bugfix.rst new file mode 120000 index 00000000000..89cef58729f --- /dev/null +++ b/CHANGES/10113.bugfix.rst @@ -0,0 +1 @@ +10101.bugfix.rst \ No newline at end of file From 383236197200592a2517eea5731bc4abd2ee6aae Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 09:06:40 -0600 Subject: [PATCH 3/7] naming --- aiohttp/web_fileresponse.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index cfa283558d4..e68e48c9e07 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -261,7 +261,7 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter # https://www.rfc-editor.org/rfc/rfc9110#section-8.4.1 accept_encoding = request.headers.get(hdrs.ACCEPT_ENCODING, "").lower() try: - response_state, fobj, st, file_encoding = await loop.run_in_executor( + response_result, fobj, st, file_encoding = await loop.run_in_executor( None, self._make_response, request, accept_encoding ) except PermissionError: @@ -275,14 +275,14 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter try: # Forbid special files like sockets, pipes, devices, etc. - if response_state is _FileResponseResult.NOT_ACCEPTABLE or not S_ISREG( + if response_result is _FileResponseResult.NOT_ACCEPTABLE or not S_ISREG( st.st_mode ): self.set_status(HTTPForbidden.status_code) return await super().prepare(request) return await self._prepare_response( - request, response_state, fobj, st, file_encoding + request, response_result, fobj, st, file_encoding ) finally: if fobj: @@ -299,19 +299,19 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter async def _prepare_response( self, request: "BaseRequest", - response_state: _FileResponseResult, + response_result: _FileResponseResult, fobj: Optional[io.BufferedReader], st: os.stat_result, file_encoding: Optional[str], ) -> Optional[AbstractStreamWriter]: # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 - if response_state is _FileResponseResult.PRE_CONDITION_FAILED: + if response_result is _FileResponseResult.PRE_CONDITION_FAILED: return await self._precondition_failed(request) etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" last_modified = st.st_mtime - if response_state is _FileResponseResult.NOT_MODIFIED: + if response_result is _FileResponseResult.NOT_MODIFIED: return await self._not_modified(request, etag_value, last_modified) assert fobj is not None From 0a2964efd8e1477501678188930b7e1f65433a7b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 09:13:26 -0600 Subject: [PATCH 4/7] preen --- aiohttp/web_fileresponse.py | 65 ++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index e68e48c9e07..f4fb5bff230 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -273,52 +273,43 @@ async def prepare(self, request: "BaseRequest") -> Optional[AbstractStreamWriter self.set_status(HTTPNotFound.status_code) return await super().prepare(request) - try: - # Forbid special files like sockets, pipes, devices, etc. - if response_result is _FileResponseResult.NOT_ACCEPTABLE or not S_ISREG( - st.st_mode - ): - self.set_status(HTTPForbidden.status_code) - return await super().prepare(request) + # Forbid special files like sockets, pipes, devices, etc. + if response_result is _FileResponseResult.NOT_ACCEPTABLE: + self.set_status(HTTPForbidden.status_code) + return await super().prepare(request) - return await self._prepare_response( - request, response_result, fobj, st, file_encoding - ) - finally: - if fobj: - # We do not await here because we do not want to wait - # for the executor to finish before returning the response - # so the connection can begin servicing another request - # as soon as possible. - close_future = loop.run_in_executor(None, fobj.close) - # Hold a strong reference to the future to prevent it from being - # garbage collected before it completes. - _CLOSE_FUTURES.add(close_future) - close_future.add_done_callback(_CLOSE_FUTURES.remove) - - async def _prepare_response( - self, - request: "BaseRequest", - response_result: _FileResponseResult, - fobj: Optional[io.BufferedReader], - st: os.stat_result, - file_encoding: Optional[str], - ) -> Optional[AbstractStreamWriter]: - # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 if response_result is _FileResponseResult.PRE_CONDITION_FAILED: return await self._precondition_failed(request) - etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" - last_modified = st.st_mtime - if response_result is _FileResponseResult.NOT_MODIFIED: + etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" + last_modified = st.st_mtime return await self._not_modified(request, etag_value, last_modified) assert fobj is not None + try: + return await self._prepare_open_file(request, fobj, st, file_encoding) + finally: + # We do not await here because we do not want to wait + # for the executor to finish before returning the response + # so the connection can begin servicing another request + # as soon as possible. + close_future = loop.run_in_executor(None, fobj.close) + # Hold a strong reference to the future to prevent it from being + # garbage collected before it completes. + _CLOSE_FUTURES.add(close_future) + close_future.add_done_callback(_CLOSE_FUTURES.remove) + + async def _prepare_open_file( + self, + request: "BaseRequest", + fobj: io.BufferedReader, + st: os.stat_result, + file_encoding: Optional[str], + ) -> Optional[AbstractStreamWriter]: status = self._status file_size = st.st_size count = file_size - start = None ifrange = request.if_range @@ -407,8 +398,8 @@ async def _prepare_response( # compress. self._compression = False - self.etag = etag_value # type: ignore[assignment] - self.last_modified = last_modified # type: ignore[assignment] + self.etag = f"{st.st_mtime_ns:x}-{st.st_size:x}" # type: ignore[assignment] + self.last_modified = st.st_mtime # type: ignore[assignment] self.content_length = count self._headers[hdrs.ACCEPT_RANGES] = "bytes" From 7444bc6c7c7806383223fc5f29c646a2e3bdf9b0 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 09:25:24 -0600 Subject: [PATCH 5/7] Update aiohttp/web_fileresponse.py --- aiohttp/web_fileresponse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index f4fb5bff230..c9c336a645c 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -69,7 +69,7 @@ class _FileResponseResult(Enum): - """The state of the file response.""" + """The result of the file response.""" SEND_FILE = auto() # Ie a regular file to send NOT_ACCEPTABLE = auto() # Ie a socket, or non-regular file From c168634c4fec1a5be67a7a32803edbdc22b5a600 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 09:25:36 -0600 Subject: [PATCH 6/7] Update aiohttp/web_fileresponse.py --- aiohttp/web_fileresponse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index c9c336a645c..d763b3ff78d 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -179,7 +179,7 @@ def _make_response( ) -> Tuple[ _FileResponseResult, Optional[io.BufferedReader], os.stat_result, Optional[str] ]: - """Return the response state, io object, stat result, and encoding. + """Return the response result, io object, stat result, and encoding. If an uncompressed file is returned, the encoding is set to :py:data:`None`. From 360dd4013e66f145bf9241f703609af7d3f0fa4c Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 10:11:40 -0600 Subject: [PATCH 7/7] preen --- aiohttp/web_fileresponse.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/aiohttp/web_fileresponse.py b/aiohttp/web_fileresponse.py index d763b3ff78d..9f9bd6e8f70 100644 --- a/aiohttp/web_fileresponse.py +++ b/aiohttp/web_fileresponse.py @@ -196,30 +196,26 @@ def _make_response( etag_value = f"{st.st_mtime_ns:x}-{st.st_size:x}" # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.1-2 - ifmatch = request.if_match - if ifmatch is not None and not self._etag_match( + if (ifmatch := request.if_match) is not None and not self._etag_match( etag_value, ifmatch, weak=False ): return _FileResponseResult.PRE_CONDITION_FAILED, None, st, file_encoding - unmodsince = request.if_unmodified_since if ( - unmodsince is not None + (unmodsince := request.if_unmodified_since) is not None and ifmatch is None and st.st_mtime > unmodsince.timestamp() ): return _FileResponseResult.PRE_CONDITION_FAILED, None, st, file_encoding # https://www.rfc-editor.org/rfc/rfc9110#section-13.1.2-2 - ifnonematch = request.if_none_match - if ifnonematch is not None and self._etag_match( + if (ifnonematch := request.if_none_match) is not None and self._etag_match( etag_value, ifnonematch, weak=True ): return _FileResponseResult.NOT_MODIFIED, None, st, file_encoding - modsince = request.if_modified_since if ( - modsince is not None + (modsince := request.if_modified_since) is not None and ifnonematch is None and st.st_mtime <= modsince.timestamp() ):