From 909ef273701f01332ae54797ecb33c6fef0d5fc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 16 Jan 2025 12:14:42 +0100 Subject: [PATCH 1/7] Fix `UnicodeError.__str__` when attributes have a custom `__str__`. --- Objects/exceptions.c | 59 +++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 4df89edfaf3953..3b98b456940d13 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2673,6 +2673,8 @@ SyntaxError_str(PySyntaxErrorObject *self) if (!filename && !have_lineno) return PyObject_Str(self->msg ? self->msg : Py_None); + // Even if 'filename' can be an instance of a subclass of 'str', + // we only render its "true" content and do not use str(filename). if (filename && have_lineno) result = PyUnicode_FromFormat("%S (%U, line %ld)", self->msg ? self->msg : Py_None, @@ -2790,29 +2792,47 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, /* * Check the validity of 'attr' as a unicode or bytes object depending - * on 'as_bytes' and return a new reference on it if it is the case. + * on 'as_bytes'. * * The 'name' is the attribute name and is only used for error reporting. * - * On success, this returns a strong reference on 'attr'. - * On failure, this sets a TypeError and returns NULL. + * On success, this returns 0. + * On failure, this sets a TypeError and returns -1. */ -static PyObject * -as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) +static int +check_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) { assert(as_bytes == 0 || as_bytes == 1); if (attr == NULL) { - PyErr_Format(PyExc_TypeError, "%s attribute not set", name); - return NULL; + PyErr_Format(PyExc_TypeError, + "UnicodeError '%s' attribute is not set", + name); + return -1; } if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { PyErr_Format(PyExc_TypeError, - "%s attribute must be %s", - name, - as_bytes ? "bytes" : "unicode"); - return NULL; + "UnicodeError '%s' attribute must be a %s", + name, as_bytes ? "bytes" : "string"); + return -1; } - return Py_NewRef(attr); + return 0; +} + + +/* + * Check the validity of 'attr' as a unicode or bytes object depending + * on 'as_bytes' and return a new reference on it if it is the case. + * + * The 'name' is the attribute name and is only used for error reporting. + * + * On success, this returns a strong reference on 'attr'. + * On failure, this sets a TypeError and returns NULL. + */ +static PyObject * +as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) +{ + int rc = check_unicode_error_attribute(attr, name, as_bytes); + return rc < 0 ? NULL : Py_NewRef(attr); } @@ -3379,7 +3399,10 @@ UnicodeEncodeError_str(PyObject *self) if (encoding_str == NULL) { goto done; } - + // calls to PyObject_Str(...) above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", false) < 0) { + goto done; + } Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3499,7 +3522,10 @@ UnicodeDecodeError_str(PyObject *self) if (encoding_str == NULL) { goto done; } - + // calls to PyObject_Str(...) above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", true) < 0) { + goto done; + } Py_ssize_t len = PyBytes_GET_SIZE(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3595,7 +3621,10 @@ UnicodeTranslateError_str(PyObject *self) if (reason_str == NULL) { goto done; } - + // call to PyObject_Str(...) above might mutate 'exc->object' + if (check_unicode_error_attribute(exc->object, "object", false) < 0) { + goto done; + } Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end; From 268a721ca1f6c42a6db233d20e963c058cbae718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 18 Jan 2025 10:47:18 +0100 Subject: [PATCH 2/7] add tests --- Lib/test/test_capi/test_exceptions.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index 666e2f2ab09548..539a59f4c412d9 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -429,6 +429,30 @@ def _check_no_crash(self, exc): # ensure that the __str__() method does not crash _ = str(exc) + def test_unicode_encode_error_custom_str(self): + + class Evil(str): + def __str__(self): + del exc.object + return self + + for reason, encoding in [ + ("reason", Evil("encoding")), + (Evil("reason"), "encoding"), + (Evil("reason"), Evil("encoding")), + ]: + with self.subTest(encoding=encoding, reason=reason): + with self.subTest(UnicodeEncodeError): + exc = UnicodeEncodeError(encoding, "x", 0, 1, reason) + self.assertRaises(TypeError, str, exc) + with self.subTest(UnicodeDecodeError): + exc = UnicodeDecodeError(encoding, b"x", 0, 1, reason) + self.assertRaises(TypeError, str, exc) + + with self.subTest(UnicodeTranslateError): + exc = UnicodeTranslateError("x", 0, 1, Evil("reason")) + self.assertRaises(TypeError, str, exc) + def test_unicode_encode_error_get_start(self): get_start = _testcapi.unicode_encode_get_start self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start) From 87305df24d417c0e9d7e65d6cbad0523b84da6e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 18 Jan 2025 10:50:07 +0100 Subject: [PATCH 3/7] blurb --- .../2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst new file mode 100644 index 00000000000000..95716d4877df37 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst @@ -0,0 +1,2 @@ +Fix a crash in :meth:`UnicodeError.__str__` when custom attributes have +side-effects on :attr:`UnicodeError.object`. Patch by Bénédikt Tran. From 7d4d6a0ca603cd78c2c0eb5fd379a733888f5dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 18 Jan 2025 11:26:40 +0100 Subject: [PATCH 4/7] update NEWS --- .../2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst index 95716d4877df37..fc4453ae3f2644 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-01-18-10-50-04.gh-issue-128974.KltI-A.rst @@ -1,2 +1,3 @@ -Fix a crash in :meth:`UnicodeError.__str__` when custom attributes have -side-effects on :attr:`UnicodeError.object`. Patch by Bénédikt Tran. +Fix a crash in :meth:`UnicodeError.__str__ ` when custom +attributes implement :meth:`~object.__str__` with side-effects. +Patch by Bénédikt Tran. From 36e44f87d42006523e5717a112591d40689ef72f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 19 Jan 2025 10:24:00 +0100 Subject: [PATCH 5/7] update test case name --- Lib/test/test_capi/test_exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index 539a59f4c412d9..9b51c35ff5a03f 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -429,7 +429,7 @@ def _check_no_crash(self, exc): # ensure that the __str__() method does not crash _ = str(exc) - def test_unicode_encode_error_custom_str(self): + def test_unicode_error_custom_str(self): class Evil(str): def __str__(self): From d6163c85a969b887d5dcc0bfa1ea56eef926bc41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:49:46 +0100 Subject: [PATCH 6/7] move test --- Lib/test/test_capi/test_exceptions.py | 24 ----------------- Lib/test/test_exceptions.py | 37 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index 9b51c35ff5a03f..666e2f2ab09548 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -429,30 +429,6 @@ def _check_no_crash(self, exc): # ensure that the __str__() method does not crash _ = str(exc) - def test_unicode_error_custom_str(self): - - class Evil(str): - def __str__(self): - del exc.object - return self - - for reason, encoding in [ - ("reason", Evil("encoding")), - (Evil("reason"), "encoding"), - (Evil("reason"), Evil("encoding")), - ]: - with self.subTest(encoding=encoding, reason=reason): - with self.subTest(UnicodeEncodeError): - exc = UnicodeEncodeError(encoding, "x", 0, 1, reason) - self.assertRaises(TypeError, str, exc) - with self.subTest(UnicodeDecodeError): - exc = UnicodeDecodeError(encoding, b"x", 0, 1, reason) - self.assertRaises(TypeError, str, exc) - - with self.subTest(UnicodeTranslateError): - exc = UnicodeTranslateError("x", 0, 1, Evil("reason")) - self.assertRaises(TypeError, str, exc) - def test_unicode_encode_error_get_start(self): get_start = _testcapi.unicode_encode_get_start self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 206e22e791e02a..6b7e6bbf010ceb 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1360,6 +1360,43 @@ def test_unicode_error_str_does_not_crash(self): exc = UnicodeDecodeError('utf-8', encoded, start, end, '') self.assertIsInstance(str(exc), str) + def test_unicode_error_evil_str_set_none_object(self): + def side_effect(exc): + object.__setattr__(exc, 'object', None) + self.do_test_unicode_error_mutate(side_effect) + + def test_unicode_error_evil_str_del_self_object(self): + def side_effect(exc): + object.__delattr__(exc, 'object') + self.do_test_unicode_error_mutate(side_effect) + + def do_test_unicode_error_mutate(self, side_effect): + # Test that str(UnicodeError(...)) does not crash when + # side-effects mutate the underlying 'object' attribute. + # See https://github.com/python/cpython/issues/128974. + + class Evil(str): + def __str__(self): + side_effect(exc) + return self + + for reason, encoding in [ + ("reason", Evil("utf-8")), + (Evil("reason"), "utf-8"), + (Evil("reason"), Evil("utf-8")), + ]: + with self.subTest(encoding=encoding, reason=reason): + with self.subTest(UnicodeEncodeError): + exc = UnicodeEncodeError(encoding, "x", 0, 1, reason) + self.assertRaises(TypeError, str, exc) + with self.subTest(UnicodeDecodeError): + exc = UnicodeDecodeError(encoding, b"x", 0, 1, reason) + self.assertRaises(TypeError, str, exc) + + with self.subTest(UnicodeTranslateError): + exc = UnicodeTranslateError("x", 0, 1, Evil("reason")) + self.assertRaises(TypeError, str, exc) + @no_tracing def test_badisinstance(self): # Bug #2542: if issubclass(e, MyException) raises an exception, From 78572c9368d1f7cbffb3e6f1be984e920f96c27c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:51:02 +0100 Subject: [PATCH 7/7] simplify --- Lib/test/test_exceptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 6b7e6bbf010ceb..eb7b338811717e 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1362,12 +1362,12 @@ def test_unicode_error_str_does_not_crash(self): def test_unicode_error_evil_str_set_none_object(self): def side_effect(exc): - object.__setattr__(exc, 'object', None) + exc.object = None self.do_test_unicode_error_mutate(side_effect) def test_unicode_error_evil_str_del_self_object(self): def side_effect(exc): - object.__delattr__(exc, 'object') + del exc.object self.do_test_unicode_error_mutate(side_effect) def do_test_unicode_error_mutate(self, side_effect):