external/python3/0001-3.9-bpo-45461-Fix-IncrementalDecoder-and-StreamReade.patch.1
| 384 ++++++++
external/python3/0001-3.9-gh-133767-Fix-use-after-free-in-the-unicode-esca.patch.1
| 441 ++++++++++
external/python3/0001-Cut-disused-recode_encoding-logic-in-_PyBytes_Decode.patch.1
| 168 +++
external/python3/UnpackedTarball_python3.mk
| 3
4 files changed, 996 insertions(+)
New commits:
commit faf175dad0a930e343f8704113fc469b7a895a92
Author: Michael Stahl <[email protected]>
AuthorDate: Wed Oct 1 17:55:57 2025 +0200
Commit: Michael Stahl <[email protected]>
CommitDate: Tue Oct 14 20:05:59 2025 +0200
python3: add patch for CVE-2025-4516
... plus some prerequisite patches that are needed for it to apply.
Upstream fixed this in 3.9.23, but 3.8 is EOL, so the fix is applied here as a patch.
Change-Id: I97edb7c402333bd972ad7f5e7c37ae82ded6c948
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/191749
Tested-by: allotropia jenkins <[email protected]>
Reviewed-by: Michael Stahl <[email protected]>
diff --git
a/external/python3/0001-3.9-bpo-45461-Fix-IncrementalDecoder-and-StreamReade.patch.1
b/external/python3/0001-3.9-bpo-45461-Fix-IncrementalDecoder-and-StreamReade.patch.1
new file mode 100644
index 000000000000..c1946e21a4ab
--- /dev/null
+++
b/external/python3/0001-3.9-bpo-45461-Fix-IncrementalDecoder-and-StreamReade.patch.1
@@ -0,0 +1,384 @@
+From 7c722e32bf582108680f49983cf01eaed710ddb9 Mon Sep 17 00:00:00 2001
+From: Serhiy Storchaka <[email protected]>
+Date: Thu, 14 Oct 2021 20:03:29 +0300
+Subject: [PATCH] [3.9] bpo-45461: Fix IncrementalDecoder and StreamReader in
+ the "unicode-escape" codec (GH-28939) (GH-28945)
+
+They now support escape sequences that are split between input chunks.
+
+Add the third parameter "final" in codecs.unicode_escape_decode().
+It is True by default to match the former behavior.
+(cherry picked from commit c96d1546b11b4c282a7e21737cb1f5d16349656d)
+
+Co-authored-by: Serhiy Storchaka <[email protected]>
+---
+ Doc/data/python3.9.abi | 23654 ++++++++++------
+ Include/cpython/unicodeobject.h | 10 +-
+ Lib/encodings/unicode_escape.py | 9 +-
+ Lib/test/test_codecs.py | 44 +-
+ .../2021-10-14-00-19-02.bpo-45461.4LB_tJ.rst | 2 +
+ Modules/_codecsmodule.c | 13 +-
+ Modules/clinic/_codecsmodule.c.h | 23 +-
+ Objects/unicodeobject.c | 49 +-
+ Parser/pegen/parse_string.c | 2 +-
+ Python/ast.c | 2 +-
+ 10 files changed, 14377 insertions(+), 9431 deletions(-)
+ create mode 100644
Misc/NEWS.d/next/Library/2021-10-14-00-19-02.bpo-45461.4LB_tJ.rst
+
+diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
+index d24a7a6a4d7..d3c906aa927 100644
+--- a/Include/cpython/unicodeobject.h
++++ b/Include/cpython/unicodeobject.h
+@@ -857,12 +857,20 @@ PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
+
+ /* --- Unicode-Escape Codecs ---------------------------------------------- */
+
++/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
++PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
++ const char *string, /* Unicode-Escape encoded string */
++ Py_ssize_t length, /* size of string */
++ const char *errors, /* error handling */
++ Py_ssize_t *consumed /* bytes consumed */
++);
+ /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+ chars. */
+-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
++PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
++ Py_ssize_t *consumed, /* bytes consumed */
+ const char **first_invalid_escape /* on return, points to first
+ invalid escaped char in
+ string. */
+diff --git a/Lib/encodings/unicode_escape.py b/Lib/encodings/unicode_escape.py
+index 817f93265a4..9b1ce99b339 100644
+--- a/Lib/encodings/unicode_escape.py
++++ b/Lib/encodings/unicode_escape.py
+@@ -21,15 +21,16 @@ class IncrementalEncoder(codecs.IncrementalEncoder):
+ def encode(self, input, final=False):
+ return codecs.unicode_escape_encode(input, self.errors)[0]
+
+-class IncrementalDecoder(codecs.IncrementalDecoder):
+- def decode(self, input, final=False):
+- return codecs.unicode_escape_decode(input, self.errors)[0]
++class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
++ def _buffer_decode(self, input, errors, final):
++ return codecs.unicode_escape_decode(input, errors, final)
+
+ class StreamWriter(Codec,codecs.StreamWriter):
+ pass
+
+ class StreamReader(Codec,codecs.StreamReader):
+- pass
++ def decode(self, input, errors='strict'):
++ return codecs.unicode_escape_decode(input, errors, False)
+
+ ### encodings module API
+
+diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
+index 8d112a171d7..09ab852b398 100644
+--- a/Lib/test/test_codecs.py
++++ b/Lib/test/test_codecs.py
+@@ -2327,7 +2327,11 @@ def test_unicode_escape(self):
+ (r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30", 10))
+
+
+-class UnicodeEscapeTest(unittest.TestCase):
++class UnicodeEscapeTest(ReadTest, unittest.TestCase):
++ encoding = "unicode-escape"
++
++ test_lone_surrogates = None
++
+ def test_empty(self):
+ self.assertEqual(codecs.unicode_escape_encode(""), (b"", 0))
+ self.assertEqual(codecs.unicode_escape_decode(b""), ("", 0))
+@@ -2414,6 +2418,44 @@ def test_decode_errors(self):
+ self.assertEqual(decode(br"\U00110000", "ignore"), ("", 10))
+ self.assertEqual(decode(br"\U00110000", "replace"), ("\ufffd", 10))
+
++ def test_partial(self):
++ self.check_partial(
++ "\x00
\\xff\uffff\U00010000",
++ [
++ '',
++ '',
++ '',
++ '\x00',
++ '\x00',
++ '\x00 ',
++ '\x00 ',
++ '\x00
',
++ '\x00
',
++ '\x00
',
++ '\x00
',
++ '\x00
\',
++ '\x00
\',
++ '\x00
\',
++ '\x00
\',
++ '\x00
\\xff',
++ '\x00
\\xff',
++ '\x00
\\xff',
++ '\x00
\\xff',
++ '\x00
\\xff',
++ '\x00
\\xff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff',
++ '\x00
\\xff\uffff\U00010000',
++ ]
++ )
+
+ class RawUnicodeEscapeTest(unittest.TestCase):
+ def test_empty(self):
+diff --git a/Misc/NEWS.d/next/Library/2021-10-14-00-19-02.bpo-45461.4LB_tJ.rst
b/Misc/NEWS.d/next/Library/2021-10-14-00-19-02.bpo-45461.4LB_tJ.rst
+new file mode 100644
+index 00000000000..c1c4ed1ace2
+--- /dev/null
++++ b/Misc/NEWS.d/next/Library/2021-10-14-00-19-02.bpo-45461.4LB_tJ.rst
+@@ -0,0 +1,2 @@
++Fix incremental decoder and stream reader in the "unicode-escape" codec.
++Previously they failed if the escape sequence was split.
+diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c
+index 952072102d5..f22d4daca09 100644
+--- a/Modules/_codecsmodule.c
++++ b/Modules/_codecsmodule.c
+@@ -487,17 +487,20 @@ _codecs_utf_32_ex_decode_impl(PyObject *module,
Py_buffer *data,
+ _codecs.unicode_escape_decode
+ data: Py_buffer(accept={str, buffer})
+ errors: str(accept={str, NoneType}) = None
++ final: bool(accept={int}) = True
+ /
+ [clinic start generated code]*/
+
+ static PyObject *
+ _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
+- const char *errors)
+-/*[clinic end generated code: output=3ca3c917176b82ab
input=8328081a3a569bd6]*/
++ const char *errors, int final)
++/*[clinic end generated code: output=b284f97b12c635ee
input=6154f039a9f7c639]*/
+ {
+- PyObject *decoded = PyUnicode_DecodeUnicodeEscape(data->buf, data->len,
+- errors);
+- return codec_tuple(decoded, data->len);
++ Py_ssize_t consumed = data->len;
++ PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf,
data->len,
++ errors,
++ final ? NULL :
&consumed);
++ return codec_tuple(decoded, consumed);
+ }
+
+ /*[clinic input]
+diff --git a/Modules/clinic/_codecsmodule.c.h
b/Modules/clinic/_codecsmodule.c.h
+index 772c8ca538d..4e2c057007b 100644
+--- a/Modules/clinic/_codecsmodule.c.h
++++ b/Modules/clinic/_codecsmodule.c.h
+@@ -1149,7 +1149,7 @@ exit:
+ }
+
+ PyDoc_STRVAR(_codecs_unicode_escape_decode__doc__,
+-"unicode_escape_decode($module, data, errors=None, /)
"
++"unicode_escape_decode($module, data, errors=None, final=True, /)
"
+ "--
"
+ "
");
+
+@@ -1158,7 +1158,7 @@ PyDoc_STRVAR(_codecs_unicode_escape_decode__doc__,
+
+ static PyObject *
+ _codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
+- const char *errors);
++ const char *errors, int final);
+
+ static PyObject *
+ _codecs_unicode_escape_decode(PyObject *module, PyObject *const *args,
Py_ssize_t nargs)
+@@ -1166,8 +1166,9 @@ _codecs_unicode_escape_decode(PyObject *module, PyObject
*const *args, Py_ssize_
+ PyObject *return_value = NULL;
+ Py_buffer data = {NULL, NULL};
+ const char *errors = NULL;
++ int final = 1;
+
+- if (!_PyArg_CheckPositional("unicode_escape_decode", nargs, 1, 2)) {
++ if (!_PyArg_CheckPositional("unicode_escape_decode", nargs, 1, 3)) {
+ goto exit;
+ }
+ if (PyUnicode_Check(args[0])) {
+@@ -1208,8 +1209,20 @@ _codecs_unicode_escape_decode(PyObject *module,
PyObject *const *args, Py_ssize_
+ _PyArg_BadArgument("unicode_escape_decode", "argument 2", "str or
None", args[1]);
+ goto exit;
+ }
++ if (nargs < 3) {
++ goto skip_optional;
++ }
++ if (PyFloat_Check(args[2])) {
++ PyErr_SetString(PyExc_TypeError,
++ "integer argument expected, got float" );
++ goto exit;
++ }
++ final = _PyLong_AsInt(args[2]);
++ if (final == -1 && PyErr_Occurred()) {
++ goto exit;
++ }
+ skip_optional:
+- return_value = _codecs_unicode_escape_decode_impl(module, &data, errors);
++ return_value = _codecs_unicode_escape_decode_impl(module, &data, errors,
final);
+
+ exit:
+ /* Cleanup for data */
+@@ -2922,4 +2935,4 @@ exit:
+ #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF
+ #define _CODECS_CODE_PAGE_ENCODE_METHODDEF
+ #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */
+-/*[clinic end generated code: output=51b42d170889524c
input=a9049054013a1b77]*/
++/*[clinic end generated code: output=d4b696fe54cfee8f
input=a9049054013a1b77]*/
+diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
+index 38fb3ffc5eb..d6fc03e1ae4 100644
+--- a/Objects/unicodeobject.c
++++ b/Objects/unicodeobject.c
+@@ -6271,9 +6271,10 @@ PyUnicode_AsUTF16String(PyObject *unicode)
+ static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
+
+ PyObject *
+-_PyUnicode_DecodeUnicodeEscape(const char *s,
++_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+ Py_ssize_t size,
+ const char *errors,
++ Py_ssize_t *consumed,
+ const char **first_invalid_escape)
+ {
+ const char *starts = s;
+@@ -6286,6 +6287,9 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
+ *first_invalid_escape = NULL;
+
+ if (size == 0) {
++ if (consumed) {
++ *consumed = 0;
++ }
+ _Py_RETURN_UNICODE_EMPTY();
+ }
+ /* Escaped strings will always be longer than the resulting
+@@ -6336,7 +6340,7 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
+ /* \ - Escapes */
+ if (s >= end) {
+ message = "\ at end of string";
+- goto error;
++ goto incomplete;
+ }
+ c = (unsigned char) *s++;
+
+@@ -6390,7 +6394,10 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
+ count = 8;
+ message = "truncated \UXXXXXXXX escape";
+ hexescape:
+- for (ch = 0; count && s < end; ++s, --count) {
++ for (ch = 0; count; ++s, --count) {
++ if (s >= end) {
++ goto incomplete;
++ }
+ c = (unsigned char)*s;
+ ch <<= 4;
+ if (c >= '0' && c <= '9') {
+@@ -6403,12 +6410,9 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
+ ch += c - ('A' - 10);
+ }
+ else {
+- break;
++ goto error;
+ }
+ }
+- if (count) {
+- goto error;
+- }
+
+ /* when we get here, ch is a 32-bit unicode character */
+ if (ch > MAX_UNICODE) {
+@@ -6435,14 +6439,20 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
+ }
+
+ message = "malformed \N character escape";
+- if (s < end && *s == '{') {
++ if (s >= end) {
++ goto incomplete;
++ }
++ if (*s == '{') {
+ const char *start = ++s;
+ size_t namelen;
+ /* look for the closing brace */
+ while (s < end && *s != '}')
+ s++;
++ if (s >= end) {
++ goto incomplete;
++ }
+ namelen = s - start;
+- if (namelen && s < end) {
++ if (namelen) {
+ /* found a name. look it up in the unicode database */
+ s++;
+ ch = 0xffffffff; /* in case 'getcode' messes up */
+@@ -6468,6 +6478,11 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
+ continue;
+ }
+
++ incomplete:
++ if (consumed) {
++ *consumed = startinpos;
++ break;
++ }
+ error:
+ endinpos = s-starts;
+ writer.min_length = end - s + writer.pos;
+@@ -6496,12 +6511,14 @@ _PyUnicode_DecodeUnicodeEscape(const char *s,
+ }
+
+ PyObject *
+-PyUnicode_DecodeUnicodeEscape(const char *s,
++_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
+ Py_ssize_t size,
+- const char *errors)
++ const char *errors,
++ Py_ssize_t *consumed)
+ {
+ const char *first_invalid_escape;
+- PyObject *result = _PyUnicode_DecodeUnicodeEscape(s, size, errors,
++ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
++ consumed,
+ &first_invalid_escape);
+ if (result == NULL)
+ return NULL;
+@@ -6516,6 +6533,14 @@ PyUnicode_DecodeUnicodeEscape(const char *s,
+ return result;
+ }
+
++PyObject *
++PyUnicode_DecodeUnicodeEscape(const char *s,
++ Py_ssize_t size,
++ const char *errors)
++{
++ return _PyUnicode_DecodeUnicodeEscapeStateful(s, size, errors, NULL);
++}
++
+ /* Return a Unicode-Escape string version of the Unicode object. */
+
+ PyObject *
+diff --git a/Python/ast.c b/Python/ast.c
+index c7ba4d9c544..6dd70592631 100644
+--- a/Python/ast.c
++++ b/Python/ast.c
+@@ -4640,7 +4640,7 @@ decode_unicode_with_escapes(struct compiling *c, const
node *n, const char *s,
+ s = buf;
+
+ const char *first_invalid_escape;
+- v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
++ v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL,
&first_invalid_escape);
+
+ if (v != NULL && first_invalid_escape != NULL) {
+ if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
+--
+2.51.0
+
diff --git
a/external/python3/0001-3.9-gh-133767-Fix-use-after-free-in-the-unicode-esca.patch.1
b/external/python3/0001-3.9-gh-133767-Fix-use-after-free-in-the-unicode-esca.patch.1
new file mode 100644
index 000000000000..a1f2ff201246
--- /dev/null
+++
b/external/python3/0001-3.9-gh-133767-Fix-use-after-free-in-the-unicode-esca.patch.1
@@ -0,0 +1,441 @@
+From 8d35fd1b34935221aff23a1ab69a429dd156be77 Mon Sep 17 00:00:00 2001
+From: Serhiy Storchaka <[email protected]>
+Date: Mon, 2 Jun 2025 18:58:01 +0300
+Subject: [PATCH] [3.9] gh-133767: Fix use-after-free in the unicode-escape
+ decoder with an error handler (GH-129648) (GH-133944) (#134346)
+
+* [3.9] gh-133767: Fix use-after-free in the unicode-escape decoder with an
error handler (GH-129648) (GH-133944)
+
+If the error handler is used, a new bytes object is created to set as
+the object attribute of UnicodeDecodeError, and that bytes object then
+replaces the original data. A pointer to the decoded data will become invalid
+after destroying that temporary bytes object. So we need another way to return
+the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
+
+_PyBytes_DecodeEscape() does not have this issue, because it does not
+use the error handlers registry, but it should be changed for compatibility
+with _PyUnicode_DecodeUnicodeEscapeInternal().
+(cherry picked from commit 9f69a58623bd01349a18ba0c7a9cb1dad6a51e8e)
+(cherry picked from commit 6279eb8c076d89d3739a6edb393e43c7929b429d)
+(cherry picked from commit a75953b347716fff694aa59a7c7c2489fa50d1f5)
+(cherry picked from commit 0c33e5baedf18ebcb04bc41dff7cfc614d5ea5fe)
+(cherry picked from commit 8b528cacbbde60504f6ac62784d04889d285f18b)
+
+Co-authored-by: Serhiy Storchaka <[email protected]>
+---
+ Include/cpython/bytesobject.h | 4 ++
+ Include/cpython/unicodeobject.h | 13 ++++++
+ Lib/test/test_codeccallbacks.py | 36 ++++++++++++++-
+ Lib/test/test_codecs.py | 39 ++++++++++++----
+ ...-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst | 2 +
+ Objects/bytesobject.c | 40 ++++++++++++-----
+ Objects/unicodeobject.c | 45 ++++++++++++++-----
+ Parser/pegen/parse_string.c | 26 ++++++-----
+ 8 files changed, 164 insertions(+), 41 deletions(-)
+ create mode 100644
Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
+
+diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h
+index f284c5835df..a17a1af907b 100644
+--- a/Include/bytesobject.h
++++ b/Include/bytesobject.h
+@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
+ const char *);
+ #ifndef Py_LIMITED_API
+ /* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
++PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
++ const char *,
++ int *, const char **);
++// Export for binary compatibility.
+ PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
+ const char *, const char **);
+ #endif
+diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
+index 1b460c9f189..7c0eaf73327 100644
+--- a/Include/cpython/unicodeobject.h
++++ b/Include/cpython/unicodeobject.h
+@@ -866,6 +866,19 @@ PyAPI_FUNC(PyObject*)
_PyUnicode_DecodeUnicodeEscapeStateful(
+ );
+ /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+ chars. */
++PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
++ const char *string, /* Unicode-Escape encoded string */
++ Py_ssize_t length, /* size of string */
++ const char *errors, /* error handling */
++ Py_ssize_t *consumed, /* bytes consumed */
++ int *first_invalid_escape_char, /* on return, if not -1, contain the first
++ invalid escaped char (<= 0xff) or
invalid
++ octal escape (> 0xff) in string. */
++ const char **first_invalid_escape_ptr); /* on return, if not NULL, may
++ point to the first invalid escaped
++ char in string.
++ May be NULL if errors is not NULL. */
++// Export for binary compatibility.
+ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
+diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
+index 4991330489d..73b63770716 100644
+--- a/Lib/test/test_codeccallbacks.py
++++ b/Lib/test/test_codeccallbacks.py
+@@ -1124,7 +1124,7 @@ def test_bug828737(self):
+ text = 'abc<def>ghi'*n
+ text.translate(charmap)
+
+- def test_mutatingdecodehandler(self):
++ def test_mutating_decode_handler(self):
+ baddata = [
+ ("ascii", b"\xff"),
+ ("utf-7", b"++"),
+@@ -1159,6 +1159,40 @@ def mutating(exc):
+ for (encoding, data) in baddata:
+ self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+
++ def test_mutating_decode_handler_unicode_escape(self):
++ decode = codecs.unicode_escape_decode
++ def mutating(exc):
++ if isinstance(exc, UnicodeDecodeError):
++ r = data.get(exc.object[:exc.end])
++ if r is not None:
++ exc.object = r[0] + exc.object[exc.end:]
++ return ('\u0404', r[1])
++ raise AssertionError("don't know how to handle %r" % exc)
++
++ codecs.register_error('test.mutating2', mutating)
++ data = {
++ br'\x0': (b'\', 0),
++ br'\x3': (b'xxx\', 3),
++ br'\x5': (b'x\', 1),
++ }
++ def check(input, expected, msg):
++ with self.assertWarns(DeprecationWarning) as cm:
++ self.assertEqual(decode(input, 'test.mutating2'), (expected,
len(input)))
++ self.assertIn(msg, str(cm.warning))
++
++ check(br'\x0n\z', '\u0404
\z', r"invalid escape sequence '\z'")
++ check(br'\x0z', '\u0404\z', r"invalid escape sequence '\z'")
++
++ check(br'\x3n\zr', '\u0404
\zr', r"invalid escape sequence '\z'")
++ check(br'\x3zr', '\u0404\zr', r"invalid escape sequence '\z'")
++ check(br'\x3z5', '\u0404\z5', r"invalid escape sequence '\z'")
++ check(memoryview(br'\x3z5x')[:-1], '\u0404\z5', r"invalid escape
sequence '\z'")
++ check(memoryview(br'\x3z5xy')[:-2], '\u0404\z5', r"invalid escape
sequence '\z'")
++
++ check(br'\x5n\z', '\u0404
\z', r"invalid escape sequence '\z'")
++ check(br'\x5z', '\u0404\z', r"invalid escape sequence '\z'")
++ check(memoryview(br'\x5zy')[:-1], '\u0404\z', r"invalid escape
sequence '\z'")
++
+ # issue32583
+ def test_crashing_decode_handler(self):
+ # better generating one more character to fill the extra space slot
+diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
+index 3520cc00a1a..74250ac3444 100644
+--- a/Lib/test/test_codecs.py
++++ b/Lib/test/test_codecs.py
+@@ -1178,20 +1178,32 @@ def test_escape(self):
+ check(br"[A]", b"[A]")
+ check(br"[\x41]", b"[A]")
+ check(br"[\x410]", b"[A0]")
++
++ def test_warnings(self):
++ decode = codecs.escape_decode
++ check = coding_checker(self, decode)
+ for i in range(97, 123):
+ b = bytes([i])
+ if b not in b'abfnrtvx':
+- with self.assertWarns(DeprecationWarning):
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\%c'" % i):
+ check(b"\" + b, b"\" + b)
+- with self.assertWarns(DeprecationWarning):
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\%c'" % (i-32)):
+ check(b"\" + b.upper(), b"\" + b.upper())
+- with self.assertWarns(DeprecationWarning):
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\8'"):
+ check(br"\8", b"\8")
+ with self.assertWarns(DeprecationWarning):
+ check(br"\9", b"\9")
+- with self.assertWarns(DeprecationWarning):
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\\xfa'") as cm:
+ check(b"\\xfa", b"\\xfa")
+
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\z'"):
++ self.assertEqual(decode(br'\x\z', 'ignore'), (b'\z', 4))
++
+ def test_errors(self):
+ decode = codecs.escape_decode
+ self.assertRaises(ValueError, decode, br"\x")
+@@ -2393,20 +2405,31 @@ def test_escape_decode(self):
+ check(br"[\x410]", "[A0]")
+ check(br"\u20ac", "\u20ac")
+ check(br"\U0001d120", "\U0001d120")
++
++ def test_decode_warnings(self):
++ decode = codecs.unicode_escape_decode
++ check = coding_checker(self, decode)
+ for i in range(97, 123):
+ b = bytes([i])
+ if b not in b'abfnrtuvx':
+- with self.assertWarns(DeprecationWarning):
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\%c'" % i):
+ check(b"\" + b, "\" + chr(i))
+ if b.upper() not in b'UN':
+- with self.assertWarns(DeprecationWarning):
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\%c'" % (i-32)):
+ check(b"\" + b.upper(), "\" + chr(i-32))
+- with self.assertWarns(DeprecationWarning):
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\8'"):
+ check(br"\8", "\8")
+ with self.assertWarns(DeprecationWarning):
+ check(br"\9", "\9")
+- with self.assertWarns(DeprecationWarning):
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\\xfa'") as cm:
+ check(b"\\xfa", "\\xfa")
++ with self.assertWarnsRegex(DeprecationWarning,
++ r"invalid escape sequence '\z'"):
++ self.assertEqual(decode(br'\x\z', 'ignore'), ('\z', 4))
+
+ def test_decode_errors(self):
+ decode = codecs.unicode_escape_decode
+diff --git
a/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
+new file mode 100644
+index 00000000000..39d2f1e1a89
+--- /dev/null
++++ b/Misc/NEWS.d/next/Security/2025-05-09-20-22-54.gh-issue-133767.kN2i3Q.rst
+@@ -0,0 +1,2 @@
++Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
++handler.
+diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
+index 25d9814dd6d..f684e2eb336 100644
+--- a/Objects/bytesobject.c
++++ b/Objects/bytesobject.c
+@@ -1060,10 +1060,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t
format_len,
+ }
+
+ /* Unescape a backslash-escaped string. */
+-PyObject *_PyBytes_DecodeEscape(const char *s,
++PyObject *_PyBytes_DecodeEscape2(const char *s,
+ Py_ssize_t len,
+ const char *errors,
+- const char **first_invalid_escape)
++ int *first_invalid_escape_char,
++ const char **first_invalid_escape_ptr)
+ {
+ int c;
+ char *p;
+@@ -1077,7 +1078,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
+ return NULL;
+ writer.overallocate = 1;
+
+- *first_invalid_escape = NULL;
++ *first_invalid_escape_char = -1;
++ *first_invalid_escape_ptr = NULL;
+
+ end = s + len;
+ while (s < end) {
+@@ -1152,9 +1154,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
+ break;
+
+ default:
+- if (*first_invalid_escape == NULL) {
+- *first_invalid_escape = s-1; /* Back up one char, since we've
+- already incremented s. */
++ if (*first_invalid_escape_char == -1) {
++ *first_invalid_escape_char = (unsigned char)s[-1];
++ /* Back up one char, since we've already incremented s. */
++ *first_invalid_escape_ptr = s - 1;
+ }
+ *p++ = '\';
+ s--;
+@@ -1168,21 +1171,36 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
+ return NULL;
+ }
+
++// Export for binary compatibility.
++PyObject *_PyBytes_DecodeEscape(const char *s,
++ Py_ssize_t len,
++ const char *errors,
++ const char **first_invalid_escape)
++{
++ int first_invalid_escape_char;
++ return _PyBytes_DecodeEscape2(
++ s, len, errors,
++ &first_invalid_escape_char,
++ first_invalid_escape);
++}
++
+ PyObject *PyBytes_DecodeEscape(const char *s,
+ Py_ssize_t len,
+ const char *errors,
+ Py_ssize_t Py_UNUSED(unicode),
+ const char *Py_UNUSED(recode_encoding))
+ {
+- const char* first_invalid_escape;
+- PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
+- &first_invalid_escape);
++ int first_invalid_escape_char;
++ const char *first_invalid_escape_ptr;
++ PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
++ &first_invalid_escape_char,
++ &first_invalid_escape_ptr);
+ if (result == NULL)
+ return NULL;
+- if (first_invalid_escape != NULL) {
++ if (first_invalid_escape_char != -1) {
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "invalid escape sequence '\%c'",
+- (unsigned char)*first_invalid_escape) < 0) {
++ first_invalid_escape_char) < 0) {
+ Py_DECREF(result);
+ return NULL;
+ }
+diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
+index bd08b198781..cd1a0130149 100644
+--- a/Objects/unicodeobject.c
++++ b/Objects/unicodeobject.c
+@@ -6278,20 +6278,23 @@ PyUnicode_AsUTF16String(PyObject *unicode)
+ static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
+
+ PyObject *
+-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
++_PyUnicode_DecodeUnicodeEscapeInternal2(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed,
+- const char **first_invalid_escape)
++ int *first_invalid_escape_char,
++ const char **first_invalid_escape_ptr)
+ {
+ const char *starts = s;
++ const char *initial_starts = starts;
+ _PyUnicodeWriter writer;
+ const char *end;
+ PyObject *errorHandler = NULL;
+ PyObject *exc = NULL;
+
+ // so we can remember if we've seen an invalid escape char or not
+- *first_invalid_escape = NULL;
++ *first_invalid_escape_char = -1;
++ *first_invalid_escape_ptr = NULL;
+
+ if (size == 0) {
+ if (consumed) {
+@@ -6474,9 +6477,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+ goto error;
+
+ default:
+- if (*first_invalid_escape == NULL) {
+- *first_invalid_escape = s-1; /* Back up one char, since we've
+- already incremented s. */
++ if (*first_invalid_escape_char == -1) {
++ *first_invalid_escape_char = c;
++ if (starts == initial_starts) {
++ /* Back up one char, since we've already incremented s. */
++ *first_invalid_escape_ptr = s - 1;
++ }
+ }
+ WRITE_ASCII_CHAR('\');
+ WRITE_CHAR(c);
+@@ -6515,22 +6521,39 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
+ return NULL;
+ }
+
++// Export for binary compatibility.
++PyObject *
++_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
++ Py_ssize_t size,
++ const char *errors,
++ Py_ssize_t *consumed,
++ const char **first_invalid_escape)
++{
++ int first_invalid_escape_char;
++ return _PyUnicode_DecodeUnicodeEscapeInternal2(
++ s, size, errors, consumed,
++ &first_invalid_escape_char,
++ first_invalid_escape);
++}
++
+ PyObject *
+ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
+ {
+- const char *first_invalid_escape;
+- PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
++ int first_invalid_escape_char;
++ const char *first_invalid_escape_ptr;
++ PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal2(s, size,
errors,
+ consumed,
+- &first_invalid_escape);
++
&first_invalid_escape_char,
++
&first_invalid_escape_ptr);
+ if (result == NULL)
+ return NULL;
+- if (first_invalid_escape != NULL) {
++ if (first_invalid_escape_char != -1) {
+ if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
+ "invalid escape sequence '\%c'",
+- (unsigned char)*first_invalid_escape) < 0) {
++ first_invalid_escape_char) < 0) {
+ Py_DECREF(result);
+ return NULL;
+ }
+diff --git a/Python/ast.c b/Python/ast.c
+index 15a132b4e05..9df1313c103 100644
+--- a/Python/ast.c
++++ b/Python/ast.c
+@@ -119,12 +119,15 @@ decode_unicode_with_escapes(Parser *parser, const char
*s, size_t len, Token *t)
+ len = p - buf;
+ s = buf;
+
+- const char *first_invalid_escape;
+- v = _PyUnicode_DecodeUnicodeEscapeInternal(s, len, NULL, NULL,
&first_invalid_escape);
+-
+- if (v != NULL && first_invalid_escape != NULL) {
+- if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
+- /* We have not decref u before because first_invalid_escape points
++ int first_invalid_escape_char;
++ const char *first_invalid_escape_ptr;
++ v = _PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len, NULL,
NULL,
++ &first_invalid_escape_char,
++ &first_invalid_escape_ptr);
++
++ if (v != NULL && first_invalid_escape_ptr != NULL) {
++ if (warn_invalid_escape_sequence(c, n, *first_invalid_escape_ptr) <
0) {
++ /* We have not decref u before because first_invalid_escape_ptr
points
+ inside u. */
+ Py_XDECREF(u);
+ Py_DECREF(v);
+@@ -138,14 +141,16 @@ decode_unicode_with_escapes(Parser *parser, const char
*s, size_t len, Token *t)
+ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
+ size_t len)
+ {
+- const char *first_invalid_escape;
+- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL,
+- &first_invalid_escape);
++ int first_invalid_escape_char;
++ const char *first_invalid_escape_ptr;
++ PyObject *result = _PyBytes_DecodeEscape2(s, len, NULL,
++ &first_invalid_escape_char,
++ &first_invalid_escape_ptr);
+ if (result == NULL)
+ return NULL;
+
+- if (first_invalid_escape != NULL) {
+- if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
++ if (first_invalid_escape_ptr != NULL) {
++ if (warn_invalid_escape_sequence(c, n, *first_invalid_escape_ptr) <
0) {
+ Py_DECREF(result);
+ return NULL;
+ }
+--
+2.51.0
+
diff --git
a/external/python3/0001-Cut-disused-recode_encoding-logic-in-_PyBytes_Decode.patch.1
b/external/python3/0001-Cut-disused-recode_encoding-logic-in-_PyBytes_Decode.patch.1
new file mode 100644
index 000000000000..68e0bcab0779
--- /dev/null
+++
b/external/python3/0001-Cut-disused-recode_encoding-logic-in-_PyBytes_Decode.patch.1
@@ -0,0 +1,168 @@
+From 3a4f66707e824ef3a8384827590ebaa6ca463dc0 Mon Sep 17 00:00:00 2001
+From: Greg Price <[email protected]>
+Date: Thu, 12 Sep 2019 11:12:22 -0700
+Subject: [PATCH] Cut disused recode_encoding logic in _PyBytes_DecodeEscape.
+ (GH-16013)
+
+All call sites pass NULL for `recode_encoding`, so this path is
+completely untested. That's been true since before Python 3.0.
+It adds significant complexity to this logic, so it's best to
+take it out.
+
+All call sites now have a literal NULL, and that's been true since
+commit 768921cf3 eliminated a conditional (`foo ? bar : NULL`) at
+the call site in Python/ast.c where we're parsing a bytes literal.
+But even before then, that condition `foo` had been a constant
+since unadorned string literals started meaning Unicode, in commit
+572dbf8f1 aka v3.0a1~1035 .
+
+The `unicode` parameter is already unused, so mark it as unused too.
+The code that acted on it was also taken out before Python 3.0, in
+commit 8d30cc014 aka v3.0a1~1031 .
+
+The function (PyBytes_DecodeEscape) is exposed in the API, but it's
+never been documented.
+---
+ Include/bytesobject.h | 4 +--
+ Include/longobject.h | 2 +-
+ Objects/bytesobject.c | 63 ++++---------------------------------------
+ Python/ast.c | 2 +-
+ 4 files changed, 8 insertions(+), 63 deletions(-)
+
+diff --git a/Include/bytesobject.h b/Include/bytesobject.h
+index 3fde4a221fd..fc9981e56d2 100644
+--- a/Include/bytesobject.h
++++ b/Include/bytesobject.h
+@@ -77,9 +77,7 @@ PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
+ #ifndef Py_LIMITED_API
+ /* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
+ PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
+- const char *, Py_ssize_t,
+- const char *,
+- const char **);
++ const char *, const char **);
+ #endif
+
+ /* Macro, trading safety for speed */
+diff --git a/Include/longobject.h b/Include/longobject.h
+index 1e7a58d994b..87b4d017d32 100644
+--- a/Include/longobject.h
++++ b/Include/longobject.h
+@@ -74,7 +74,7 @@ PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *);
+ #endif
+
+ /* Used by Python/mystrtoul.c, _PyBytes_FromHex(),
+- _PyBytes_DecodeEscapeRecode(), etc. */
++ _PyBytes_DecodeEscape(), etc. */
+ #ifndef Py_LIMITED_API
+ PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
+ #endif
+diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
+index e1f5ee2f62f..4b2a77b4b8c 100644
+--- a/Objects/bytesobject.c
++++ b/Objects/bytesobject.c
+@@ -1077,52 +1077,10 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
+ return NULL;
+ }
+
+-/* Unescape a backslash-escaped string. If unicode is non-zero,
+- the string is a u-literal. If recode_encoding is non-zero,
+- the string is UTF-8 encoded and should be re-encoded in the
+- specified encoding. */
+-
+-static char *
+-_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
+- const char *errors, const char *recode_encoding,
+- _PyBytesWriter *writer, char *p)
+-{
+- PyObject *u, *w;
+- const char* t;
+-
+- t = *s;
+- /* Decode non-ASCII bytes as UTF-8. */
+- while (t < end && (*t & 0x80))
+- t++;
+- u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
+- if (u == NULL)
+- return NULL;
+-
+- /* Recode them in target encoding. */
+- w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
+- Py_DECREF(u);
+- if (w == NULL)
+- return NULL;
+- assert(PyBytes_Check(w));
+-
+- /* Append bytes to output buffer. */
+- writer->min_size--; /* subtract 1 preallocated byte */
+- p = _PyBytesWriter_WriteBytes(writer, p,
+- PyBytes_AS_STRING(w),
+- PyBytes_GET_SIZE(w));
+- Py_DECREF(w);
+- if (p == NULL)
+- return NULL;
+-
+- *s = t;
+- return p;
+-}
+-
++/* Unescape a backslash-escaped string. */
+ PyObject *_PyBytes_DecodeEscape(const char *s,
+ Py_ssize_t len,
+ const char *errors,
+- Py_ssize_t unicode,
+- const char *recode_encoding,
+ const char **first_invalid_escape)
+ {
+ int c;
+@@ -1142,17 +1100,7 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
+ while (s < end) {
+ if (*s != '\\') {
+ non_esc:
+- if (!(recode_encoding && (*s & 0x80))) {
+- *p++ = *s++;
+- }
+- else {
+- /* non-ASCII character and need to recode */
+- p = _PyBytes_DecodeEscapeRecode(&s, end,
+- errors, recode_encoding,
+- &writer, p);
+- if (p == NULL)
+- goto failed;
+- }
++ *p++ = *s++;
+ continue;
+ }
+
+@@ -1241,12 +1189,11 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
+ PyObject *PyBytes_DecodeEscape(const char *s,
+ Py_ssize_t len,
+ const char *errors,
+- Py_ssize_t unicode,
+- const char *recode_encoding)
++ Py_ssize_t Py_UNUSED(unicode),
++ const char *Py_UNUSED(recode_encoding))
+ {
+ const char* first_invalid_escape;
+- PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
+- recode_encoding,
++ PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
+ &first_invalid_escape);
+ if (result == NULL)
+ return NULL;
+diff --git a/Python/ast.c b/Python/ast.c
+index e6f71671c18..05147a49fc1 100644
+--- a/Python/ast.c
++++ b/Python/ast.c
+@@ -4766,7 +4766,7 @@ decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
+ size_t len)
+ {
+ const char *first_invalid_escape;
+- PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
++ PyObject *result = _PyBytes_DecodeEscape(s, len, NULL,
+ &first_invalid_escape);
+ if (result == NULL)
+ return NULL;
+--
+2.51.0
+
diff --git a/external/python3/UnpackedTarball_python3.mk b/external/python3/UnpackedTarball_python3.mk
index 94950537386c..58f47329a84e 100644
--- a/external/python3/UnpackedTarball_python3.mk
+++ b/external/python3/UnpackedTarball_python3.mk
@@ -23,6 +23,9 @@ $(eval $(call gb_UnpackedTarball_add_patches,python3,\
external/python3/ubsan.patch.0 \
external/python3/python-3.5.tweak.strip.soabi.patch \
external/python3/darwin.patch.0 \
+ external/python3/0001-3.9-bpo-45461-Fix-IncrementalDecoder-and-StreamReade.patch.1 \
+ external/python3/0001-Cut-disused-recode_encoding-logic-in-_PyBytes_Decode.patch.1 \
+ external/python3/0001-3.9-gh-133767-Fix-use-after-free-in-the-unicode-esca.patch.1 \
))
ifneq ($(filter DRAGONFLY FREEBSD LINUX NETBSD OPENBSD SOLARIS,$(OS)),)