https://github.com/python/cpython/commit/b89735625dff07005c31bdc86cbe7113ef1b59d0
commit: b89735625dff07005c31bdc86cbe7113ef1b59d0
branch: main
author: Serhiy Storchaka <[email protected]>
committer: serhiy-storchaka <[email protected]>
date: 2026-05-02T12:04:05+03:00
summary:

gh-148914: Fix memoization of in-band PickleBuffer in the Python implementation (GH-149052)

Previously, identical PickleBuffers did not preserve identity.
Also, an empty writable PickleBuffer memoized an empty bytearray object
in place of b'', which is a singleton in CPython, so subsequent
references to b'' were unpickled as an empty bytearray object.

files:
A Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst
M Lib/pickle.py
M Lib/test/pickletester.py

diff --git a/Lib/pickle.py b/Lib/pickle.py
index 3e7cf25cb05337..95836afdc2b43e 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -920,17 +920,11 @@ def save_picklebuffer(self, obj):
                     # Write data in-band
                     # XXX The C implementation avoids a copy here
                     buf = m.tobytes()
-                    in_memo = id(buf) in self.memo
                     if m.readonly:
-                        if in_memo:
-                            self._save_bytes_no_memo(buf)
-                        else:
-                            self.save_bytes(buf)
+                        self._save_bytes_no_memo(buf)
                     else:
-                        if in_memo:
-                            self._save_bytearray_no_memo(buf)
-                        else:
-                            self.save_bytearray(buf)
+                        self._save_bytearray_no_memo(buf)
+                    self.memoize(obj)
                 else:
                     # Write data out-of-band
                     self.write(NEXT_BUFFER)
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index c2018c9785b9b3..9ba498ce8f575d 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -3100,6 +3100,51 @@ def test_bytearray_memoization(self):
                         self.assertIsNot(b2a, b2b)
                         self.assert_is_copy(b2a, b2b)
 
+    def test_picklebuffer_memoization(self):
+        if self.py_version < (3, 8):
+            self.skipTest('not supported in Python < 3.8')
+        array_types = [bytes, bytearray]
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            for array_type in array_types:
+                for s in b'', b'xyz', b'xyz'*100:
+                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=False):
+                        b = pickle.PickleBuffer(array_type(s))
+                        p = self.dumps((b, b), proto)
+                        b1, b2 = self.loads(p)
+                        self.assertIs(b1, b2)
+
+                    with self.subTest(proto=proto, array_type=array_type, s=s, independent=True):
+                        b = array_type(s)
+                        b1a = pickle.PickleBuffer(b)
+                        b2a = pickle.PickleBuffer(b)
+                        p = self.dumps((b1a, b2a), proto)
+                        b1b, b2b = self.loads(p)
+                        if array_type is not bytes:
+                            self.assertIsNot(b1b, b2b)
+                        self.assert_is_copy(b1b, b)
+                        self.assert_is_copy(b2b, b)
+
+    def test_empty_picklebuffer_memoization(self):
+        # gh-148914: Empty writable PickleBuffer memoized an empty bytearray
+        # with the id of b'' (a singleton in CPython).
+        if self.py_version < (3, 8):
+            self.skipTest('not supported in Python < 3.8')
+        for proto in range(5, pickle.HIGHEST_PROTOCOL + 1):
+            for readonly in False, True:
+                with self.subTest(proto=proto, readonly=readonly):
+                    b = b''
+                    ba = bytearray()
+                    buf = pickle.PickleBuffer(b if readonly else ba)
+                    p = self.dumps((buf, b, ba), proto)
+                    buf, b, ba = self.loads(p)
+                    array_type = bytes if readonly else bytearray
+                    self.assertIsInstance(buf, array_type)
+                    self.assertIsInstance(b, bytes)
+                    self.assertIsInstance(ba, bytearray)
+                    self.assertEqual(buf, b'')
+                    self.assertEqual(b, b'')
+                    self.assertEqual(ba, b'')
+
     def test_ints(self):
         for proto in protocols:
             n = sys.maxsize
diff --git a/Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst b/Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst
new file mode 100644
index 00000000000000..8348aad0d892c3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2026-04-27-17-12-11.gh-issue-148914.i5C3kW.rst
@@ -0,0 +1,6 @@
+Fix memoization of in-band :class:`~pickle.PickleBuffer` in the Python
+implementation of :mod:`pickle`. Previously, identical
+:class:`!PickleBuffer`\ s did not preserve identity, and empty writable
+:class:`!PickleBuffer` memoized an empty bytearray object in place of
+``b''``, so the following references to ``b''`` were unpickled as an empty
+bytearray object.

_______________________________________________
Python-checkins mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://mail.python.org/mailman3//lists/python-checkins.python.org
Member address: [email protected]

Reply via email to