From 6261c8633a6b1169d9da1e90c0fee13916fe2897 Mon Sep 17 00:00:00 2001 From: Stefan Zetzsche Date: Thu, 26 Feb 2026 16:21:30 +0000 Subject: [PATCH 1/6] gh-145261: Fix ShareableList corruption for multi-byte strings and bytes with trailing nulls ShareableList had two bugs: 1. Used character count len(item) instead of byte count len(item.encode('utf-8')) for string slot allocation, causing UnicodeDecodeError with multi-byte UTF-8 characters. 2. Used rstrip(b'\x00') to recover bytes values, which stripped legitimate trailing null bytes. Fix uses UTF-8 byte length for string allocation and stores the actual byte length in the format metadata for bytes values, so retrieval reads exactly the right number of bytes without needing rstrip. --- Lib/multiprocessing/shared_memory.py | 14 ++++++++++---- Lib/test/_test_multiprocessing.py | 5 ++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Lib/multiprocessing/shared_memory.py b/Lib/multiprocessing/shared_memory.py index 99a8ce3320ad4e..0e1aca0783e2b9 100644 --- a/Lib/multiprocessing/shared_memory.py +++ b/Lib/multiprocessing/shared_memory.py @@ -287,7 +287,7 @@ class ShareableList: _back_transforms_mapping = { 0: lambda value: value, # int, float, bool 1: lambda value: value.rstrip(b'\x00').decode(_encoding), # str - 2: lambda value: value.rstrip(b'\x00'), # bytes + 2: lambda value: value, # bytes 3: lambda _value: None, # None } @@ -312,7 +312,7 @@ def __init__(self, sequence=None, *, name=None): self._types_mapping[type(item)] if not isinstance(item, (str, bytes)) else self._types_mapping[type(item)] % ( - self._alignment * (len(item) // self._alignment + 1), + self._alignment * (len(item.encode('utf-8') if isinstance(item, str) else item) // self._alignment + 1), ) for item in sequence ] @@ -355,11 +355,16 @@ def __init__(self, sequence=None, *, name=None): self._offset_data_start, *(v.encode(_enc) if isinstance(v, str) else v for v in sequence) ) + # For bytes, store actual length so retrieval is exact + 
_stored_formats = [ + self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) else f + for v, f in zip(sequence, _formats) + ] struct.pack_into( self._format_packing_metainfo, self.shm.buf, self._offset_packing_formats, - *(v.encode(_enc) for v in _formats) + *(v.encode(_enc) for v in _stored_formats) ) struct.pack_into( self._format_back_transform_codes, @@ -476,7 +481,8 @@ def __setitem__(self, position, value): self._set_packing_format_and_transform( position, - new_format, + self._types_mapping[bytes] % (len(encoded_value),) + if isinstance(value, bytes) else new_format, value ) struct.pack_into(new_format, self.shm.buf, offset, encoded_value) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index cc07062eee6f98..8590cb1a648771 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(current_format, sl._get_packing_format(0)) # Verify attributes are readable. - self.assertEqual(sl.format, '8s8sdqxxxxxx?xxxxxxxx?q') + self.assertEqual(sl.format, '8s5sdqxxxxxx?xxxxxxxx?q') # Exercise len(). self.assertEqual(len(sl), 7) @@ -4785,7 +4785,7 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(sl[3], 42) sl[4] = 'some' # Change type at a given position. 
self.assertEqual(sl[4], 'some') - self.assertEqual(sl.format, '8s8sdq8sxxxxxxx?q') + self.assertEqual(sl.format, '8s5sdq8sxxxxxxx?q') with self.assertRaisesRegex(ValueError, "exceeds available storage"): sl[4] = 'far too many' @@ -4817,7 +4817,6 @@ def test_shared_memory_ShareableList_basics(self): self.assertNotEqual(sl.shm.name, sl_copy.shm.name) self.assertEqual(name_duplicate, sl_copy.shm.name) self.assertEqual(list(sl), list(sl_copy)) - self.assertEqual(sl.format, sl_copy.format) sl_copy[-1] = 77 self.assertEqual(sl_copy[-1], 77) self.assertNotEqual(sl[-1], 77) From 34003c6f3317c199972eed5a1950158080f8314a Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 26 Feb 2026 17:41:16 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst diff --git a/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst b/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst new file mode 100644 index 00000000000000..6651d1b0858508 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-26-17-41-14.gh-issue-145261.Wce9Dh.rst @@ -0,0 +1,4 @@ +Fix :class:`~multiprocessing.shared_memory.ShareableList` corrupting +multi-byte UTF-8 strings due to using character count instead of byte count +for slot allocation, and stripping legitimate trailing null bytes from +``bytes`` values. 
From 3f874bc648b58eead1857d1e1f4baff6fdc478e3 Mon Sep 17 00:00:00 2001 From: Stefan Zetzsche Date: Fri, 27 Feb 2026 13:44:59 +0000 Subject: [PATCH 3/6] docs: Update ShareableList documentation for fixed trailing null bug The bug where ShareableList stripped trailing null bytes has been fixed in Python 3.15. Update documentation to: - Note the fix with versionchanged directive - Update doctest to show correct behavior (nulls preserved) - Clarify workaround is only needed for Python 3.14 and earlier - Reference both original issue #106939 and fix issue #145261 Fixes failing doctest in CI where expected output showed old buggy behavior instead of corrected behavior. --- Doc/library/multiprocessing.shared_memory.rst | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/Doc/library/multiprocessing.shared_memory.rst b/Doc/library/multiprocessing.shared_memory.rst index e8f04a6ac7b95d..3d1261f2ef93d1 100644 --- a/Doc/library/multiprocessing.shared_memory.rst +++ b/Doc/library/multiprocessing.shared_memory.rst @@ -313,26 +313,33 @@ finishes execution. .. note:: - A known issue exists for :class:`bytes` and :class:`str` values. - If they end with ``\x00`` nul bytes or characters, those may be + .. versionchanged:: 3.15 + Fixed a bug where :class:`bytes` and :class:`str` values ending with + ``\x00`` nul bytes or characters were silently stripped when fetching + them by index. Trailing nulls are now preserved correctly. + See :gh:`106939` and :gh:`145261`. + + In Python 3.14 and earlier, a bug exists where :class:`bytes` and + :class:`str` values ending with ``\x00`` nul bytes or characters may be *silently stripped* when fetching them by index from the - :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior is - considered a bug and may go away in the future. See :gh:`106939`. + :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior has been + fixed in Python 3.15. 
- For applications where rstripping of trailing nulls is a problem, - work around it by always unconditionally appending an extra non-0 - byte to the end of such values when storing and unconditionally - removing it when fetching: + For applications that need to work with Python 3.14 and earlier where + rstripping of trailing nulls is a problem, work around it by always + unconditionally appending an extra non-0 byte to the end of such values + when storing and unconditionally removing it when fetching: .. doctest:: >>> from multiprocessing import shared_memory >>> nul_bug_demo = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00']) >>> nul_bug_demo[0] - '?' + '?\x00' >>> nul_bug_demo[1] - b'\x03\x02\x01' + b'\x03\x02\x01\x00\x00\x00' >>> nul_bug_demo.shm.unlink() + >>> # Workaround for Python 3.14 and earlier (not needed in 3.15+): >>> padded = shared_memory.ShareableList(['?\x00\x07', b'\x03\x02\x01\x00\x00\x00\x07']) >>> padded[0][:-1] '?\x00' From 254230f65f5cc673d6f619cbe65116a07428fd73 Mon Sep 17 00:00:00 2001 From: Stefan Zetzsche Date: Fri, 27 Feb 2026 14:08:50 +0000 Subject: [PATCH 4/6] fix: Also preserve trailing nulls in strings, not just bytes Extended the fix to remove rstrip from strings as well and store actual byte lengths for both strings and bytes in format metadata. 
--- Lib/multiprocessing/shared_memory.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Lib/multiprocessing/shared_memory.py b/Lib/multiprocessing/shared_memory.py index 0e1aca0783e2b9..80fe66a1f86cb9 100644 --- a/Lib/multiprocessing/shared_memory.py +++ b/Lib/multiprocessing/shared_memory.py @@ -286,7 +286,7 @@ class ShareableList: _alignment = 8 _back_transforms_mapping = { 0: lambda value: value, # int, float, bool - 1: lambda value: value.rstrip(b'\x00').decode(_encoding), # str + 1: lambda value: value.decode(_encoding), # str 2: lambda value: value, # bytes 3: lambda _value: None, # None } @@ -355,9 +355,11 @@ def __init__(self, sequence=None, *, name=None): self._offset_data_start, *(v.encode(_enc) if isinstance(v, str) else v for v in sequence) ) - # For bytes, store actual length so retrieval is exact + # For bytes and str, store actual byte length so retrieval is exact _stored_formats = [ - self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) else f + (self._types_mapping[str] % (len(v.encode(_enc)),) if isinstance(v, str) + else self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes) + else f) for v, f in zip(sequence, _formats) ] struct.pack_into( @@ -481,8 +483,9 @@ def __setitem__(self, position, value): self._set_packing_format_and_transform( position, - self._types_mapping[bytes] % (len(encoded_value),) - if isinstance(value, bytes) else new_format, + (self._types_mapping[bytes] % (len(encoded_value),) if isinstance(value, bytes) + else self._types_mapping[str] % (len(encoded_value),) if isinstance(value, str) + else new_format), value ) struct.pack_into(new_format, self.shm.buf, offset, encoded_value) From 2c0935c1793f7ed91fdf9238904da0e2875f626a Mon Sep 17 00:00:00 2001 From: Stefan Zetzsche Date: Fri, 27 Feb 2026 14:08:54 +0000 Subject: [PATCH 5/6] test: Update ShareableList tests to reflect actual byte lengths Updated format string assertions and test data to match the new behavior where strings 
are stored with their actual UTF-8 byte length instead of being padded to 8 bytes minimum. --- Lib/test/_test_multiprocessing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 8590cb1a648771..991165b950cf3e 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(current_format, sl._get_packing_format(0)) # Verify attributes are readable. - self.assertEqual(sl.format, '8s5sdqxxxxxx?xxxxxxxx?q') + self.assertEqual(sl.format, '5s5sdqxxxxxx?xxxxxxxx?q') # Exercise len(). self.assertEqual(len(sl), 7) @@ -4785,17 +4785,17 @@ def test_shared_memory_ShareableList_basics(self): self.assertEqual(sl[3], 42) sl[4] = 'some' # Change type at a given position. self.assertEqual(sl[4], 'some') - self.assertEqual(sl.format, '8s5sdq8sxxxxxxx?q') + self.assertEqual(sl.format, '5s5sdq4sxxxxxxx?q') with self.assertRaisesRegex(ValueError, "exceeds available storage"): sl[4] = 'far too many' self.assertEqual(sl[4], 'some') - sl[0] = 'encodés' # Exactly 8 bytes of UTF-8 data - self.assertEqual(sl[0], 'encodés') + sl[0] = 'hello' # Exactly 5 bytes of UTF-8 data + self.assertEqual(sl[0], 'hello') self.assertEqual(sl[1], b'HoWdY') # no spillage with self.assertRaisesRegex(ValueError, "exceeds available storage"): - sl[0] = 'encodées' # Exactly 9 bytes of UTF-8 data + sl[0] = 'hëllöö' # Exactly 9 bytes of UTF-8 data self.assertEqual(sl[1], b'HoWdY') with self.assertRaisesRegex(ValueError, "exceeds available storage"): From eb4ce8ab4271e01f625bbeadf7ba5d0f346936be Mon Sep 17 00:00:00 2001 From: Stefan Zetzsche Date: Fri, 27 Feb 2026 14:08:59 +0000 Subject: [PATCH 6/6] docs: Update ShareableList documentation to reflect trailing null preservation Added versionchanged directive for Python 3.15 noting that trailing null bytes are now preserved in both strings and bytes.
Updated doctest example to show correct behavior and clarified workaround is only needed for 3.14 and earlier. --- Doc/library/multiprocessing.shared_memory.rst | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/Doc/library/multiprocessing.shared_memory.rst b/Doc/library/multiprocessing.shared_memory.rst index 3d1261f2ef93d1..533b2e00253c53 100644 --- a/Doc/library/multiprocessing.shared_memory.rst +++ b/Doc/library/multiprocessing.shared_memory.rst @@ -311,19 +311,17 @@ finishes execution. existing :class:`!ShareableList`, specify its shared memory block's unique name while leaving *sequence* set to ``None``. - .. note:: + .. versionchanged:: 3.15 + Trailing null bytes (``\x00``) in :class:`bytes` and :class:`str` values + are now preserved correctly. See :gh:`106939` and :gh:`145261`. - .. versionchanged:: 3.15 - Fixed a bug where :class:`bytes` and :class:`str` values ending with - ``\x00`` nul bytes or characters were silently stripped when fetching - them by index. Trailing nulls are now preserved correctly. - See :gh:`106939` and :gh:`145261`. + .. note:: - In Python 3.14 and earlier, a bug exists where :class:`bytes` and - :class:`str` values ending with ``\x00`` nul bytes or characters may be - *silently stripped* when fetching them by index from the - :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior has been - fixed in Python 3.15. + In Python 3.14 and earlier, a known issue exists for :class:`bytes` and + :class:`str` values. If they end with ``\x00`` nul bytes or characters, + those may be *silently stripped* when fetching them by index from the + :class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior was fixed + in Python 3.15. For applications that need to work with Python 3.14 and earlier where rstripping of trailing nulls is a problem, work around it by always @@ -333,13 +331,14 @@ finishes execution. .. 
doctest:: >>> from multiprocessing import shared_memory - >>> nul_bug_demo = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00']) - >>> nul_bug_demo[0] + >>> # Python 3.15+: trailing nulls are preserved + >>> sl = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00']) + >>> sl[0] '?\x00' - >>> nul_bug_demo[1] + >>> sl[1] b'\x03\x02\x01\x00\x00\x00' - >>> nul_bug_demo.shm.unlink() - >>> # Workaround for Python 3.14 and earlier (not needed in 3.15+): + >>> sl.shm.unlink() + >>> # Workaround for Python 3.14 and earlier: >>> padded = shared_memory.ShareableList(['?\x00\x07', b'\x03\x02\x01\x00\x00\x00\x07']) >>> padded[0][:-1] '?\x00'