Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions Doc/library/multiprocessing.shared_memory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -311,28 +311,34 @@ finishes execution.
existing :class:`!ShareableList`, specify its shared memory block's unique
name while leaving *sequence* set to ``None``.

.. versionchanged:: 3.15
Trailing null bytes (``\x00``) in :class:`bytes` and :class:`str` values
are now preserved correctly. See :gh:`106939` and :gh:`145261`.

.. note::

A known issue exists for :class:`bytes` and :class:`str` values.
If they end with ``\x00`` nul bytes or characters, those may be
*silently stripped* when fetching them by index from the
:class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior is
considered a bug and may go away in the future. See :gh:`106939`.
In Python 3.14 and earlier, a known issue exists for :class:`bytes` and
:class:`str` values. If they end with ``\x00`` nul bytes or characters,
those may be *silently stripped* when fetching them by index from the
:class:`!ShareableList`. This ``.rstrip(b'\x00')`` behavior was fixed
in Python 3.15.

For applications where rstripping of trailing nulls is a problem,
work around it by always unconditionally appending an extra non-0
byte to the end of such values when storing and unconditionally
removing it when fetching:
For applications that must also run on Python 3.14 or earlier, where
stripping of trailing nulls is a problem, work around it by
unconditionally appending an extra non-0 byte to the end of such values
when storing and unconditionally removing it when fetching:

.. doctest::

>>> from multiprocessing import shared_memory
>>> nul_bug_demo = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00'])
>>> nul_bug_demo[0]
'?'
>>> nul_bug_demo[1]
b'\x03\x02\x01'
>>> nul_bug_demo.shm.unlink()
>>> # Python 3.15+: trailing nulls are preserved
>>> sl = shared_memory.ShareableList(['?\x00', b'\x03\x02\x01\x00\x00\x00'])
>>> sl[0]
'?\x00'
>>> sl[1]
b'\x03\x02\x01\x00\x00\x00'
>>> sl.shm.unlink()
>>> # Workaround for Python 3.14 and earlier:
>>> padded = shared_memory.ShareableList(['?\x00\x07', b'\x03\x02\x01\x00\x00\x00\x07'])
>>> padded[0][:-1]
'?\x00'
Expand Down
19 changes: 14 additions & 5 deletions Lib/multiprocessing/shared_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,8 @@ class ShareableList:
_alignment = 8
_back_transforms_mapping = {
0: lambda value: value, # int, float, bool
1: lambda value: value.rstrip(b'\x00').decode(_encoding), # str
2: lambda value: value.rstrip(b'\x00'), # bytes
1: lambda value: value.decode(_encoding), # str
2: lambda value: value, # bytes
3: lambda _value: None, # None
}

Expand All @@ -312,7 +312,7 @@ def __init__(self, sequence=None, *, name=None):
self._types_mapping[type(item)]
if not isinstance(item, (str, bytes))
else self._types_mapping[type(item)] % (
self._alignment * (len(item) // self._alignment + 1),
self._alignment * (len(item.encode('utf-8') if isinstance(item, str) else item) // self._alignment + 1),
)
for item in sequence
]
Expand Down Expand Up @@ -355,11 +355,18 @@ def __init__(self, sequence=None, *, name=None):
self._offset_data_start,
*(v.encode(_enc) if isinstance(v, str) else v for v in sequence)
)
# For bytes and str, store actual byte length so retrieval is exact
_stored_formats = [
(self._types_mapping[str] % (len(v.encode(_enc)),) if isinstance(v, str)
else self._types_mapping[bytes] % (len(v),) if isinstance(v, bytes)
else f)
for v, f in zip(sequence, _formats)
]
struct.pack_into(
self._format_packing_metainfo,
self.shm.buf,
self._offset_packing_formats,
*(v.encode(_enc) for v in _formats)
*(v.encode(_enc) for v in _stored_formats)
)
struct.pack_into(
self._format_back_transform_codes,
Expand Down Expand Up @@ -476,7 +483,9 @@ def __setitem__(self, position, value):

self._set_packing_format_and_transform(
position,
new_format,
(self._types_mapping[bytes] % (len(encoded_value),) if isinstance(value, bytes)
else self._types_mapping[str] % (len(encoded_value),) if isinstance(value, str)
else new_format),
value
)
struct.pack_into(new_format, self.shm.buf, offset, encoded_value)
Expand Down
11 changes: 5 additions & 6 deletions Lib/test/_test_multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4757,7 +4757,7 @@ def test_shared_memory_ShareableList_basics(self):
self.assertEqual(current_format, sl._get_packing_format(0))

# Verify attributes are readable.
self.assertEqual(sl.format, '8s8sdqxxxxxx?xxxxxxxx?q')
self.assertEqual(sl.format, '5s5sdqxxxxxx?xxxxxxxx?q')

# Exercise len().
self.assertEqual(len(sl), 7)
Expand Down Expand Up @@ -4785,17 +4785,17 @@ def test_shared_memory_ShareableList_basics(self):
self.assertEqual(sl[3], 42)
sl[4] = 'some' # Change type at a given position.
self.assertEqual(sl[4], 'some')
self.assertEqual(sl.format, '8s8sdq8sxxxxxxx?q')
self.assertEqual(sl.format, '5s5sdq4sxxxxxxx?q')
with self.assertRaisesRegex(ValueError,
"exceeds available storage"):
sl[4] = 'far too many'
self.assertEqual(sl[4], 'some')
sl[0] = 'encodés' # Exactly 8 bytes of UTF-8 data
self.assertEqual(sl[0], 'encodés')
sl[0] = 'hello' # Exactly 5 bytes of UTF-8 data
self.assertEqual(sl[0], 'hello')
self.assertEqual(sl[1], b'HoWdY') # no spillage
with self.assertRaisesRegex(ValueError,
"exceeds available storage"):
sl[0] = 'encodées' # Exactly 9 bytes of UTF-8 data
sl[0] = 'hëllöö' # Exactly 9 bytes of UTF-8 data
self.assertEqual(sl[1], b'HoWdY')
with self.assertRaisesRegex(ValueError,
"exceeds available storage"):
Expand All @@ -4817,7 +4817,6 @@ def test_shared_memory_ShareableList_basics(self):
self.assertNotEqual(sl.shm.name, sl_copy.shm.name)
self.assertEqual(name_duplicate, sl_copy.shm.name)
self.assertEqual(list(sl), list(sl_copy))
self.assertEqual(sl.format, sl_copy.format)
sl_copy[-1] = 77
self.assertEqual(sl_copy[-1], 77)
self.assertNotEqual(sl[-1], 77)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Fix :class:`~multiprocessing.shared_memory.ShareableList` corrupting
multi-byte UTF-8 strings due to using character count instead of byte count
for slot allocation, and stripping legitimate trailing null bytes from
``bytes`` and ``str`` values.
Loading