diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index e91acc065ba9ae..cde697e3dc7ab0 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -31,7 +31,7 @@ from . import _meta from ._collections import FreezableDefaultDict, Pair -from ._functools import method_cache, pass_none +from ._functools import method_cache, noop, pass_none, passthrough from ._itertools import always_iterable, bucket, unique_everseen from ._meta import PackageMetadata, SimplePath from ._typing import md_none @@ -783,6 +783,20 @@ def find_distributions(self, context=Context()) -> Iterable[Distribution]: """ +@passthrough +def _clear_after_fork(cached): + """Ensure ``cached`` clears its cache after ``fork`` when supported. + + ``FastPath`` caches zip-backed ``pathlib.Path`` objects that retain a + reference to the parent's open ``ZipFile`` handle. Re-using a cached + instance in a forked child can therefore resurrect invalid file pointers + and trigger ``BadZipFile``/``OSError`` failures (python/importlib_metadata#520). + Registering ``cache_clear`` with ``os.register_at_fork`` keeps each process + on its own cache. + """ + getattr(os, 'register_at_fork', noop)(after_in_child=cached.cache_clear) + + class FastPath: """ Micro-optimized class for searching a root for children. @@ -799,7 +813,8 @@ class FastPath: True """ - @functools.lru_cache() # type: ignore[misc] + @_clear_after_fork # type: ignore[misc] + @functools.lru_cache() def __new__(cls, root): return super().__new__(cls) @@ -925,10 +940,12 @@ def __init__(self, name: str | None): def normalize(name): """ PEP 503 normalization plus dashes as underscores. + + Specifically avoids the ``re.sub`` call that PEP 503 prescribes, for + performance benefits (see python/cpython#143658). 
""" - # Much faster than re.sub, and even faster than str.translate value = name.lower().replace("-", "_").replace(".", "_") - # Condense repeats (faster than regex) + # Condense repeats while "__" in value: value = value.replace("__", "_") return value diff --git a/Lib/importlib/metadata/_adapters.py b/Lib/importlib/metadata/_adapters.py index f5b30dd92cde69..dede395d79a38b 100644 --- a/Lib/importlib/metadata/_adapters.py +++ b/Lib/importlib/metadata/_adapters.py @@ -9,7 +9,8 @@ class RawPolicy(email.policy.EmailPolicy): def fold(self, name, value): folded = self.linesep.join( - textwrap.indent(value, prefix=' ' * 8, predicate=lambda line: True) + textwrap + .indent(value, prefix=' ' * 8, predicate=lambda line: True) .lstrip() .splitlines() ) diff --git a/Lib/importlib/metadata/_functools.py b/Lib/importlib/metadata/_functools.py index 5dda6a2199ad0b..c159b46e48959c 100644 --- a/Lib/importlib/metadata/_functools.py +++ b/Lib/importlib/metadata/_functools.py @@ -1,5 +1,7 @@ import functools import types +from collections.abc import Callable +from typing import TypeVar # from jaraco.functools 3.3 @@ -102,3 +104,33 @@ def wrapper(param, *args, **kwargs): return func(param, *args, **kwargs) return wrapper + + +# From jaraco.functools 4.4 +def noop(*args, **kwargs): + """ + A no-operation function that does nothing. + + >>> noop(1, 2, three=3) + """ + + +_T = TypeVar('_T') + + +# From jaraco.functools 4.4 +def passthrough(func: Callable[..., object]) -> Callable[[_T], _T]: + """ + Wrap the function to always return the first parameter. 
+ + >>> passthrough(print)('3') + 3 + '3' + """ + + @functools.wraps(func) + def wrapper(first: _T, *args, **kwargs) -> _T: + func(first, *args, **kwargs) + return first + + return wrapper # type: ignore[return-value] diff --git a/Lib/test/test_importlib/metadata/fixtures.py b/Lib/test/test_importlib/metadata/fixtures.py index ad0ab42e089a9d..3283697d418188 100644 --- a/Lib/test/test_importlib/metadata/fixtures.py +++ b/Lib/test/test_importlib/metadata/fixtures.py @@ -6,6 +6,7 @@ import shutil import sys import textwrap +from importlib import resources from test.support import import_helper from test.support import os_helper @@ -14,11 +15,6 @@ from . import _path from ._path import FilesSpec -if sys.version_info >= (3, 9): - from importlib import resources -else: - import importlib_resources as resources - @contextlib.contextmanager def tmp_path(): @@ -374,8 +370,6 @@ def setUp(self): # Add self.zip_name to the front of sys.path. self.resources = contextlib.ExitStack() self.addCleanup(self.resources.close) - # workaround for #138313 - self.addCleanup(lambda: None) def parameterize(*args_set): diff --git a/Lib/test/test_importlib/metadata/test_api.py b/Lib/test/test_importlib/metadata/test_api.py index 3c856a88b77bf6..5449f0484492fb 100644 --- a/Lib/test/test_importlib/metadata/test_api.py +++ b/Lib/test/test_importlib/metadata/test_api.py @@ -317,33 +317,31 @@ def test_invalidate_cache(self): class PreparedTests(unittest.TestCase): - def test_normalize(self): - tests = [ - # Simple - ("sample", "sample"), - # Mixed case - ("Sample", "sample"), - ("SAMPLE", "sample"), - ("SaMpLe", "sample"), - # Separator conversions - ("sample-pkg", "sample_pkg"), - ("sample.pkg", "sample_pkg"), - ("sample_pkg", "sample_pkg"), - # Multiple separators - ("sample---pkg", "sample_pkg"), - ("sample___pkg", "sample_pkg"), - ("sample...pkg", "sample_pkg"), - # Mixed separators - ("sample-._pkg", "sample_pkg"), - ("sample_.-pkg", "sample_pkg"), - # Complex - ("Sample__Pkg-name.foo", 
"sample_pkg_name_foo"), - ("Sample__Pkg.name__foo", "sample_pkg_name_foo"), - # Uppercase with separators - ("SAMPLE-PKG", "sample_pkg"), - ("Sample.Pkg", "sample_pkg"), - ("SAMPLE_PKG", "sample_pkg"), - ] - for name, expected in tests: - with self.subTest(name=name): - self.assertEqual(Prepared.normalize(name), expected) + @fixtures.parameterize( + # Simple + dict(input='sample', expected='sample'), + # Mixed case + dict(input='Sample', expected='sample'), + dict(input='SAMPLE', expected='sample'), + dict(input='SaMpLe', expected='sample'), + # Separator conversions + dict(input='sample-pkg', expected='sample_pkg'), + dict(input='sample.pkg', expected='sample_pkg'), + dict(input='sample_pkg', expected='sample_pkg'), + # Multiple separators + dict(input='sample---pkg', expected='sample_pkg'), + dict(input='sample___pkg', expected='sample_pkg'), + dict(input='sample...pkg', expected='sample_pkg'), + # Mixed separators + dict(input='sample-._pkg', expected='sample_pkg'), + dict(input='sample_.-pkg', expected='sample_pkg'), + # Complex + dict(input='Sample__Pkg-name.foo', expected='sample_pkg_name_foo'), + dict(input='Sample__Pkg.name__foo', expected='sample_pkg_name_foo'), + # Uppercase with separators + dict(input='SAMPLE-PKG', expected='sample_pkg'), + dict(input='Sample.Pkg', expected='sample_pkg'), + dict(input='SAMPLE_PKG', expected='sample_pkg'), + ) + def test_normalize(self, input, expected): + self.assertEqual(Prepared.normalize(input), expected) diff --git a/Lib/test/test_importlib/metadata/test_main.py b/Lib/test/test_importlib/metadata/test_main.py index 83b686babfdb7a..f6c4ab2e78fe47 100644 --- a/Lib/test/test_importlib/metadata/test_main.py +++ b/Lib/test/test_importlib/metadata/test_main.py @@ -2,12 +2,12 @@ import pickle import re import unittest -from test.support import os_helper try: import pyfakefs.fake_filesystem_unittest as ffs except ImportError: from .stubs import fake_filesystem_unittest as ffs +from test.support import os_helper from 
importlib.metadata import ( Distribution, diff --git a/Lib/test/test_importlib/metadata/test_zip.py b/Lib/test/test_importlib/metadata/test_zip.py index fcb649f3736076..9daa04173b843e 100644 --- a/Lib/test/test_importlib/metadata/test_zip.py +++ b/Lib/test/test_importlib/metadata/test_zip.py @@ -1,7 +1,12 @@ +import multiprocessing +import os import sys import unittest +from test.support import warnings_helper + from importlib.metadata import ( + FastPath, PackageNotFoundError, distribution, distributions, @@ -47,6 +52,38 @@ def test_one_distribution(self): dists = list(distributions(path=sys.path[:1])) assert len(dists) == 1 + @warnings_helper.ignore_fork_in_thread_deprecation_warnings() + @unittest.skipUnless( + hasattr(os, 'register_at_fork') + and 'fork' in multiprocessing.get_all_start_methods(), + 'requires fork-based multiprocessing support', + ) + def test_fastpath_cache_cleared_in_forked_child(self): + zip_path = sys.path[0] + + FastPath(zip_path) + assert FastPath.__new__.cache_info().currsize >= 1 + + ctx = multiprocessing.get_context('fork') + parent_conn, child_conn = ctx.Pipe() + + def child(conn, root): + try: + before = FastPath.__new__.cache_info().currsize + FastPath(root) + after = FastPath.__new__.cache_info().currsize + conn.send((before, after)) + finally: + conn.close() + + proc = ctx.Process(target=child, args=(child_conn, zip_path)) + proc.start() + child_conn.close() + cache_sizes = parent_conn.recv() + proc.join() + + self.assertEqual(cache_sizes, (0, 1)) + class TestEgg(TestZip): def setUp(self): diff --git a/Misc/NEWS.d/next/Library/2026-03-20-14-53-00.gh-issue-146228.OJVEDL.rst b/Misc/NEWS.d/next/Library/2026-03-20-14-53-00.gh-issue-146228.OJVEDL.rst new file mode 100644 index 00000000000000..1356e2ca07d9ae --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-03-20-14-53-00.gh-issue-146228.OJVEDL.rst @@ -0,0 +1,2 @@ +Cached FastPath objects in importlib.metadata are now cleared on fork, +avoiding broken references to zip files in forked child processes.