From 77028cd9a13e4e8b745afa0890953e125b2a6bcf Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Tue, 7 Apr 2026 01:02:39 +0000 Subject: [PATCH] gh-144503: Pass sys.argv to forkserver as real argv elements Avoid embedding the parent's sys.argv into the forkserver -c command string via repr(). When sys.argv is large (e.g. thousands of file paths from a pre-commit hook), the resulting single argument could exceed the OS per-argument length limit (MAX_ARG_STRLEN on Linux, typically 128 KiB), causing posix_spawn to fail and the parent to observe a BrokenPipeError. Instead, append the argv entries as separate command-line arguments after -c; the forkserver child reads them back as sys.argv[1:]. This cannot exceed any limit the parent itself did not already satisfy. Regression introduced by gh-143706 / 298d5440eb8. --- Lib/multiprocessing/forkserver.py | 15 +++- Lib/test/_test_multiprocessing.py | 71 ++++++++++++++++++- Lib/test/mp_preload_large_sysargv.py | 30 ++++++++ ...-04-07-01-04-00.gh-issue-144503.argvfs.rst | 6 ++ 4 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 Lib/test/mp_preload_large_sysargv.py create mode 100644 Misc/NEWS.d/next/Library/2026-04-07-01-04-00.gh-issue-144503.argvfs.rst diff --git a/Lib/multiprocessing/forkserver.py b/Lib/multiprocessing/forkserver.py index d89b24ac59bec0..b204a3ad0f1f01 100644 --- a/Lib/multiprocessing/forkserver.py +++ b/Lib/multiprocessing/forkserver.py @@ -162,10 +162,17 @@ def ensure_running(self): self._forkserver_alive_fd = None self._forkserver_pid = None - cmd = ('from multiprocessing.forkserver import main; ' + - 'main(%d, %d, %r, **%r)') + # gh-144503: sys_argv is passed as real argv elements after the + # ``-c cmd`` rather than repr'd into main_kws so that a large + # parent sys.argv cannot push the single ``-c`` command string + # over the OS per-argument length limit (MAX_ARG_STRLEN on Linux). + # The child sees them as sys.argv[1:]. + cmd = ('import sys; ' + 'from multiprocessing.forkserver import main; ' + 'main(%d, %d, %r, sys_argv=sys.argv[1:], **%r)') main_kws = {} + sys_argv = None if self._preload_modules: data = spawn.get_preparation_data('ignore') if 'sys_path' in data: @@ -173,7 +180,7 @@ def ensure_running(self): if 'init_main_from_path' in data: main_kws['main_path'] = data['init_main_from_path'] if 'sys_argv' in data: - main_kws['sys_argv'] = data['sys_argv'] + sys_argv = data['sys_argv'] if self._preload_on_error != 'ignore': main_kws['on_error'] = self._preload_on_error @@ -197,6 +204,8 @@ def ensure_running(self): exe = spawn.get_executable() args = [exe] + util._args_from_interpreter_flags() args += ['-c', cmd] + if sys_argv is not None: + args += sys_argv pid = util.spawnv_passfds(exe, args, fds_to_pass) except: os.close(alive_w) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 490c7ae5e8076c..69174cff699115 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -207,10 +207,38 @@ def spawn_check_wrapper(*args, **kwargs): return decorator +def only_run_in_forkserver_testsuite(reason): + """Returns a decorator: raises SkipTest unless fork is supported + and the current start method is forkserver. + + Combines @support.requires_fork() with the single-run semantics of + only_run_in_spawn_testsuite(), but uses the forkserver testsuite as + the single-run target. Appropriate for tests that exercise + os.fork() directly (raw fork or mp.set_start_method("fork") in a + subprocess) and don't vary by start method, since forkserver is + only available on platforms that support fork. + """ + + def decorator(test_item): + + @functools.wraps(test_item) + def forkserver_check_wrapper(*args, **kwargs): + if not support.has_fork_support: + raise unittest.SkipTest("requires working os.fork()") + if (start_method := multiprocessing.get_start_method()) != "forkserver": + raise unittest.SkipTest( + f"{start_method=}, not 'forkserver'; {reason}") + return test_item(*args, **kwargs) + + return forkserver_check_wrapper + + return decorator + + class TestInternalDecorators(unittest.TestCase): """Logic within a test suite that could errantly skip tests? Test it!""" - @unittest.skipIf(sys.platform == "win32", "test requires that fork exists.") + @support.requires_fork() def test_only_run_in_spawn_testsuite(self): if multiprocessing.get_start_method() != "spawn": raise unittest.SkipTest("only run in test_multiprocessing_spawn.") @@ -234,6 +262,30 @@ def return_four_if_spawn(): finally: multiprocessing.set_start_method(orig_start_method, force=True) + @support.requires_fork() + def test_only_run_in_forkserver_testsuite(self): + if multiprocessing.get_start_method() != "forkserver": + raise unittest.SkipTest("only run in test_multiprocessing_forkserver.") + + try: + @only_run_in_forkserver_testsuite("testing this decorator") + def return_four_if_forkserver(): + return 4 + except Exception as err: + self.fail(f"expected decorated `def` not to raise; caught {err}") + + orig_start_method = multiprocessing.get_start_method(allow_none=True) + try: + multiprocessing.set_start_method("forkserver", force=True) + self.assertEqual(return_four_if_forkserver(), 4) + multiprocessing.set_start_method("spawn", force=True) + with self.assertRaises(unittest.SkipTest) as ctx: + return_four_if_forkserver() + self.assertIn("testing this decorator", str(ctx.exception)) + self.assertIn("start_method=", str(ctx.exception)) + finally: + multiprocessing.set_start_method(orig_start_method, force=True) + # # Creates a wrapper for a function which records the time it takes to finish @@ -7133,6 +7185,23 @@ def test_preload_main_sys_argv(self): '', ]) + @only_run_in_forkserver_testsuite("forkserver specific test.") + def test_preload_main_large_sys_argv(self): + # gh-144503: a very large parent sys.argv must not prevent the + # forkserver from starting (it previously overflowed the OS + # per-argument length limit when repr'd into the -c command string). + name = os.path.join(os.path.dirname(__file__), + 'mp_preload_large_sysargv.py') + _, out, err = test.support.script_helper.assert_python_ok(name) + self.assertEqual(err, b'') + + out = out.decode().split("\n") + self.assertEqual(out, [ + 'preload:5002:sentinel', + 'worker:5002:sentinel', + '', + ]) + # # Mixins # diff --git a/Lib/test/mp_preload_large_sysargv.py b/Lib/test/mp_preload_large_sysargv.py new file mode 100644 index 00000000000000..790fcd76eadae6 --- /dev/null +++ b/Lib/test/mp_preload_large_sysargv.py @@ -0,0 +1,30 @@ +# gh-144503: Test that the forkserver can start when the parent process has +# a very large sys.argv. Prior to the fix, sys.argv was repr'd into the +# forkserver ``-c`` command string which could exceed the OS limit on the +# length of a single argv element (MAX_ARG_STRLEN on Linux, ~128 KiB), +# causing posix_spawn to fail and the parent to see a BrokenPipeError. + +import multiprocessing +import sys + +EXPECTED_LEN = 5002 # argv[0] + 5000 padding entries + sentinel + + +def fun(): + print(f"worker:{len(sys.argv)}:{sys.argv[-1]}") + + +if __name__ == "__main__": + # Inflate sys.argv well past 128 KiB before the forkserver is started. + sys.argv[1:] = ["x" * 50] * 5000 + ["sentinel"] + assert len(sys.argv) == EXPECTED_LEN + + ctx = multiprocessing.get_context("forkserver") + p = ctx.Process(target=fun) + p.start() + p.join() + sys.exit(p.exitcode) +else: + # This branch runs when the forkserver preloads this module as + # __mp_main__; confirm the large argv was propagated intact. + print(f"preload:{len(sys.argv)}:{sys.argv[-1]}") diff --git a/Misc/NEWS.d/next/Library/2026-04-07-01-04-00.gh-issue-144503.argvfs.rst b/Misc/NEWS.d/next/Library/2026-04-07-01-04-00.gh-issue-144503.argvfs.rst new file mode 100644 index 00000000000000..fc73d1902eae79 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-07-01-04-00.gh-issue-144503.argvfs.rst @@ -0,0 +1,6 @@ +Fix a regression introduced in 3.14.3 and 3.13.12 where the +:mod:`multiprocessing` ``forkserver`` start method would fail with +:exc:`BrokenPipeError` when the parent process had a very large +:data:`sys.argv`. The argv is now passed to the forkserver as separate +command-line arguments rather than being embedded in the ``-c`` command +string, avoiding the operating system's per-argument length limit.