From 1a85b019a49debe05c318972a7078a4bad2b5ca4 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 10 Apr 2026 16:41:00 +0800 Subject: [PATCH 1/6] Block inlining of gigantic functions --- Makefile.pre.in | 5 +++++ configure | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ configure.ac | 29 +++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) diff --git a/Makefile.pre.in b/Makefile.pre.in index 354580aa482d25..8d325a4932509d 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -130,6 +130,8 @@ CONFIGURE_EXE_LDFLAGS=@EXE_LDFLAGS@ PY_CORE_EXE_LDFLAGS:= $(if $(CONFIGURE_EXE_LDFLAGS), $(CONFIGURE_EXE_LDFLAGS) $(PY_LDFLAGS_NODIST), $(PY_CORE_LDFLAGS)) # Strict or non-strict aliasing flags used to compile dtoa.c, see above CFLAGS_ALIASING=@CFLAGS_ALIASING@ +# Compilation flags only for ceval.c. +CFLAGS_CEVAL=@CFLAGS_CEVAL@ # Machine-dependent subdirectories @@ -3203,6 +3205,9 @@ regen-jit: Python/dtoa.o: Python/dtoa.c $(CC) -c $(PY_CORE_CFLAGS) $(CFLAGS_ALIASING) -o $@ $< +Python/ceval.o: Python/ceval.c + $(CC) -c $(PY_CORE_CFLAGS) $(CFLAGS_CEVAL) -o $@ $< + # Run reindent on the library .PHONY: reindent reindent: diff --git a/configure b/configure index 4726b4fe3102ac..10a2ef9e5370c1 100755 --- a/configure +++ b/configure @@ -829,6 +829,7 @@ OPENSSL_LDFLAGS OPENSSL_LIBS OPENSSL_INCLUDES ENSUREPIP +CFLAGS_CEVAL SRCDIRS THREADHEADERS PANEL_LIBS @@ -30358,6 +30359,53 @@ printf "%s\n" "#define HAVE_GLIBC_MEMMOVE_BUG 1" >>confdefs.h fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we need to manually block large inlining in ceval.c" >&5 +printf %s "checking if we need to manually block large inlining in ceval.c... " >&6; } +if test "$cross_compiling" = yes +then : + block_huge_inlining_in_ceval=undefined +else case e in #( + e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +void foo(void *p, void *q) { memmove(p, q, 19); } +int main(void) { +// See gh-148284: +// Clang 22 seems to have interactions with inlining and the stackref buffer +// which cause 40kB of stack usage on x86-64 in buggy versions of _PyEval_EvalFrameDefault +// in computed goto interpreter. The normal usage seen is normally 1-2kB. +#if defined(__clang__) && (__clang_major__ == 22) + return 1; +#else + return 0; +#endif +} + +_ACEOF +if ac_fn_c_try_run "$LINENO" +then : + block_huge_inlining_in_ceval=no +else case e in #( + e) block_huge_inlining_in_ceval=yes ;; +esac +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext ;; +esac +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $block_huge_inlining_in_ceval" >&5 +printf "%s\n" "$block_huge_inlining_in_ceval" >&6; } + +if test "$block_huge_inlining_in_ceval" = yes && test "$ac_cv_computed_gotos" = yes; then + // This number should be tuned to follow the C stack consumption + // in _PyEval_EvalFrameDefault on computed goto interpreter. + CFLAGS_CEVAL="-finline-max-stacksize=512" +else + CFLAGS_CEVAL="" +fi + + if test "$ac_cv_gcc_asm_for_x87" = yes; then # Some versions of gcc miscompile inline asm: # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46491 diff --git a/configure.ac b/configure.ac index dd860292cc2058..eb0f8f63338836 100644 --- a/configure.ac +++ b/configure.ac @@ -7371,6 +7371,35 @@ if test "$have_glibc_memmove_bug" = yes; then for memmove and bcopy.]) fi +AC_MSG_CHECKING([if we need to manually block large inlining in ceval.c]) +AC_RUN_IFELSE([AC_LANG_SOURCE([[ +void foo(void *p, void *q) { memmove(p, q, 19); } +int main(void) { +// See gh-148284: +// Clang 22 seems to have interactions with inlining and the stackref buffer +// which cause 40kB of stack usage on x86-64 in buggy versions of _PyEval_EvalFrameDefault +// in computed goto interpreter. The normal usage seen is normally 1-2kB. 
+#if defined(__clang__) && (__clang_major__ == 22) + return 1; +#else + return 0; +#endif +} +]])], +[block_huge_inlining_in_ceval=no], +[block_huge_inlining_in_ceval=yes], +[block_huge_inlining_in_ceval=undefined]) +AC_MSG_RESULT([$block_huge_inlining_in_ceval]) + +if test "$block_huge_inlining_in_ceval" = yes && test "$ac_cv_computed_gotos" = yes; then + // This number should be tuned to follow the C stack consumption + // in _PyEval_EvalFrameDefault on computed goto interpreter. + CFLAGS_CEVAL="-finline-max-stacksize=512" +else + CFLAGS_CEVAL="" +fi +AC_SUBST([CFLAGS_CEVAL]) + if test "$ac_cv_gcc_asm_for_x87" = yes; then # Some versions of gcc miscompile inline asm: # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46491 From 291f142f2d803a79efcabaa81c9ac9eed0f5d2ec Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 09:21:49 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst new file mode 100644 index 00000000000000..5453f15f67e3d0 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst @@ -0,0 +1 @@ +Fix high stack consumption in Python's interpreter loop on Clang 22 by setting function limits for inlining. 
From 4b79670357a52c360aabfc8f20c2174153f00b58 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 10 Apr 2026 17:30:40 +0800 Subject: [PATCH 3/6] remove unused function --- configure | 1 - configure.ac | 1 - 2 files changed, 2 deletions(-) diff --git a/configure b/configure index 10a2ef9e5370c1..c593d6cb3a411f 100755 --- a/configure +++ b/configure @@ -30368,7 +30368,6 @@ else case e in #( e) cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -void foo(void *p, void *q) { memmove(p, q, 19); } int main(void) { // See gh-148284: // Clang 22 seems to have interactions with inlining and the stackref buffer diff --git a/configure.ac b/configure.ac index eb0f8f63338836..ec8cd2a33c3627 100644 --- a/configure.ac +++ b/configure.ac @@ -7373,7 +7373,6 @@ fi AC_MSG_CHECKING([if we need to manually block large inlining in ceval.c]) AC_RUN_IFELSE([AC_LANG_SOURCE([[ -void foo(void *p, void *q) { memmove(p, q, 19); } int main(void) { // See gh-148284: // Clang 22 seems to have interactions with inlining and the stackref buffer From a95c401df5b884633b9f1ebbf344cc1778b099fe Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 10 Apr 2026 22:14:28 +0800 Subject: [PATCH 4/6] Address Victor review Co-Authored-By: Victor Stinner --- Doc/using/configure.rst | 6 ++++++ configure | 18 +++++++++--------- configure.ac | 18 +++++++++--------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 82409ac0cdcfa6..bf25de7cc90c6c 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -1574,6 +1574,12 @@ Compiler flags .. versionadded:: 3.7 +.. envvar:: CFLAGS_CEVAL + + Flags used to compile ``Python/ceval.c``. + + .. versionadded:: 3.14.5 + .. envvar:: CCSHARED Compiler flags used to build a shared library. diff --git a/configure b/configure index c593d6cb3a411f..a599cd639eac6f 100755 --- a/configure +++ b/configure @@ -30369,10 +30369,10 @@ else case e in #( /* end confdefs.h. 
*/ int main(void) { -// See gh-148284: -// Clang 22 seems to have interactions with inlining and the stackref buffer -// which cause 40kB of stack usage on x86-64 in buggy versions of _PyEval_EvalFrameDefault -// in computed goto interpreter. The normal usage seen is normally 1-2kB. +// See gh-148284: Clang 22 seems to have interactions with inlining +// and the stackref buffer which cause 40 kB of stack usage on x86-64 +// in buggy versions of _PyEval_EvalFrameDefault() in the computed goto +// interpreter. Normal usage is 1-2 kB. #if defined(__clang__) && (__clang_major__ == 22) return 1; #else @@ -30397,11 +30397,11 @@ fi printf "%s\n" "$block_huge_inlining_in_ceval" >&6; } if test "$block_huge_inlining_in_ceval" = yes && test "$ac_cv_computed_gotos" = yes; then - // This number should be tuned to follow the C stack consumption - // in _PyEval_EvalFrameDefault on computed goto interpreter. - CFLAGS_CEVAL="-finline-max-stacksize=512" -else - CFLAGS_CEVAL="" + # gh-148284: Suppress inlining of functions whose stack size exceeds + # 512 bytes. This number should be tuned to follow the C stack + # consumption in _PyEval_EvalFrameDefault() on the computed goto + # interpreter. + CFLAGS_CEVAL="$CFLAGS_CEVAL -finline-max-stacksize=512" fi diff --git a/configure.ac b/configure.ac index ec8cd2a33c3627..d7785c6f3e6e73 100644 --- a/configure.ac +++ b/configure.ac @@ -7374,10 +7374,10 @@ fi AC_MSG_CHECKING([if we need to manually block large inlining in ceval.c]) AC_RUN_IFELSE([AC_LANG_SOURCE([[ int main(void) { -// See gh-148284: -// Clang 22 seems to have interactions with inlining and the stackref buffer -// which cause 40kB of stack usage on x86-64 in buggy versions of _PyEval_EvalFrameDefault -// in computed goto interpreter. The normal usage seen is normally 1-2kB. 
+// See gh-148284: Clang 22 seems to have interactions with inlining +// and the stackref buffer which cause 40 kB of stack usage on x86-64 +// in buggy versions of _PyEval_EvalFrameDefault() in the computed goto +// interpreter. Normal usage is 1-2 kB. #if defined(__clang__) && (__clang_major__ == 22) return 1; #else @@ -7391,11 +7391,11 @@ int main(void) { AC_MSG_RESULT([$block_huge_inlining_in_ceval]) if test "$block_huge_inlining_in_ceval" = yes && test "$ac_cv_computed_gotos" = yes; then - // This number should be tuned to follow the C stack consumption - // in _PyEval_EvalFrameDefault on computed goto interpreter. - CFLAGS_CEVAL="-finline-max-stacksize=512" -else - CFLAGS_CEVAL="" + # gh-148284: Suppress inlining of functions whose stack size exceeds + # 512 bytes. This number should be tuned to follow the C stack + # consumption in _PyEval_EvalFrameDefault() on the computed goto + # interpreter. + CFLAGS_CEVAL="$CFLAGS_CEVAL -finline-max-stacksize=512" fi AC_SUBST([CFLAGS_CEVAL]) From 91352a9a1f28c2840e5d95b685347504f575d6a6 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 10 Apr 2026 22:16:06 +0800 Subject: [PATCH 5/6] Delete 2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst --- .../2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst deleted file mode 100644 index 5453f15f67e3d0..00000000000000 --- a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-09-21-40.gh-issue-148284.6xMH49.rst +++ /dev/null @@ -1 +0,0 @@ -Fix high stack consumption in Python's interpreter loop on Clang 22 by setting function limits for inlining. 
From 0ddb3ec5694fc5906f5b288fe076de1da8586745 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 10 Apr 2026 14:21:00 +0000 Subject: [PATCH 6/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2026-04-10-14-20-54.gh-issue-148284.HKs-S_.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-14-20-54.gh-issue-148284.HKs-S_.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-14-20-54.gh-issue-148284.HKs-S_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-14-20-54.gh-issue-148284.HKs-S_.rst new file mode 100644 index 00000000000000..a74f6c1a61affd --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-04-10-14-20-54.gh-issue-148284.HKs-S_.rst @@ -0,0 +1 @@ +Fix high stack consumption in Python's interpreter loop on Clang 22 by limiting the inlining of functions with large stack frames when building with computed gotos.