From fae87b98fb42d7fcb8b490f17791c775cab80996 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 27 Mar 2026 23:16:12 +0800 Subject: [PATCH 1/9] Verify that JIT stencils preserve frame pointer --- Tools/jit/_optimizers.py | 15 ++++++++++++++- Tools/jit/_targets.py | 17 ++++++++++------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index 83c878d8fe205b..a1b8c9f2d6f4e4 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -162,6 +162,7 @@ class Optimizer: label_prefix: str symbol_prefix: str re_global: re.Pattern[str] + frame_pointers: bool # The first block in the linked list: _root: _Block = dataclasses.field(init=False, default_factory=_Block) _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) @@ -193,6 +194,7 @@ class Optimizer: _re_small_const_1 = _RE_NEVER_MATCH _re_small_const_2 = _RE_NEVER_MATCH const_reloc = "" + _frame_pointer_prologue: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH def __post_init__(self) -> None: # Split the code into a linked list of basic blocks. A basic block is an @@ -553,6 +555,14 @@ def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]: def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool: raise NotImplementedError() + def _validate(self): + for block in self._blocks(): + if not block.instructions: + continue + for inst in block.instructions: + if self.frame_pointers: + assert self._frame_pointer_prologue.match(inst.text) is None, "Frame pointer should not be modified" + def run(self) -> None: """Run this optimizer.""" self._insert_continue_label() @@ -565,6 +575,7 @@ def run(self) -> None: self._remove_unreachable() self._fixup_external_labels() self._fixup_constants() + self._validate() self.path.write_text(self._body()) @@ -595,6 +606,7 @@ class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods r"\s*(?Pldr)\s+.*(?P_JIT_OP(ARG|ERAND(0|1))_(16|32)).*" ) const_reloc = "CUSTOM_AARCH64_CONST" + _frame_pointer_prologue = re.compile(r"\s*stp\s+x29.*") def _get_reg(self, inst: Instruction) -> str: _, rest = inst.text.split(inst.name) @@ -649,4 +661,5 @@ class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods # https://www.felixcloutier.com/x86/jmp _re_jump = re.compile(r"\s*jmp\s+(?P[\w.]+)") # https://www.felixcloutier.com/x86/ret - _re_return = re.compile(r"\s*ret\b") + _re_return = re.compile(r"\s*retq?\b") + _frame_pointer_prologue = re.compile(r"\s*pushq\s+%rbp.*") diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index fa98dcb5a40851..76481495409d64 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -176,8 +176,9 @@ async def _compile( f"{s}", f"{c}", ] + is_shim = opname == "shim" if self.frame_pointers: - frame_pointer = "all" if opname == "shim" else "reserved" + frame_pointer = "all" if is_shim else "reserved" args_s += ["-Xclang", f"-mframe-pointer={frame_pointer}"] args_s += self.args # Allow user-provided CFLAGS to override any defaults @@ -185,12 +186,14 @@ async def _compile( await _llvm.run( "clang", args_s, echo=self.verbose, llvm_version=self.llvm_version ) - self.optimizer( - s, - label_prefix=self.label_prefix, - symbol_prefix=self.symbol_prefix, - re_global=self.re_global, - ).run() + if not is_shim: + self.optimizer( + s, + label_prefix=self.label_prefix, + symbol_prefix=self.symbol_prefix, + re_global=self.re_global, + frame_pointers=self.frame_pointers + ).run() args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"] await _llvm.run( "clang", args_o, echo=self.verbose, llvm_version=self.llvm_version From 776e109d0b43ecdabdf83b8f70641aa5e5f565e8 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 27 Mar 2026 23:22:47 +0800 Subject: [PATCH 2/9] Update _optimizers.py --- Tools/jit/_optimizers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index a1b8c9f2d6f4e4..0923f7c69f60b0 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -194,7 +194,7 @@ class Optimizer: _re_small_const_1 = _RE_NEVER_MATCH _re_small_const_2 = _RE_NEVER_MATCH const_reloc = "" - _frame_pointer_prologue: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH + _frame_pointer_modify: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH def __post_init__(self) -> None: # Split the code into a linked list of basic blocks. A basic block is an @@ -561,7 +561,7 @@ def _validate(self): continue for inst in block.instructions: if self.frame_pointers: - assert self._frame_pointer_prologue.match(inst.text) is None, "Frame pointer should not be modified" + assert self._frame_pointer_modify.match(inst.text) is None, "Frame pointer should not be modified" def run(self) -> None: """Run this optimizer.""" @@ -606,7 +606,7 @@ class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods r"\s*(?Pldr)\s+.*(?P_JIT_OP(ARG|ERAND(0|1))_(16|32)).*" ) const_reloc = "CUSTOM_AARCH64_CONST" - _frame_pointer_prologue = re.compile(r"\s*stp\s+x29.*") + _frame_pointer_modify = re.compile(r"\s*stp\s+x29.*") def _get_reg(self, inst: Instruction) -> str: _, rest = inst.text.split(inst.name) @@ -662,4 +662,4 @@ class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods _re_jump = re.compile(r"\s*jmp\s+(?P[\w.]+)") # https://www.felixcloutier.com/x86/ret _re_return = re.compile(r"\s*retq?\b") - _frame_pointer_prologue = re.compile(r"\s*pushq\s+%rbp.*") + _frame_pointer_modify = re.compile(r"\s*movq?\s+%(\w+),\s+%rbp.*") From 8ad1774380c52304d7911ca20c40dad36ed7d171 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 27 Mar 2026 23:24:40 +0800 Subject: [PATCH 3/9] Fix lint --- Tools/jit/_optimizers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py index 0923f7c69f60b0..ef28e0c0ddeac8 100644 --- a/Tools/jit/_optimizers.py +++ b/Tools/jit/_optimizers.py @@ -555,13 +555,15 @@ def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]: def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool: raise NotImplementedError() - def _validate(self): + def _validate(self) -> None: for block in self._blocks(): if not block.instructions: continue for inst in block.instructions: if self.frame_pointers: - assert self._frame_pointer_modify.match(inst.text) is None, "Frame pointer should not be modified" + assert ( + self._frame_pointer_modify.match(inst.text) is None + ), "Frame pointer should not be modified" def run(self) -> None: """Run this optimizer.""" From 8c9fb56c7d3f67fa269602ad4cc56a193b12ea10 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 27 Mar 2026 23:26:37 +0800 Subject: [PATCH 4/9] more linting --- Tools/jit/_targets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 76481495409d64..5b35201bff27b4 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -192,7 +192,7 @@ async def _compile( label_prefix=self.label_prefix, symbol_prefix=self.symbol_prefix, re_global=self.re_global, - frame_pointers=self.frame_pointers + frame_pointers=self.frame_pointers, ).run() args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"] await _llvm.run( From bde6c5bbf46134c75638ded1ab57e544a0178a68 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 28 Mar 2026 02:47:17 +0800 Subject: [PATCH 5/9] Try a fix --- Include/internal/pycore_pystate.h | 6 ------ Python/pystate.c | 12 +++++------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index a66543cf1eb164..464e44eaf40a3e 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -306,12 +306,6 @@ _Py_AssertHoldsTstateFunc(const char *func) #define _Py_AssertHoldsTstate() #endif -#if !_Py__has_builtin(__builtin_frame_address) && !defined(__GNUC__) && !defined(_MSC_VER) -static uintptr_t return_pointer_as_int(char* p) { - return (uintptr_t)p; -} -#endif - PyAPI_DATA(uintptr_t) _Py_get_machine_stack_pointer(void); static inline intptr_t diff --git a/Python/pystate.c b/Python/pystate.c index f974c82c391f6a..18fbad35220bf7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3287,15 +3287,13 @@ _Py_GetMainConfig(void) return _PyInterpreterState_GetConfig(interp); } -uintptr_t +static uintptr_t return_pointer_as_int(char* p) { + return (uintptr_t)p; +} + +Py_NO_INLINE uintptr_t _Py_get_machine_stack_pointer(void) { -#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__) - return (uintptr_t)__builtin_frame_address(0); -#elif defined(_MSC_VER) - return (uintptr_t)_AddressOfReturnAddress(); -#else char here; /* Avoid compiler warning about returning stack address */ return return_pointer_as_int(&here); -#endif } From 3e1199e6554dacac648f72e4a039a66f283b9a8c Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 28 Mar 2026 02:47:44 +0800 Subject: [PATCH 6/9] PEP 7 --- Python/pystate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index 18fbad35220bf7..c7bfa2ebe04c1e 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3287,7 +3287,8 @@ _Py_GetMainConfig(void) return _PyInterpreterState_GetConfig(interp); } -static uintptr_t return_pointer_as_int(char* p) { +static uintptr_t +return_pointer_as_int(char* p) { return (uintptr_t)p; } From 743cc6a0dec2782c0df87fcee8e3b90c5b2e60a0 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 28 Mar 2026 03:04:34 +0800 Subject: [PATCH 7/9] Revert "PEP 7" This reverts commit 3e1199e6554dacac648f72e4a039a66f283b9a8c. --- Python/pystate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/pystate.c b/Python/pystate.c index c7bfa2ebe04c1e..18fbad35220bf7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3287,8 +3287,7 @@ _Py_GetMainConfig(void) return _PyInterpreterState_GetConfig(interp); } -static uintptr_t -return_pointer_as_int(char* p) { +static uintptr_t return_pointer_as_int(char* p) { return (uintptr_t)p; } From d7a2d30b8225632df23d220e61bd2ede5efc6233 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 28 Mar 2026 03:04:38 +0800 Subject: [PATCH 8/9] Revert "Try a fix" This reverts commit bde6c5bbf46134c75638ded1ab57e544a0178a68. --- Include/internal/pycore_pystate.h | 6 ++++++ Python/pystate.c | 12 +++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 464e44eaf40a3e..a66543cf1eb164 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -306,6 +306,12 @@ _Py_AssertHoldsTstateFunc(const char *func) #define _Py_AssertHoldsTstate() #endif +#if !_Py__has_builtin(__builtin_frame_address) && !defined(__GNUC__) && !defined(_MSC_VER) +static uintptr_t return_pointer_as_int(char* p) { + return (uintptr_t)p; +} +#endif + PyAPI_DATA(uintptr_t) _Py_get_machine_stack_pointer(void); static inline intptr_t diff --git a/Python/pystate.c b/Python/pystate.c index 18fbad35220bf7..f974c82c391f6a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3287,13 +3287,15 @@ _Py_GetMainConfig(void) return _PyInterpreterState_GetConfig(interp); } -static uintptr_t return_pointer_as_int(char* p) { - return (uintptr_t)p; -} - -Py_NO_INLINE uintptr_t +uintptr_t _Py_get_machine_stack_pointer(void) { +#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__) + return (uintptr_t)__builtin_frame_address(0); +#elif defined(_MSC_VER) + return (uintptr_t)_AddressOfReturnAddress(); +#else char here; /* Avoid compiler warning about returning stack address */ return return_pointer_as_int(&here); +#endif } From 108ea06cba87dec504e5d72142d07d58114a5ad3 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Sat, 28 Mar 2026 03:07:50 +0800 Subject: [PATCH 9/9] Apply Brandt's fix --- Include/internal/pycore_ceval.h | 11 +---------- Include/internal/pycore_pystate.h | 13 ++++++++++++- Python/ceval.c | 13 +++++++++++++ Python/pystate.c | 13 ------------- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 16913289a02f59..2c83101b6b26fe 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -249,16 +249,7 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) { PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate); -static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) { - uintptr_t here_addr = _Py_get_machine_stack_pointer(); - _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - assert(_tstate->c_stack_hard_limit != 0); -#if _Py_STACK_GROWS_DOWN - return here_addr <= _tstate->c_stack_soft_limit; -#else - return here_addr >= _tstate->c_stack_soft_limit; -#endif -} +PyAPI_FUNC(int) _Py_ReachedRecursionLimit(PyThreadState *tstate); // Export for test_peg_generator PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin( diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index a66543cf1eb164..189a8dde9f09ed 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -312,7 +312,18 @@ static uintptr_t return_pointer_as_int(char* p) { } #endif -PyAPI_DATA(uintptr_t) _Py_get_machine_stack_pointer(void); +static inline uintptr_t +_Py_get_machine_stack_pointer(void) { +#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__) + return (uintptr_t)__builtin_frame_address(0); +#elif defined(_MSC_VER) + return (uintptr_t)_AddressOfReturnAddress(); +#else + char here; + /* Avoid compiler warning about returning stack address */ + return return_pointer_as_int(&here); +#endif +} static inline intptr_t _Py_RecursionLimit_GetMargin(PyThreadState *tstate) diff --git a/Python/ceval.c b/Python/ceval.c index b4c57b65d13d18..f95900ae01a6af 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1201,6 +1201,19 @@ _PyEval_GetIter(_PyStackRef iterable, _PyStackRef *index_or_null, int yield_from return PyStackRef_FromPyObjectSteal(iter_o); } +Py_NO_INLINE int +_Py_ReachedRecursionLimit(PyThreadState *tstate) { + uintptr_t here_addr = _Py_get_machine_stack_pointer(); + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + assert(_tstate->c_stack_hard_limit != 0); +#if _Py_STACK_GROWS_DOWN + return here_addr <= _tstate->c_stack_soft_limit; +#else + return here_addr >= _tstate->c_stack_soft_limit; +#endif +} + + #if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__) /* * gh-129987: The SLP autovectorizer can cause poor code generation for diff --git a/Python/pystate.c b/Python/pystate.c index f974c82c391f6a..143175da0f45c7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3286,16 +3286,3 @@ _Py_GetMainConfig(void) } return _PyInterpreterState_GetConfig(interp); } - -uintptr_t -_Py_get_machine_stack_pointer(void) { -#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__) - return (uintptr_t)__builtin_frame_address(0); -#elif defined(_MSC_VER) - return (uintptr_t)_AddressOfReturnAddress(); -#else - char here; - /* Avoid compiler warning about returning stack address */ - return return_pointer_as_int(&here); -#endif -}