Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 1 addition & 10 deletions Include/internal/pycore_ceval.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,16 +249,7 @@ static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) {

PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate);

/* Report whether the current machine stack position has reached the
 * soft C-stack limit recorded for this thread.
 *
 * tstate is viewed as a _PyThreadStateImpl in order to read the limits.
 * The direction of the comparison is selected by _Py_STACK_GROWS_DOWN.
 * Returns nonzero when the soft limit has been reached, zero otherwise.
 */
static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) {
    _PyThreadStateImpl *impl = (_PyThreadStateImpl *)tstate;
    /* A zero hard limit is rejected here; presumably it means the limits
     * were never initialized -- confirm against the initializer. */
    assert(impl->c_stack_hard_limit != 0);
    uintptr_t sp = _Py_get_machine_stack_pointer();
#if _Py_STACK_GROWS_DOWN
    int reached = (sp <= impl->c_stack_soft_limit);
#else
    int reached = (sp >= impl->c_stack_soft_limit);
#endif
    return reached;
}
PyAPI_FUNC(int) _Py_ReachedRecursionLimit(PyThreadState *tstate);

// Export for test_peg_generator
PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(
Expand Down
13 changes: 12 additions & 1 deletion Include/internal/pycore_pystate.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,18 @@ static uintptr_t return_pointer_as_int(char* p) {
}
#endif

PyAPI_DATA(uintptr_t) _Py_get_machine_stack_pointer(void);
/* Return an address inside the current stack frame as an integer.
 *
 * Three strategies, chosen at compile time:
 *   - compilers providing __builtin_frame_address (or any GNU-compatible
 *     compiler): the address of the current frame;
 *   - MSVC: the address of the slot holding the return address;
 *   - otherwise: the address of a local variable.
 */
static inline uintptr_t
_Py_get_machine_stack_pointer(void)
{
#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
    void *frame = __builtin_frame_address(0);
    return (uintptr_t)frame;
#elif defined(_MSC_VER)
    void *return_slot = _AddressOfReturnAddress();
    return (uintptr_t)return_slot;
#else
    char marker;
    /* Go through a helper so the compiler does not warn about
     * returning the address of a stack variable. */
    return return_pointer_as_int(&marker);
#endif
}

static inline intptr_t
_Py_RecursionLimit_GetMargin(PyThreadState *tstate)
Expand Down
13 changes: 13 additions & 0 deletions Python/ceval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,19 @@ _PyEval_GetIter(_PyStackRef iterable, _PyStackRef *index_or_null, int yield_from
return PyStackRef_FromPyObjectSteal(iter_o);
}

/* Out-of-line check of the current stack position against the thread's
 * soft C-stack limit.
 *
 * Reads the machine stack pointer, then compares it with
 * c_stack_soft_limit from the _PyThreadStateImpl view of tstate; the
 * comparison direction depends on _Py_STACK_GROWS_DOWN.
 * Returns nonzero if the soft limit has been reached, zero otherwise.
 */
Py_NO_INLINE int
_Py_ReachedRecursionLimit(PyThreadState *tstate)
{
    uintptr_t stack_pos = _Py_get_machine_stack_pointer();
    _PyThreadStateImpl *ts_impl = (_PyThreadStateImpl *)tstate;
    /* The hard limit must be nonzero before the soft limit is consulted. */
    assert(ts_impl->c_stack_hard_limit != 0);
#if _Py_STACK_GROWS_DOWN
    /* Reached once the position is no longer above the limit. */
    return !(stack_pos > ts_impl->c_stack_soft_limit);
#else
    /* Reached once the position is no longer below the limit. */
    return !(stack_pos < ts_impl->c_stack_soft_limit);
#endif
}


#if (defined(__GNUC__) && __GNUC__ >= 10 && !defined(__clang__)) && defined(__x86_64__)
/*
* gh-129987: The SLP autovectorizer can cause poor code generation for
Expand Down
13 changes: 0 additions & 13 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -3286,16 +3286,3 @@ _Py_GetMainConfig(void)
}
return _PyInterpreterState_GetConfig(interp);
}

/* Return, as an integer, an address located in the current stack frame.
 *
 * Uses __builtin_frame_address(0) when that builtin is available (or the
 * compiler is GNU-compatible), _AddressOfReturnAddress() under MSVC, and
 * the address of a local variable on any other compiler.
 */
uintptr_t
_Py_get_machine_stack_pointer(void)
{
    uintptr_t position;
#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__)
    position = (uintptr_t)__builtin_frame_address(0);
#elif defined(_MSC_VER)
    position = (uintptr_t)_AddressOfReturnAddress();
#else
    char probe;
    /* The helper call avoids a compiler warning about returning the
     * address of a stack variable. */
    position = return_pointer_as_int(&probe);
#endif
    return position;
}
17 changes: 16 additions & 1 deletion Tools/jit/_optimizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class Optimizer:
label_prefix: str
symbol_prefix: str
re_global: re.Pattern[str]
frame_pointers: bool
# The first block in the linked list:
_root: _Block = dataclasses.field(init=False, default_factory=_Block)
_labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict)
Expand Down Expand Up @@ -193,6 +194,7 @@ class Optimizer:
_re_small_const_1 = _RE_NEVER_MATCH
_re_small_const_2 = _RE_NEVER_MATCH
const_reloc = "<Not supported>"
_frame_pointer_modify: typing.ClassVar[re.Pattern[str]] = _RE_NEVER_MATCH

def __post_init__(self) -> None:
# Split the code into a linked list of basic blocks. A basic block is an
Expand Down Expand Up @@ -553,6 +555,16 @@ def _small_const_2(self, inst: Instruction) -> tuple[str, Instruction | None]:
def _small_consts_match(self, inst1: Instruction, inst2: Instruction) -> bool:
raise NotImplementedError()

def _validate(self) -> None:
for block in self._blocks():
if not block.instructions:
continue
for inst in block.instructions:
if self.frame_pointers:
assert (
self._frame_pointer_modify.match(inst.text) is None
), "Frame pointer should not be modified"

def run(self) -> None:
"""Run this optimizer."""
self._insert_continue_label()
Expand All @@ -565,6 +577,7 @@ def run(self) -> None:
self._remove_unreachable()
self._fixup_external_labels()
self._fixup_constants()
self._validate()
self.path.write_text(self._body())


Expand Down Expand Up @@ -595,6 +608,7 @@ class OptimizerAArch64(Optimizer): # pylint: disable = too-few-public-methods
r"\s*(?P<instruction>ldr)\s+.*(?P<value>_JIT_OP(ARG|ERAND(0|1))_(16|32)).*"
)
const_reloc = "CUSTOM_AARCH64_CONST"
_frame_pointer_modify = re.compile(r"\s*stp\s+x29.*")

def _get_reg(self, inst: Instruction) -> str:
_, rest = inst.text.split(inst.name)
Expand Down Expand Up @@ -649,4 +663,5 @@ class OptimizerX86(Optimizer): # pylint: disable = too-few-public-methods
# https://www.felixcloutier.com/x86/jmp
_re_jump = re.compile(r"\s*jmp\s+(?P<target>[\w.]+)")
# https://www.felixcloutier.com/x86/ret
_re_return = re.compile(r"\s*ret\b")
_re_return = re.compile(r"\s*retq?\b")
_frame_pointer_modify = re.compile(r"\s*movq?\s+%(\w+),\s+%rbp.*")
17 changes: 10 additions & 7 deletions Tools/jit/_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,21 +176,24 @@ async def _compile(
f"{s}",
f"{c}",
]
is_shim = opname == "shim"
if self.frame_pointers:
frame_pointer = "all" if opname == "shim" else "reserved"
frame_pointer = "all" if is_shim else "reserved"
args_s += ["-Xclang", f"-mframe-pointer={frame_pointer}"]
args_s += self.args
# Allow user-provided CFLAGS to override any defaults
args_s += shlex.split(self.cflags)
await _llvm.run(
"clang", args_s, echo=self.verbose, llvm_version=self.llvm_version
)
self.optimizer(
s,
label_prefix=self.label_prefix,
symbol_prefix=self.symbol_prefix,
re_global=self.re_global,
).run()
if not is_shim:
self.optimizer(
s,
label_prefix=self.label_prefix,
symbol_prefix=self.symbol_prefix,
re_global=self.re_global,
frame_pointers=self.frame_pointers,
).run()
args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
await _llvm.run(
"clang", args_o, echo=self.verbose, llvm_version=self.llvm_version
Expand Down
Loading