Skip to content

Commit c6944e7

Browse files
committed
Issue python#28618: Make hot functions using __attribute__((hot))
When Python is not compiled with PGO, the performance of Python on the call_simple and call_method microbenchmarks depends heavily on code placement. In the worst case, the performance slowdown can be up to 70%. The GCC __attribute__((hot)) attribute helps to keep hot code close together to reduce the risk of such a major slowdown. This attribute is ignored when Python is compiled with PGO. The following functions are considered hot according to statistics collected by perf record/perf report: * _PyEval_EvalFrameDefault() * call_function() * _PyFunction_FastCall() * PyFrame_New() * frame_dealloc() * PyErr_Occurred()
1 parent 0cae609 commit c6944e7

File tree

4 files changed

+31
-8
lines changed

4 files changed

+31
-8
lines changed

Include/pyport.h

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -490,13 +490,36 @@ extern "C" {
490490
* typedef int T1 Py_DEPRECATED(2.4);
491491
* extern int x() Py_DEPRECATED(2.5);
492492
*/
493-
#if defined(__GNUC__) && ((__GNUC__ >= 4) || \
494-
(__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
493+
#if defined(__GNUC__) \
494+
&& ((__GNUC__ >= 4) || (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
495495
#define Py_DEPRECATED(VERSION_UNUSED) __attribute__((__deprecated__))
496496
#else
497497
#define Py_DEPRECATED(VERSION_UNUSED)
498498
#endif
499499

500+
501+
/* _Py_HOT_FUNCTION
502+
* The hot attribute on a function is used to inform the compiler that the
503+
* function is a hot spot of the compiled program. The function is optimized
504+
* more aggressively and on many targets it is placed into a special subsection of
505+
* the text section so all hot functions appear close together, improving
506+
* locality.
507+
*
508+
* Usage:
509+
* int _Py_HOT_FUNCTION x() { return 3; }
510+
*
511+
* Issue #28618: This attribute must not be abused, otherwise it can have a
512+
* negative effect on performance. Only the functions where Python spends most of
513+
* its time must use it. Use a profiler when running the performance benchmark
514+
* suite to find these functions.
515+
*/
516+
#if defined(__GNUC__) \
517+
&& ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))
518+
#define _Py_HOT_FUNCTION __attribute__((hot))
519+
#else
520+
#define _Py_HOT_FUNCTION
521+
#endif
522+
500523
/**************************************************************************
501524
Prototypes that are missing from the standard include files on some systems
502525
(and possibly only some versions of such systems.)

Objects/frameobject.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ static int numfree = 0; /* number of frames currently in free_list */
409409
/* max value for numfree */
410410
#define PyFrame_MAXFREELIST 200
411411

412-
static void
412+
static void _Py_HOT_FUNCTION
413413
frame_dealloc(PyFrameObject *f)
414414
{
415415
PyObject **p, **valuestack;
@@ -605,7 +605,7 @@ int _PyFrame_Init()
605605
return 1;
606606
}
607607

608-
PyFrameObject *
608+
PyFrameObject* _Py_HOT_FUNCTION
609609
PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals,
610610
PyObject *locals)
611611
{

Python/ceval.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
718718
return tstate->interp->eval_frame(f, throwflag);
719719
}
720720

721-
PyObject *
721+
PyObject* _Py_HOT_FUNCTION
722722
_PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
723723
{
724724
#ifdef DXPAIRS
@@ -4771,7 +4771,7 @@ if (tstate->use_tracing && tstate->c_profilefunc) { \
47714771
x = call; \
47724772
}
47734773

4774-
static PyObject *
4774+
static PyObject* _Py_HOT_FUNCTION
47754775
call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
47764776
{
47774777
PyObject **pfunc = (*pp_stack) - oparg - 1;
@@ -4844,7 +4844,7 @@ call_function(PyObject ***pp_stack, Py_ssize_t oparg, PyObject *kwnames)
48444844
done before evaluating the frame.
48454845
*/
48464846

4847-
static PyObject*
4847+
static PyObject* _Py_HOT_FUNCTION
48484848
_PyFunction_FastCall(PyCodeObject *co, PyObject **args, Py_ssize_t nargs,
48494849
PyObject *globals)
48504850
{

Python/errors.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ PyErr_SetString(PyObject *exception, const char *string)
158158
}
159159

160160

161-
PyObject *
161+
PyObject* _Py_HOT_FUNCTION
162162
PyErr_Occurred(void)
163163
{
164164
PyThreadState *tstate = PyThreadState_GET();

0 commit comments

Comments
 (0)