Speed up Python (according to pybench and 2to3-on-itself) by 1-2% by caching
whether any thread has tracing turned on, which saves one load instruction in
the fast_next_opcode path in PyEval_EvalFrameEx(). See issue 4477.
diff --git a/Python/ceval.c b/Python/ceval.c
index 6eef7ef..cdcf9f6 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -504,6 +504,13 @@
static enum why_code do_raise(PyObject *, PyObject *, PyObject *);
static int unpack_iterable(PyObject *, int, PyObject **);
+/* Records whether tracing is on for any thread. Counts the number of
+ threads for which tstate->c_tracefunc is non-NULL, so if the value
+ is 0, we know we don't have to check this thread's c_tracefunc.
+ This speeds up the if statement in PyEval_EvalFrameEx() after
+ fast_next_opcode. */
+static int _Py_TracingPossible = 0;
+
/* for manipulating the thread switch and periodic "stuff" - used to be
per thread, now just a pair o' globals */
int _Py_CheckInterval = 100;
@@ -886,7 +893,8 @@
/* line-by-line tracing support */
- if (tstate->c_tracefunc != NULL && !tstate->tracing) {
+ if (_Py_TracingPossible &&
+ tstate->c_tracefunc != NULL && !tstate->tracing) {
/* see maybe_call_line_trace
for expository comments */
f->f_stacktop = stack_pointer;
@@ -3414,6 +3422,7 @@
{
PyThreadState *tstate = PyThreadState_GET();
PyObject *temp = tstate->c_traceobj;
+ _Py_TracingPossible += (func != NULL) - (tstate->c_tracefunc != NULL);
Py_XINCREF(arg);
tstate->c_tracefunc = NULL;
tstate->c_traceobj = NULL;