bpo-43760: Speed up check for tracing in interpreter dispatch (#25276)
* Remove redundant tracing_possible field from interpreter state.
* Move 'use_tracing' from tstate onto C stack, for fastest possible checking in dispatch logic.
* Add comments stressing the importance of stack discipline when dealing with CFrames.
* Add NEWS
diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index cfaee89..e3ccc54 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -29,6 +29,21 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *);
#define PyTrace_OPCODE 7
+typedef struct _cframe {
+ /* This struct will be threaded through the C stack
+ * allowing fast access to per-thread state that needs
+ * to be accessed quickly by the interpreter, but can
+ * be modified outside of the interpreter.
+ *
+ * WARNING: This makes data on the C stack accessible from
+ * heap objects. Care must be taken to maintain stack
+ * discipline and make sure that instances of this struct cannot
+ * be accessed outside of their lifetime.
+ */
+ int use_tracing; /* Tracing flag checked by the interpreter; formerly a field of struct _ts. */
+ struct _cframe *previous; /* Link to another CFrame; presumably the enclosing one on the C stack -- TODO confirm. */
+} CFrame;
+
typedef struct _err_stackitem {
/* This struct represents an entry on the exception stack, which is a
* per-coroutine state. (Coroutine in the computer science sense,
@@ -61,7 +76,10 @@ struct _ts {
This is to prevent the actual trace/profile code from being recorded in
the trace/profile. */
int tracing;
- int use_tracing;
+
+ /* Pointer to current CFrame in the C stack frame of the currently,
+ * or most recently, executing _PyEval_EvalFrameDefault. */
+ CFrame *cframe;
Py_tracefunc c_profilefunc;
Py_tracefunc c_tracefunc;
@@ -129,6 +147,8 @@ struct _ts {
/* Unique thread state id. */
uint64_t id;
+ CFrame root_cframe;
+
/* XXX signal handlers should also be here */
};