bpo-43760: Speed up check for tracing in interpreter dispatch (#25276)

* Remove redundant tracing_possible field from interpreter state.

* Move 'use_tracing' from tstate onto the C stack, for the fastest possible checking in dispatch logic.

* Add comments stressing the importance of stack discipline when dealing with CFrames.

* Add NEWS
diff --git a/Python/pystate.c b/Python/pystate.c
index c8b2530..436f874 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -624,7 +624,8 @@ new_threadstate(PyInterpreterState *interp, int init)
     tstate->recursion_headroom = 0;
     tstate->stackcheck_counter = 0;
     tstate->tracing = 0;
-    tstate->use_tracing = 0;
+    tstate->root_cframe.use_tracing = 0;
+    tstate->cframe = &tstate->root_cframe;
     tstate->gilstate_counter = 0;
     tstate->async_exc = NULL;
     tstate->thread_id = PyThread_get_thread_ident();