bpo-30860: Consolidate stateful runtime globals. (#3397)

* group the (stateful) runtime globals into various topical structs
* consolidate the topical structs under a single top-level _PyRuntimeState struct
* add a check-c-globals.py script that helps identify runtime globals

Other globals are excluded (see globals.txt and check-c-globals.py).
diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c
index 4e5acf3..832e27e 100644
--- a/Modules/gcmodule.c
+++ b/Modules/gcmodule.c
@@ -24,6 +24,8 @@
 */
 
 #include "Python.h"
+#include "internal/mem.h"
+#include "internal/pystate.h"
 #include "frameobject.h"        /* for PyFrame_ClearFreeList */
 #include "pydtrace.h"
 #include "pytime.h"             /* for _PyTime_GetMonotonicClock() */
@@ -39,133 +41,9 @@
 /* Get the object given the GC head */
 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
 
-/*** Global GC state ***/
-
-struct gc_generation {
-    PyGC_Head head;
-    int threshold; /* collection threshold */
-    int count; /* count of allocations or collections of younger
-                  generations */
-};
-
-/* If we change this, we need to change the default value in the signature of
-   gc.collect. */
-#define NUM_GENERATIONS 3
-#define GEN_HEAD(n) (&generations[n].head)
-
-/* linked lists of container objects */
-static struct gc_generation generations[NUM_GENERATIONS] = {
-    /* PyGC_Head,                               threshold,      count */
-    {{{GEN_HEAD(0), GEN_HEAD(0), 0}},           700,            0},
-    {{{GEN_HEAD(1), GEN_HEAD(1), 0}},           10,             0},
-    {{{GEN_HEAD(2), GEN_HEAD(2), 0}},           10,             0},
-};
-
-PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
-
-static int enabled = 1; /* automatic collection enabled? */
-
-/* true if we are currently running the collector */
-static int collecting = 0;
-
-/* list of uncollectable objects */
-static PyObject *garbage = NULL;
-
 /* Python string to use if unhandled exception occurs */
 static PyObject *gc_str = NULL;
 
-/* a list of callbacks to be invoked when collection is performed */
-static PyObject *callbacks = NULL;
-
-/* This is the number of objects that survived the last full collection. It
-   approximates the number of long lived objects tracked by the GC.
-
-   (by "full collection", we mean a collection of the oldest generation).
-*/
-static Py_ssize_t long_lived_total = 0;
-
-/* This is the number of objects that survived all "non-full" collections,
-   and are awaiting to undergo a full collection for the first time.
-
-*/
-static Py_ssize_t long_lived_pending = 0;
-
-/*
-   NOTE: about the counting of long-lived objects.
-
-   To limit the cost of garbage collection, there are two strategies;
-     - make each collection faster, e.g. by scanning fewer objects
-     - do less collections
-   This heuristic is about the latter strategy.
-
-   In addition to the various configurable thresholds, we only trigger a
-   full collection if the ratio
-    long_lived_pending / long_lived_total
-   is above a given value (hardwired to 25%).
-
-   The reason is that, while "non-full" collections (i.e., collections of
-   the young and middle generations) will always examine roughly the same
-   number of objects -- determined by the aforementioned thresholds --,
-   the cost of a full collection is proportional to the total number of
-   long-lived objects, which is virtually unbounded.
-
-   Indeed, it has been remarked that doing a full collection every
-   <constant number> of object creations entails a dramatic performance
-   degradation in workloads which consist in creating and storing lots of
-   long-lived objects (e.g. building a large list of GC-tracked objects would
-   show quadratic performance, instead of linear as expected: see issue #4074).
-
-   Using the above ratio, instead, yields amortized linear performance in
-   the total number of objects (the effect of which can be summarized
-   thusly: "each full garbage collection is more and more costly as the
-   number of objects grows, but we do fewer and fewer of them").
-
-   This heuristic was suggested by Martin von Löwis on python-dev in
-   June 2008. His original analysis and proposal can be found at:
-    http://mail.python.org/pipermail/python-dev/2008-June/080579.html
-*/
-
-/*
-   NOTE: about untracking of mutable objects.
-
-   Certain types of container cannot participate in a reference cycle, and
-   so do not need to be tracked by the garbage collector. Untracking these
-   objects reduces the cost of garbage collections. However, determining
-   which objects may be untracked is not free, and the costs must be
-   weighed against the benefits for garbage collection.
-
-   There are two possible strategies for when to untrack a container:
-
-   i) When the container is created.
-   ii) When the container is examined by the garbage collector.
-
-   Tuples containing only immutable objects (integers, strings etc, and
-   recursively, tuples of immutable objects) do not need to be tracked.
-   The interpreter creates a large number of tuples, many of which will
-   not survive until garbage collection. It is therefore not worthwhile
-   to untrack eligible tuples at creation time.
-
-   Instead, all tuples except the empty tuple are tracked when created.
-   During garbage collection it is determined whether any surviving tuples
-   can be untracked. A tuple can be untracked if all of its contents are
-   already not tracked. Tuples are examined for untracking in all garbage
-   collection cycles. It may take more than one cycle to untrack a tuple.
-
-   Dictionaries containing only immutable objects also do not need to be
-   tracked. Dictionaries are untracked when created. If a tracked item is
-   inserted into a dictionary (either as a key or value), the dictionary
-   becomes tracked. During a full garbage collection (all generations),
-   the collector will untrack any dictionaries whose contents are not
-   tracked.
-
-   The module provides the python function is_tracked(obj), which returns
-   the CURRENT tracking status of the object. Subsequent garbage
-   collections may change the tracking status of the object.
-
-   Untracking of certain containers was introduced in issue #4688, and
-   the algorithm was refined in response to issue #14775.
-*/
-
 /* set for debugging information */
 #define DEBUG_STATS             (1<<0) /* print collection statistics */
 #define DEBUG_COLLECTABLE       (1<<1) /* print collectable objects */
@@ -174,19 +52,26 @@
 #define DEBUG_LEAK              DEBUG_COLLECTABLE | \
                 DEBUG_UNCOLLECTABLE | \
                 DEBUG_SAVEALL
-static int debug;
 
-/* Running stats per generation */
-struct gc_generation_stats {
-    /* total number of collections */
-    Py_ssize_t collections;
-    /* total number of collected objects */
-    Py_ssize_t collected;
-    /* total number of uncollectable objects (put into gc.garbage) */
-    Py_ssize_t uncollectable;
-};
+#define GEN_HEAD(n) (&_PyRuntime.gc.generations[n].head)
 
-static struct gc_generation_stats generation_stats[NUM_GENERATIONS];
+void
+_PyGC_Initialize(struct _gc_runtime_state *state)
+{
+    state->enabled = 1; /* automatic collection enabled? */
+
+#define _GEN_HEAD(n) (&state->generations[n].head)
+    struct gc_generation generations[NUM_GENERATIONS] = {
+        /* PyGC_Head,                                 threshold,      count */
+        {{{_GEN_HEAD(0), _GEN_HEAD(0), 0}},           700,            0},
+        {{{_GEN_HEAD(1), _GEN_HEAD(1), 0}},           10,             0},
+        {{{_GEN_HEAD(2), _GEN_HEAD(2), 0}},           10,             0},
+    };
+    for (int i = 0; i < NUM_GENERATIONS; i++) {
+        state->generations[i] = generations[i];
+    };
+    state->generation0 = GEN_HEAD(0);
+}
 
 /*--------------------------------------------------------------------------
 gc_refs values.
@@ -766,16 +651,16 @@
 {
     PyGC_Head *gc = finalizers->gc.gc_next;
 
-    if (garbage == NULL) {
-        garbage = PyList_New(0);
-        if (garbage == NULL)
+    if (_PyRuntime.gc.garbage == NULL) {
+        _PyRuntime.gc.garbage = PyList_New(0);
+        if (_PyRuntime.gc.garbage == NULL)
             Py_FatalError("gc couldn't create gc.garbage list");
     }
     for (; gc != finalizers; gc = gc->gc.gc_next) {
         PyObject *op = FROM_GC(gc);
 
-        if ((debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
-            if (PyList_Append(garbage, op) < 0)
+        if ((_PyRuntime.gc.debug & DEBUG_SAVEALL) || has_legacy_finalizer(op)) {
+            if (PyList_Append(_PyRuntime.gc.garbage, op) < 0)
                 return -1;
         }
     }
@@ -865,8 +750,8 @@
         PyGC_Head *gc = collectable->gc.gc_next;
         PyObject *op = FROM_GC(gc);
 
-        if (debug & DEBUG_SAVEALL) {
-            PyList_Append(garbage, op);
+        if (_PyRuntime.gc.debug & DEBUG_SAVEALL) {
+            PyList_Append(_PyRuntime.gc.garbage, op);
         }
         else {
             if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
@@ -919,9 +804,9 @@
     PyGC_Head *gc;
     _PyTime_t t1 = 0;   /* initialize to prevent a compiler warning */
 
-    struct gc_generation_stats *stats = &generation_stats[generation];
+    struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation];
 
-    if (debug & DEBUG_STATS) {
+    if (_PyRuntime.gc.debug & DEBUG_STATS) {
         PySys_WriteStderr("gc: collecting generation %d...\n",
                           generation);
         PySys_WriteStderr("gc: objects in each generation:");
@@ -938,9 +823,9 @@
 
     /* update collection and allocation counters */
     if (generation+1 < NUM_GENERATIONS)
-        generations[generation+1].count += 1;
+        _PyRuntime.gc.generations[generation+1].count += 1;
     for (i = 0; i <= generation; i++)
-        generations[i].count = 0;
+        _PyRuntime.gc.generations[i].count = 0;
 
     /* merge younger generations with one we are currently collecting */
     for (i = 0; i < generation; i++) {
@@ -974,7 +859,7 @@
     /* Move reachable objects to next generation. */
     if (young != old) {
         if (generation == NUM_GENERATIONS - 2) {
-            long_lived_pending += gc_list_size(young);
+            _PyRuntime.gc.long_lived_pending += gc_list_size(young);
         }
         gc_list_merge(young, old);
     }
@@ -982,8 +867,8 @@
         /* We only untrack dicts in full collections, to avoid quadratic
            dict build-up. See issue #14775. */
         untrack_dicts(young);
-        long_lived_pending = 0;
-        long_lived_total = gc_list_size(young);
+        _PyRuntime.gc.long_lived_pending = 0;
+        _PyRuntime.gc.long_lived_total = gc_list_size(young);
     }
 
     /* All objects in unreachable are trash, but objects reachable from
@@ -1003,7 +888,7 @@
     for (gc = unreachable.gc.gc_next; gc != &unreachable;
                     gc = gc->gc.gc_next) {
         m++;
-        if (debug & DEBUG_COLLECTABLE) {
+        if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) {
             debug_cycle("collectable", FROM_GC(gc));
         }
     }
@@ -1032,10 +917,10 @@
          gc != &finalizers;
          gc = gc->gc.gc_next) {
         n++;
-        if (debug & DEBUG_UNCOLLECTABLE)
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
             debug_cycle("uncollectable", FROM_GC(gc));
     }
-    if (debug & DEBUG_STATS) {
+    if (_PyRuntime.gc.debug & DEBUG_STATS) {
         _PyTime_t t2 = _PyTime_GetMonotonicClock();
 
         if (m == 0 && n == 0)
@@ -1098,11 +983,11 @@
     PyObject *info = NULL;
 
     /* we may get called very early */
-    if (callbacks == NULL)
+    if (_PyRuntime.gc.callbacks == NULL)
         return;
     /* The local variable cannot be rebound, check it for sanity */
-    assert(callbacks != NULL && PyList_CheckExact(callbacks));
-    if (PyList_GET_SIZE(callbacks) != 0) {
+    assert(_PyRuntime.gc.callbacks != NULL && PyList_CheckExact(_PyRuntime.gc.callbacks));
+    if (PyList_GET_SIZE(_PyRuntime.gc.callbacks) != 0) {
         info = Py_BuildValue("{sisnsn}",
             "generation", generation,
             "collected", collected,
@@ -1112,8 +997,8 @@
             return;
         }
     }
-    for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
-        PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
+    for (i=0; i<PyList_GET_SIZE(_PyRuntime.gc.callbacks); i++) {
+        PyObject *r, *cb = PyList_GET_ITEM(_PyRuntime.gc.callbacks, i);
         Py_INCREF(cb); /* make sure cb doesn't go away */
         r = PyObject_CallFunction(cb, "sO", phase, info);
         Py_XDECREF(r);
@@ -1147,13 +1032,13 @@
      * exceeds the threshold.  Objects in the that generation and
      * generations younger than it will be collected. */
     for (i = NUM_GENERATIONS-1; i >= 0; i--) {
-        if (generations[i].count > generations[i].threshold) {
+        if (_PyRuntime.gc.generations[i].count > _PyRuntime.gc.generations[i].threshold) {
             /* Avoid quadratic performance degradation in number
                of tracked objects. See comments at the beginning
                of this file, and issue #4074.
             */
             if (i == NUM_GENERATIONS - 1
-                && long_lived_pending < long_lived_total / 4)
+                && _PyRuntime.gc.long_lived_pending < _PyRuntime.gc.long_lived_total / 4)
                 continue;
             n = collect_with_callback(i);
             break;
@@ -1174,7 +1059,7 @@
 gc_enable_impl(PyObject *module)
 /*[clinic end generated code: output=45a427e9dce9155c input=81ac4940ca579707]*/
 {
-    enabled = 1;
+    _PyRuntime.gc.enabled = 1;
     Py_RETURN_NONE;
 }
 
@@ -1188,7 +1073,7 @@
 gc_disable_impl(PyObject *module)
 /*[clinic end generated code: output=97d1030f7aa9d279 input=8c2e5a14e800d83b]*/
 {
-    enabled = 0;
+    _PyRuntime.gc.enabled = 0;
     Py_RETURN_NONE;
 }
 
@@ -1202,7 +1087,7 @@
 gc_isenabled_impl(PyObject *module)
 /*[clinic end generated code: output=1874298331c49130 input=30005e0422373b31]*/
 {
-    return enabled;
+    return _PyRuntime.gc.enabled;
 }
 
 /*[clinic input]
@@ -1230,12 +1115,12 @@
         return -1;
     }
 
-    if (collecting)
+    if (_PyRuntime.gc.collecting)
         n = 0; /* already collecting, don't do anything */
     else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
         n = collect_with_callback(generation);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
     }
 
     return n;
@@ -1263,7 +1148,7 @@
 gc_set_debug_impl(PyObject *module, int flags)
 /*[clinic end generated code: output=7c8366575486b228 input=5e5ce15e84fbed15]*/
 {
-    debug = flags;
+    _PyRuntime.gc.debug = flags;
 
     Py_RETURN_NONE;
 }
@@ -1278,7 +1163,7 @@
 gc_get_debug_impl(PyObject *module)
 /*[clinic end generated code: output=91242f3506cd1e50 input=91a101e1c3b98366]*/
 {
-    return debug;
+    return _PyRuntime.gc.debug;
 }
 
 PyDoc_STRVAR(gc_set_thresh__doc__,
@@ -1292,13 +1177,13 @@
 {
     int i;
     if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
-                          &generations[0].threshold,
-                          &generations[1].threshold,
-                          &generations[2].threshold))
+                          &_PyRuntime.gc.generations[0].threshold,
+                          &_PyRuntime.gc.generations[1].threshold,
+                          &_PyRuntime.gc.generations[2].threshold))
         return NULL;
     for (i = 2; i < NUM_GENERATIONS; i++) {
         /* generations higher than 2 get the same threshold */
-        generations[i].threshold = generations[2].threshold;
+        _PyRuntime.gc.generations[i].threshold = _PyRuntime.gc.generations[2].threshold;
     }
 
     Py_RETURN_NONE;
@@ -1315,9 +1200,9 @@
 /*[clinic end generated code: output=7902bc9f41ecbbd8 input=286d79918034d6e6]*/
 {
     return Py_BuildValue("(iii)",
-                         generations[0].threshold,
-                         generations[1].threshold,
-                         generations[2].threshold);
+                         _PyRuntime.gc.generations[0].threshold,
+                         _PyRuntime.gc.generations[1].threshold,
+                         _PyRuntime.gc.generations[2].threshold);
 }
 
 /*[clinic input]
@@ -1331,9 +1216,9 @@
 /*[clinic end generated code: output=354012e67b16398f input=a392794a08251751]*/
 {
     return Py_BuildValue("(iii)",
-                         generations[0].count,
-                         generations[1].count,
-                         generations[2].count);
+                         _PyRuntime.gc.generations[0].count,
+                         _PyRuntime.gc.generations[1].count,
+                         _PyRuntime.gc.generations[2].count);
 }
 
 static int
@@ -1464,7 +1349,7 @@
     /* To get consistent values despite allocations while constructing
        the result list, we use a snapshot of the running stats. */
     for (i = 0; i < NUM_GENERATIONS; i++) {
-        stats[i] = generation_stats[i];
+        stats[i] = _PyRuntime.gc.generation_stats[i];
     }
 
     result = PyList_New(0);
@@ -1581,22 +1466,22 @@
     if (m == NULL)
         return NULL;
 
-    if (garbage == NULL) {
-        garbage = PyList_New(0);
-        if (garbage == NULL)
+    if (_PyRuntime.gc.garbage == NULL) {
+        _PyRuntime.gc.garbage = PyList_New(0);
+        if (_PyRuntime.gc.garbage == NULL)
             return NULL;
     }
-    Py_INCREF(garbage);
-    if (PyModule_AddObject(m, "garbage", garbage) < 0)
+    Py_INCREF(_PyRuntime.gc.garbage);
+    if (PyModule_AddObject(m, "garbage", _PyRuntime.gc.garbage) < 0)
         return NULL;
 
-    if (callbacks == NULL) {
-        callbacks = PyList_New(0);
-        if (callbacks == NULL)
+    if (_PyRuntime.gc.callbacks == NULL) {
+        _PyRuntime.gc.callbacks = PyList_New(0);
+        if (_PyRuntime.gc.callbacks == NULL)
             return NULL;
     }
-    Py_INCREF(callbacks);
-    if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
+    Py_INCREF(_PyRuntime.gc.callbacks);
+    if (PyModule_AddObject(m, "callbacks", _PyRuntime.gc.callbacks) < 0)
         return NULL;
 
 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return NULL
@@ -1615,12 +1500,12 @@
 {
     Py_ssize_t n;
 
-    if (collecting)
+    if (_PyRuntime.gc.collecting)
         n = 0; /* already collecting, don't do anything */
     else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
         n = collect_with_callback(NUM_GENERATIONS - 1);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
     }
 
     return n;
@@ -1629,7 +1514,7 @@
 Py_ssize_t
 _PyGC_CollectIfEnabled(void)
 {
-    if (!enabled)
+    if (!_PyRuntime.gc.enabled)
         return 0;
 
     return PyGC_Collect();
@@ -1646,12 +1531,12 @@
        during interpreter shutdown (and then never finish it).
        See http://bugs.python.org/issue8713#msg195178 for an example.
        */
-    if (collecting)
+    if (_PyRuntime.gc.collecting)
         n = 0;
     else {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
         n = collect(NUM_GENERATIONS - 1, NULL, NULL, 1);
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
     }
     return n;
 }
@@ -1659,10 +1544,10 @@
 void
 _PyGC_DumpShutdownStats(void)
 {
-    if (!(debug & DEBUG_SAVEALL)
-        && garbage != NULL && PyList_GET_SIZE(garbage) > 0) {
+    if (!(_PyRuntime.gc.debug & DEBUG_SAVEALL)
+        && _PyRuntime.gc.garbage != NULL && PyList_GET_SIZE(_PyRuntime.gc.garbage) > 0) {
         char *message;
-        if (debug & DEBUG_UNCOLLECTABLE)
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
             message = "gc: %zd uncollectable objects at " \
                 "shutdown";
         else
@@ -1673,13 +1558,13 @@
            already. */
         if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0,
                                      "gc", NULL, message,
-                                     PyList_GET_SIZE(garbage)))
+                                     PyList_GET_SIZE(_PyRuntime.gc.garbage)))
             PyErr_WriteUnraisable(NULL);
-        if (debug & DEBUG_UNCOLLECTABLE) {
+        if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE) {
             PyObject *repr = NULL, *bytes = NULL;
-            repr = PyObject_Repr(garbage);
+            repr = PyObject_Repr(_PyRuntime.gc.garbage);
             if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr)))
-                PyErr_WriteUnraisable(garbage);
+                PyErr_WriteUnraisable(_PyRuntime.gc.garbage);
             else {
                 PySys_WriteStderr(
                     "      %s\n",
@@ -1695,7 +1580,7 @@
 void
 _PyGC_Fini(void)
 {
-    Py_CLEAR(callbacks);
+    Py_CLEAR(_PyRuntime.gc.callbacks);
 }
 
 /* for debugging */
@@ -1746,15 +1631,15 @@
         return PyErr_NoMemory();
     g->gc.gc_refs = 0;
     _PyGCHead_SET_REFS(g, GC_UNTRACKED);
-    generations[0].count++; /* number of allocated GC objects */
-    if (generations[0].count > generations[0].threshold &&
-        enabled &&
-        generations[0].threshold &&
-        !collecting &&
+    _PyRuntime.gc.generations[0].count++; /* number of allocated GC objects */
+    if (_PyRuntime.gc.generations[0].count > _PyRuntime.gc.generations[0].threshold &&
+        _PyRuntime.gc.enabled &&
+        _PyRuntime.gc.generations[0].threshold &&
+        !_PyRuntime.gc.collecting &&
         !PyErr_Occurred()) {
-        collecting = 1;
+        _PyRuntime.gc.collecting = 1;
         collect_generations();
-        collecting = 0;
+        _PyRuntime.gc.collecting = 0;
     }
     op = FROM_GC(g);
     return op;
@@ -1819,8 +1704,8 @@
     PyGC_Head *g = AS_GC(op);
     if (IS_TRACKED(op))
         gc_list_remove(g);
-    if (generations[0].count > 0) {
-        generations[0].count--;
+    if (_PyRuntime.gc.generations[0].count > 0) {
+        _PyRuntime.gc.generations[0].count--;
     }
     PyObject_FREE(g);
 }