Revert "bpo-30860: Consolidate stateful runtime globals." (#3379)

Reverted because the Windows buildbots started failing with include-related errors after the original change was merged.
diff --git a/Python/_warnings.c b/Python/_warnings.c
index a5e42a3..8616195 100644
--- a/Python/_warnings.c
+++ b/Python/_warnings.c
@@ -8,6 +8,13 @@
 MODULE_NAME " provides basic warning filtering support.\n"
 "It is a helper module to speed up interpreter start-up.");
 
+/* Both 'filters' and 'onceregistry' can be set in warnings.py;
+   get_warnings_attr() will reset these variables accordingly. */
+static PyObject *_filters;  /* List */
+static PyObject *_once_registry;  /* Dict */
+static PyObject *_default_action; /* String */
+static long _filters_version;
+
 _Py_IDENTIFIER(argv);
 _Py_IDENTIFIER(stderr);
 
@@ -46,7 +53,7 @@
     }
 
     /* don't try to import after the start of the Python finallization */
-    if (try_import && !_Py_IS_FINALIZING()) {
+    if (try_import && _Py_Finalizing == NULL) {
         warnings_module = PyImport_Import(warnings_str);
         if (warnings_module == NULL) {
             /* Fallback to the C implementation if we cannot get
@@ -83,10 +90,10 @@
     if (registry == NULL) {
         if (PyErr_Occurred())
             return NULL;
-        return _PyRuntime.warnings.once_registry;
+        return _once_registry;
     }
-    Py_DECREF(_PyRuntime.warnings.once_registry);
-    _PyRuntime.warnings.once_registry = registry;
+    Py_DECREF(_once_registry);
+    _once_registry = registry;
     return registry;
 }
 
@@ -101,11 +108,11 @@
         if (PyErr_Occurred()) {
             return NULL;
         }
-        return _PyRuntime.warnings.default_action;
+        return _default_action;
     }
 
-    Py_DECREF(_PyRuntime.warnings.default_action);
-    _PyRuntime.warnings.default_action = default_action;
+    Py_DECREF(_default_action);
+    _default_action = default_action;
     return default_action;
 }
 
@@ -125,24 +132,23 @@
             return NULL;
     }
     else {
-        Py_DECREF(_PyRuntime.warnings.filters);
-        _PyRuntime.warnings.filters = warnings_filters;
+        Py_DECREF(_filters);
+        _filters = warnings_filters;
     }
 
-    PyObject *filters = _PyRuntime.warnings.filters;
-    if (filters == NULL || !PyList_Check(filters)) {
+    if (_filters == NULL || !PyList_Check(_filters)) {
         PyErr_SetString(PyExc_ValueError,
                         MODULE_NAME ".filters must be a list");
         return NULL;
     }
 
-    /* _PyRuntime.warnings.filters could change while we are iterating over it. */
-    for (i = 0; i < PyList_GET_SIZE(filters); i++) {
+    /* _filters could change while we are iterating over it. */
+    for (i = 0; i < PyList_GET_SIZE(_filters); i++) {
         PyObject *tmp_item, *action, *msg, *cat, *mod, *ln_obj;
         Py_ssize_t ln;
         int is_subclass, good_msg, good_mod;
 
-        tmp_item = PyList_GET_ITEM(filters, i);
+        tmp_item = PyList_GET_ITEM(_filters, i);
         if (!PyTuple_Check(tmp_item) || PyTuple_GET_SIZE(tmp_item) != 5) {
             PyErr_Format(PyExc_ValueError,
                          MODULE_NAME ".filters item %zd isn't a 5-tuple", i);
@@ -214,9 +220,9 @@
     version_obj = _PyDict_GetItemId(registry, &PyId_version);
     if (version_obj == NULL
         || !PyLong_CheckExact(version_obj)
-        || PyLong_AsLong(version_obj) != _PyRuntime.warnings.filters_version) {
+        || PyLong_AsLong(version_obj) != _filters_version) {
         PyDict_Clear(registry);
-        version_obj = PyLong_FromLong(_PyRuntime.warnings.filters_version);
+        version_obj = PyLong_FromLong(_filters_version);
         if (version_obj == NULL)
             return -1;
         if (_PyDict_SetItemId(registry, &PyId_version, version_obj) < 0) {
@@ -514,7 +520,7 @@
                 if (registry == NULL)
                     goto cleanup;
             }
-            /* _PyRuntime.warnings.once_registry[(text, category)] = 1 */
+            /* _once_registry[(text, category)] = 1 */
             rc = update_registry(registry, text, category, 0);
         }
         else if (_PyUnicode_EqualToASCIIString(action, "module")) {
@@ -904,7 +910,7 @@
 static PyObject *
 warnings_filters_mutated(PyObject *self, PyObject *args)
 {
-    _PyRuntime.warnings.filters_version++;
+    _filters_version++;
     Py_RETURN_NONE;
 }
 
@@ -1154,8 +1160,7 @@
     }
 
     /* This assumes the line number is zero for now. */
-    return PyTuple_Pack(5, action_obj, Py_None,
-                        category, Py_None, _PyLong_Zero);
+    return PyTuple_Pack(5, action_obj, Py_None, category, Py_None, _PyLong_Zero);
 }
 
 static PyObject *
@@ -1223,35 +1228,33 @@
     if (m == NULL)
         return NULL;
 
-    if (_PyRuntime.warnings.filters == NULL) {
-        _PyRuntime.warnings.filters = init_filters();
-        if (_PyRuntime.warnings.filters == NULL)
+    if (_filters == NULL) {
+        _filters = init_filters();
+        if (_filters == NULL)
             return NULL;
     }
-    Py_INCREF(_PyRuntime.warnings.filters);
-    if (PyModule_AddObject(m, "filters", _PyRuntime.warnings.filters) < 0)
+    Py_INCREF(_filters);
+    if (PyModule_AddObject(m, "filters", _filters) < 0)
         return NULL;
 
-    if (_PyRuntime.warnings.once_registry == NULL) {
-        _PyRuntime.warnings.once_registry = PyDict_New();
-        if (_PyRuntime.warnings.once_registry == NULL)
+    if (_once_registry == NULL) {
+        _once_registry = PyDict_New();
+        if (_once_registry == NULL)
             return NULL;
     }
-    Py_INCREF(_PyRuntime.warnings.once_registry);
-    if (PyModule_AddObject(m, "_onceregistry",
-                           _PyRuntime.warnings.once_registry) < 0)
+    Py_INCREF(_once_registry);
+    if (PyModule_AddObject(m, "_onceregistry", _once_registry) < 0)
         return NULL;
 
-    if (_PyRuntime.warnings.default_action == NULL) {
-        _PyRuntime.warnings.default_action = PyUnicode_FromString("default");
-        if (_PyRuntime.warnings.default_action == NULL)
+    if (_default_action == NULL) {
+        _default_action = PyUnicode_FromString("default");
+        if (_default_action == NULL)
             return NULL;
     }
-    Py_INCREF(_PyRuntime.warnings.default_action);
-    if (PyModule_AddObject(m, "_defaultaction",
-                           _PyRuntime.warnings.default_action) < 0)
+    Py_INCREF(_default_action);
+    if (PyModule_AddObject(m, "_defaultaction", _default_action) < 0)
         return NULL;
 
-    _PyRuntime.warnings.filters_version = 0;
+    _filters_version = 0;
     return m;
 }
diff --git a/Python/ceval.c b/Python/ceval.c
index 9741c15..436e5ca 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -36,8 +36,7 @@
 typedef PyObject *(*callproc)(PyObject *, PyObject *, PyObject *);
 
 /* Forward declarations */
-Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t,
-                                          PyObject *);
+Py_LOCAL_INLINE(PyObject *) call_function(PyObject ***, Py_ssize_t, PyObject *);
 static PyObject * do_call_core(PyObject *, PyObject *, PyObject *);
 
 #ifdef LLTRACE
@@ -53,15 +52,13 @@
 static void call_exc_trace(Py_tracefunc, PyObject *,
                            PyThreadState *, PyFrameObject *);
 static int maybe_call_line_trace(Py_tracefunc, PyObject *,
-                                 PyThreadState *, PyFrameObject *,
-                                 int *, int *, int *);
+                                 PyThreadState *, PyFrameObject *, int *, int *, int *);
 static void maybe_dtrace_line(PyFrameObject *, int *, int *, int *);
 static void dtrace_function_entry(PyFrameObject *);
 static void dtrace_function_return(PyFrameObject *);
 
 static PyObject * cmp_outcome(int, PyObject *, PyObject *);
-static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *,
-                              PyObject *);
+static PyObject * import_name(PyFrameObject *, PyObject *, PyObject *, PyObject *);
 static PyObject * import_from(PyObject *, PyObject *);
 static int import_all_from(PyObject *, PyObject *);
 static void format_exc_check_arg(PyObject *, const char *, PyObject *);
@@ -91,7 +88,7 @@
 #endif
 
 #ifdef WITH_THREAD
-#define GIL_REQUEST _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)
+#define GIL_REQUEST _Py_atomic_load_relaxed(&gil_drop_request)
 #else
 #define GIL_REQUEST 0
 #endif
@@ -101,22 +98,22 @@
    the GIL eventually anyway. */
 #define COMPUTE_EVAL_BREAKER() \
     _Py_atomic_store_relaxed( \
-        &_PyRuntime.ceval.eval_breaker, \
+        &eval_breaker, \
         GIL_REQUEST | \
-        _Py_atomic_load_relaxed(&_PyRuntime.ceval.pending.calls_to_do) | \
-        _PyRuntime.ceval.pending.async_exc)
+        _Py_atomic_load_relaxed(&pendingcalls_to_do) | \
+        pending_async_exc)
 
 #ifdef WITH_THREAD
 
 #define SET_GIL_DROP_REQUEST() \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 1); \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&gil_drop_request, 1); \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
     } while (0)
 
 #define RESET_GIL_DROP_REQUEST() \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil_drop_request, 0); \
+        _Py_atomic_store_relaxed(&gil_drop_request, 0); \
         COMPUTE_EVAL_BREAKER(); \
     } while (0)
 
@@ -125,35 +122,47 @@
 /* Pending calls are only modified under pending_lock */
 #define SIGNAL_PENDING_CALLS() \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 1); \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
     } while (0)
 
 #define UNSIGNAL_PENDING_CALLS() \
     do { \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.pending.calls_to_do, 0); \
+        _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \
         COMPUTE_EVAL_BREAKER(); \
     } while (0)
 
 #define SIGNAL_ASYNC_EXC() \
     do { \
-        _PyRuntime.ceval.pending.async_exc = 1; \
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.eval_breaker, 1); \
+        pending_async_exc = 1; \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
     } while (0)
 
 #define UNSIGNAL_ASYNC_EXC() \
-    do { \
-        _PyRuntime.ceval.pending.async_exc = 0; \
-        COMPUTE_EVAL_BREAKER(); \
-    } while (0)
+    do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)
 
 
+/* This single variable consolidates all requests to break out of the fast path
+   in the eval loop. */
+static _Py_atomic_int eval_breaker = {0};
+/* Request for running pending calls. */
+static _Py_atomic_int pendingcalls_to_do = {0};
+/* Request for looking at the `async_exc` field of the current thread state.
+   Guarded by the GIL. */
+static int pending_async_exc = 0;
+
 #ifdef WITH_THREAD
 
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 #include "pythread.h"
+
+static PyThread_type_lock pending_lock = 0; /* for pending calls */
+static unsigned long main_thread = 0;
+/* Request for dropping the GIL */
+static _Py_atomic_int gil_drop_request = {0};
+
 #include "ceval_gil.h"
 
 int
@@ -169,9 +178,9 @@
         return;
     create_gil();
     take_gil(PyThreadState_GET());
-    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
-    if (!_PyRuntime.ceval.pending.lock)
-        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
+    main_thread = PyThread_get_thread_ident();
+    if (!pending_lock)
+        pending_lock = PyThread_allocate_lock();
 }
 
 void
@@ -239,9 +248,9 @@
     if (!gil_created())
         return;
     recreate_gil();
-    _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
+    pending_lock = PyThread_allocate_lock();
     take_gil(current_tstate);
-    _PyRuntime.ceval.pending.main_thread = PyThread_get_thread_ident();
+    main_thread = PyThread_get_thread_ident();
 
     /* Destroy all threads except the current one */
     _PyThreadState_DeleteExcept(current_tstate);
@@ -285,7 +294,7 @@
         int err = errno;
         take_gil(tstate);
         /* _Py_Finalizing is protected by the GIL */
-        if (_Py_IS_FINALIZING() && !_Py_CURRENTLY_FINALIZING(tstate)) {
+        if (_Py_Finalizing && tstate != _Py_Finalizing) {
             drop_gil(tstate);
             PyThread_exit_thread();
             assert(0);  /* unreachable */
@@ -337,11 +346,19 @@
    callback.
  */
 
+#define NPENDINGCALLS 32
+static struct {
+    int (*func)(void *);
+    void *arg;
+} pendingcalls[NPENDINGCALLS];
+static int pendingfirst = 0;
+static int pendinglast = 0;
+
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
 {
     int i, j, result=0;
-    PyThread_type_lock lock = _PyRuntime.ceval.pending.lock;
+    PyThread_type_lock lock = pending_lock;
 
     /* try a few times for the lock.  Since this mechanism is used
      * for signal handling (on the main thread), there is a (slim)
@@ -363,14 +380,14 @@
             return -1;
     }
 
-    i = _PyRuntime.ceval.pending.last;
+    i = pendinglast;
     j = (i + 1) % NPENDINGCALLS;
-    if (j == _PyRuntime.ceval.pending.first) {
+    if (j == pendingfirst) {
         result = -1; /* Queue full */
     } else {
-        _PyRuntime.ceval.pending.calls[i].func = func;
-        _PyRuntime.ceval.pending.calls[i].arg = arg;
-        _PyRuntime.ceval.pending.last = j;
+        pendingcalls[i].func = func;
+        pendingcalls[i].arg = arg;
+        pendinglast = j;
     }
     /* signal main loop */
     SIGNAL_PENDING_CALLS();
@@ -388,19 +405,16 @@
 
     assert(PyGILState_Check());
 
-    if (!_PyRuntime.ceval.pending.lock) {
+    if (!pending_lock) {
         /* initial allocation of the lock */
-        _PyRuntime.ceval.pending.lock = PyThread_allocate_lock();
-        if (_PyRuntime.ceval.pending.lock == NULL)
+        pending_lock = PyThread_allocate_lock();
+        if (pending_lock == NULL)
             return -1;
     }
 
     /* only service pending calls on main thread */
-    if (_PyRuntime.ceval.pending.main_thread &&
-        PyThread_get_thread_ident() != _PyRuntime.ceval.pending.main_thread)
-    {
+    if (main_thread && PyThread_get_thread_ident() != main_thread)
         return 0;
-    }
     /* don't perform recursive pending calls */
     if (busy)
         return 0;
@@ -422,16 +436,16 @@
         void *arg = NULL;
 
         /* pop one item off the queue while holding the lock */
-        PyThread_acquire_lock(_PyRuntime.ceval.pending.lock, WAIT_LOCK);
-        j = _PyRuntime.ceval.pending.first;
-        if (j == _PyRuntime.ceval.pending.last) {
+        PyThread_acquire_lock(pending_lock, WAIT_LOCK);
+        j = pendingfirst;
+        if (j == pendinglast) {
             func = NULL; /* Queue empty */
         } else {
-            func = _PyRuntime.ceval.pending.calls[j].func;
-            arg = _PyRuntime.ceval.pending.calls[j].arg;
-            _PyRuntime.ceval.pending.first = (j + 1) % NPENDINGCALLS;
+            func = pendingcalls[j].func;
+            arg = pendingcalls[j].arg;
+            pendingfirst = (j + 1) % NPENDINGCALLS;
         }
-        PyThread_release_lock(_PyRuntime.ceval.pending.lock);
+        PyThread_release_lock(pending_lock);
         /* having released the lock, perform the callback */
         if (func == NULL)
             break;
@@ -475,6 +489,14 @@
    The two threads could theoretically wiggle around the "busy" variable.
 */
 
+#define NPENDINGCALLS 32
+static struct {
+    int (*func)(void *);
+    void *arg;
+} pendingcalls[NPENDINGCALLS];
+static volatile int pendingfirst = 0;
+static volatile int pendinglast = 0;
+
 int
 Py_AddPendingCall(int (*func)(void *), void *arg)
 {
@@ -484,15 +506,15 @@
     if (busy)
         return -1;
     busy = 1;
-    i = _PyRuntime.ceval.pending.last;
+    i = pendinglast;
     j = (i + 1) % NPENDINGCALLS;
-    if (j == _PyRuntime.ceval.pending.first) {
+    if (j == pendingfirst) {
         busy = 0;
         return -1; /* Queue full */
     }
-    _PyRuntime.ceval.pending.calls[i].func = func;
-    _PyRuntime.ceval.pending.calls[i].arg = arg;
-    _PyRuntime.ceval.pending.last = j;
+    pendingcalls[i].func = func;
+    pendingcalls[i].arg = arg;
+    pendinglast = j;
 
     SIGNAL_PENDING_CALLS();
     busy = 0;
@@ -521,12 +543,12 @@
         int i;
         int (*func)(void *);
         void *arg;
-        i = _PyRuntime.ceval.pending.first;
-        if (i == _PyRuntime.ceval.pending.last)
+        i = pendingfirst;
+        if (i == pendinglast)
             break; /* Queue empty */
-        func = _PyRuntime.ceval.pending.calls[i].func;
-        arg = _PyRuntime.ceval.pending.calls[i].arg;
-        _PyRuntime.ceval.pending.first = (i + 1) % NPENDINGCALLS;
+        func = pendingcalls[i].func;
+        arg = pendingcalls[i].arg;
+        pendingfirst = (i + 1) % NPENDINGCALLS;
         if (func(arg) < 0) {
             goto error;
         }
@@ -548,32 +570,20 @@
 #ifndef Py_DEFAULT_RECURSION_LIMIT
 #define Py_DEFAULT_RECURSION_LIMIT 1000
 #endif
-
-void
-_PyEval_Initialize(struct _ceval_runtime_state *state)
-{
-    state->recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
-    state->check_recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
-    _gil_initialize(&state->gil);
-}
-
-int
-_PyEval_CheckRecursionLimit(void)
-{
-    return _PyRuntime.ceval.check_recursion_limit;
-}
+static int recursion_limit = Py_DEFAULT_RECURSION_LIMIT;
+int _Py_CheckRecursionLimit = Py_DEFAULT_RECURSION_LIMIT;
 
 int
 Py_GetRecursionLimit(void)
 {
-    return _PyRuntime.ceval.recursion_limit;
+    return recursion_limit;
 }
 
 void
 Py_SetRecursionLimit(int new_limit)
 {
-    _PyRuntime.ceval.recursion_limit = new_limit;
-    _PyRuntime.ceval.check_recursion_limit = _PyRuntime.ceval.recursion_limit;
+    recursion_limit = new_limit;
+    _Py_CheckRecursionLimit = recursion_limit;
 }
 
 /* the macro Py_EnterRecursiveCall() only calls _Py_CheckRecursiveCall()
@@ -585,7 +595,6 @@
 _Py_CheckRecursiveCall(const char *where)
 {
     PyThreadState *tstate = PyThreadState_GET();
-    int recursion_limit = _PyRuntime.ceval.recursion_limit;
 
 #ifdef USE_STACKCHECK
     if (PyOS_CheckStack()) {
@@ -594,7 +603,7 @@
         return -1;
     }
 #endif
-    _PyRuntime.ceval.check_recursion_limit = recursion_limit;
+    _Py_CheckRecursionLimit = recursion_limit;
     if (tstate->recursion_critical)
         /* Somebody asked that we don't check for recursion. */
         return 0;
@@ -633,7 +642,13 @@
 static int do_raise(PyObject *, PyObject *);
 static int unpack_iterable(PyObject *, int, int, PyObject **);
 
-#define _Py_TracingPossible _PyRuntime.ceval.tracing_possible
+/* Records whether tracing is on for any thread.  Counts the number of
+   threads for which tstate->c_tracefunc is non-NULL, so if the value
+   is 0, we know we don't have to check this thread's c_tracefunc.
+   This speeds up the if statement in PyEval_EvalFrameEx() after
+   fast_next_opcode*/
+static int _Py_TracingPossible = 0;
+
 
 
 PyObject *
@@ -764,7 +779,7 @@
 
 #define DISPATCH() \
     { \
-        if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) { \
+        if (!_Py_atomic_load_relaxed(&eval_breaker)) {      \
                     FAST_DISPATCH(); \
         } \
         continue; \
@@ -812,8 +827,7 @@
 /* Code access macros */
 
 /* The integer overflow is checked by an assertion below. */
-#define INSTR_OFFSET()  \
-    (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
+#define INSTR_OFFSET()  (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
 #define NEXTOPARG()  do { \
         _Py_CODEUNIT word = *next_instr; \
         opcode = _Py_OPCODE(word); \
@@ -1066,7 +1080,7 @@
            async I/O handler); see Py_AddPendingCall() and
            Py_MakePendingCalls() above. */
 
-        if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.eval_breaker)) {
+        if (_Py_atomic_load_relaxed(&eval_breaker)) {
             if (_Py_OPCODE(*next_instr) == SETUP_FINALLY ||
                 _Py_OPCODE(*next_instr) == YIELD_FROM) {
                 /* Two cases where we skip running signal handlers and other
@@ -1083,16 +1097,12 @@
                 */
                 goto fast_next_opcode;
             }
-            if (_Py_atomic_load_relaxed(
-                        &_PyRuntime.ceval.pending.calls_to_do))
-            {
+            if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) {
                 if (Py_MakePendingCalls() < 0)
                     goto error;
             }
 #ifdef WITH_THREAD
-            if (_Py_atomic_load_relaxed(
-                        &_PyRuntime.ceval.gil_drop_request))
-            {
+            if (_Py_atomic_load_relaxed(&gil_drop_request)) {
                 /* Give another thread a chance */
                 if (PyThreadState_Swap(NULL) != tstate)
                     Py_FatalError("ceval: tstate mix-up");
@@ -1103,9 +1113,7 @@
                 take_gil(tstate);
 
                 /* Check if we should make a quick exit. */
-                if (_Py_IS_FINALIZING() &&
-                    !_Py_CURRENTLY_FINALIZING(tstate))
-                {
+                if (_Py_Finalizing && _Py_Finalizing != tstate) {
                     drop_gil(tstate);
                     PyThread_exit_thread();
                 }
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index ef51890..a3b450b 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -8,13 +8,20 @@
 
 /* First some general settings */
 
-#define INTERVAL (_PyRuntime.ceval.gil.interval >= 1 ? _PyRuntime.ceval.gil.interval : 1)
+/* microseconds (the Python API uses seconds, though) */
+#define DEFAULT_INTERVAL 5000
+static unsigned long gil_interval = DEFAULT_INTERVAL;
+#define INTERVAL (gil_interval >= 1 ? gil_interval : 1)
+
+/* Enable if you want to force the switching of threads at least every `gil_interval` */
+#undef FORCE_SWITCHING
+#define FORCE_SWITCHING
 
 
 /*
    Notes about the implementation:
 
-   - The GIL is just a boolean variable (locked) whose access is protected
+   - The GIL is just a boolean variable (gil_locked) whose access is protected
      by a mutex (gil_mutex), and whose changes are signalled by a condition
      variable (gil_cond). gil_mutex is taken for short periods of time,
      and therefore mostly uncontended.
@@ -41,7 +48,7 @@
    - When a thread releases the GIL and gil_drop_request is set, that thread
      ensures that another GIL-awaiting thread gets scheduled.
      It does so by waiting on a condition variable (switch_cond) until
-     the value of last_holder is changed to something else than its
+     the value of gil_last_holder is changed to something else than its
      own thread state pointer, indicating that another thread was able to
      take the GIL.
 
@@ -53,7 +60,11 @@
 */
 
 #include "condvar.h"
+#ifndef Py_HAVE_CONDVAR
+#error You need either a POSIX-compatible or a Windows system!
+#endif
 
+#define MUTEX_T PyMUTEX_T
 #define MUTEX_INIT(mut) \
     if (PyMUTEX_INIT(&(mut))) { \
         Py_FatalError("PyMUTEX_INIT(" #mut ") failed"); };
@@ -67,6 +78,7 @@
     if (PyMUTEX_UNLOCK(&(mut))) { \
         Py_FatalError("PyMUTEX_UNLOCK(" #mut ") failed"); };
 
+#define COND_T PyCOND_T
 #define COND_INIT(cond) \
     if (PyCOND_INIT(&(cond))) { \
         Py_FatalError("PyCOND_INIT(" #cond ") failed"); };
@@ -91,36 +103,48 @@
     } \
 
 
-#define DEFAULT_INTERVAL 5000
 
-static void _gil_initialize(struct _gil_runtime_state *state)
-{
-    _Py_atomic_int uninitialized = {-1};
-    state->locked = uninitialized;
-    state->interval = DEFAULT_INTERVAL;
-}
+/* Whether the GIL is already taken (-1 if uninitialized). This is atomic
+   because it can be read without any lock taken in ceval.c. */
+static _Py_atomic_int gil_locked = {-1};
+/* Number of GIL switches since the beginning. */
+static unsigned long gil_switch_number = 0;
+/* Last PyThreadState holding / having held the GIL. This helps us know
+   whether anyone else was scheduled after we dropped the GIL. */
+static _Py_atomic_address gil_last_holder = {0};
+
+/* This condition variable allows one or several threads to wait until
+   the GIL is released. In addition, the mutex also protects the above
+   variables. */
+static COND_T gil_cond;
+static MUTEX_T gil_mutex;
+
+#ifdef FORCE_SWITCHING
+/* This condition variable helps the GIL-releasing thread wait for
+   a GIL-awaiting thread to be scheduled and take the GIL. */
+static COND_T switch_cond;
+static MUTEX_T switch_mutex;
+#endif
+
 
 static int gil_created(void)
 {
-    return (_Py_atomic_load_explicit(&_PyRuntime.ceval.gil.locked,
-                                     _Py_memory_order_acquire)
-            ) >= 0;
+    return _Py_atomic_load_explicit(&gil_locked, _Py_memory_order_acquire) >= 0;
 }
 
 static void create_gil(void)
 {
-    MUTEX_INIT(_PyRuntime.ceval.gil.mutex);
+    MUTEX_INIT(gil_mutex);
 #ifdef FORCE_SWITCHING
-    MUTEX_INIT(_PyRuntime.ceval.gil.switch_mutex);
+    MUTEX_INIT(switch_mutex);
 #endif
-    COND_INIT(_PyRuntime.ceval.gil.cond);
+    COND_INIT(gil_cond);
 #ifdef FORCE_SWITCHING
-    COND_INIT(_PyRuntime.ceval.gil.switch_cond);
+    COND_INIT(switch_cond);
 #endif
-    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder, 0);
-    _Py_ANNOTATE_RWLOCK_CREATE(&_PyRuntime.ceval.gil.locked);
-    _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, 0,
-                              _Py_memory_order_release);
+    _Py_atomic_store_relaxed(&gil_last_holder, 0);
+    _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked);
+    _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release);
 }
 
 static void destroy_gil(void)
@@ -128,62 +152,54 @@
     /* some pthread-like implementations tie the mutex to the cond
      * and must have the cond destroyed first.
      */
-    COND_FINI(_PyRuntime.ceval.gil.cond);
-    MUTEX_FINI(_PyRuntime.ceval.gil.mutex);
+    COND_FINI(gil_cond);
+    MUTEX_FINI(gil_mutex);
 #ifdef FORCE_SWITCHING
-    COND_FINI(_PyRuntime.ceval.gil.switch_cond);
-    MUTEX_FINI(_PyRuntime.ceval.gil.switch_mutex);
+    COND_FINI(switch_cond);
+    MUTEX_FINI(switch_mutex);
 #endif
-    _Py_atomic_store_explicit(&_PyRuntime.ceval.gil.locked, -1,
-                              _Py_memory_order_release);
-    _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked);
+    _Py_atomic_store_explicit(&gil_locked, -1, _Py_memory_order_release);
+    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
 }
 
 static void recreate_gil(void)
 {
-    _Py_ANNOTATE_RWLOCK_DESTROY(&_PyRuntime.ceval.gil.locked);
+    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
     /* XXX should we destroy the old OS resources here? */
     create_gil();
 }
 
 static void drop_gil(PyThreadState *tstate)
 {
-    if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked))
+    if (!_Py_atomic_load_relaxed(&gil_locked))
         Py_FatalError("drop_gil: GIL is not locked");
     /* tstate is allowed to be NULL (early interpreter init) */
     if (tstate != NULL) {
         /* Sub-interpreter support: threads might have been switched
            under our feet using PyThreadState_Swap(). Fix the GIL last
            holder variable so that our heuristics work. */
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder,
-                                 (uintptr_t)tstate);
+        _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate);
     }
 
-    MUTEX_LOCK(_PyRuntime.ceval.gil.mutex);
-    _Py_ANNOTATE_RWLOCK_RELEASED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1);
-    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 0);
-    COND_SIGNAL(_PyRuntime.ceval.gil.cond);
-    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);
+    MUTEX_LOCK(gil_mutex);
+    _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1);
+    _Py_atomic_store_relaxed(&gil_locked, 0);
+    COND_SIGNAL(gil_cond);
+    MUTEX_UNLOCK(gil_mutex);
 
 #ifdef FORCE_SWITCHING
-    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request) &&
-        tstate != NULL)
-    {
-        MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex);
+    if (_Py_atomic_load_relaxed(&gil_drop_request) && tstate != NULL) {
+        MUTEX_LOCK(switch_mutex);
         /* Not switched yet => wait */
-        if (((PyThreadState*)_Py_atomic_load_relaxed(
-                    &_PyRuntime.ceval.gil.last_holder)
-            ) == tstate)
-        {
+        if ((PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder) == tstate) {
         RESET_GIL_DROP_REQUEST();
             /* NOTE: if COND_WAIT does not atomically start waiting when
                releasing the mutex, another thread can run through, take
                the GIL and drop it again, and reset the condition
                before we even had a chance to wait for it. */
-            COND_WAIT(_PyRuntime.ceval.gil.switch_cond,
-                      _PyRuntime.ceval.gil.switch_mutex);
+            COND_WAIT(switch_cond, switch_mutex);
     }
-        MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
+        MUTEX_UNLOCK(switch_mutex);
     }
 #endif
 }
@@ -195,65 +211,60 @@
         Py_FatalError("take_gil: NULL tstate");
 
     err = errno;
-    MUTEX_LOCK(_PyRuntime.ceval.gil.mutex);
+    MUTEX_LOCK(gil_mutex);
 
-    if (!_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked))
+    if (!_Py_atomic_load_relaxed(&gil_locked))
         goto _ready;
 
-    while (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked)) {
+    while (_Py_atomic_load_relaxed(&gil_locked)) {
         int timed_out = 0;
         unsigned long saved_switchnum;
 
-        saved_switchnum = _PyRuntime.ceval.gil.switch_number;
-        COND_TIMED_WAIT(_PyRuntime.ceval.gil.cond, _PyRuntime.ceval.gil.mutex,
-                        INTERVAL, timed_out);
+        saved_switchnum = gil_switch_number;
+        COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
         /* If we timed out and no switch occurred in the meantime, it is time
            to ask the GIL-holding thread to drop it. */
         if (timed_out &&
-            _Py_atomic_load_relaxed(&_PyRuntime.ceval.gil.locked) &&
-            _PyRuntime.ceval.gil.switch_number == saved_switchnum) {
+            _Py_atomic_load_relaxed(&gil_locked) &&
+            gil_switch_number == saved_switchnum) {
             SET_GIL_DROP_REQUEST();
         }
     }
 _ready:
 #ifdef FORCE_SWITCHING
-    /* This mutex must be taken before modifying
-       _PyRuntime.ceval.gil.last_holder (see drop_gil()). */
-    MUTEX_LOCK(_PyRuntime.ceval.gil.switch_mutex);
+    /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */
+    MUTEX_LOCK(switch_mutex);
 #endif
     /* We now hold the GIL */
-    _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.locked, 1);
-    _Py_ANNOTATE_RWLOCK_ACQUIRED(&_PyRuntime.ceval.gil.locked, /*is_write=*/1);
+    _Py_atomic_store_relaxed(&gil_locked, 1);
+    _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1);
 
-    if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(
-                    &_PyRuntime.ceval.gil.last_holder))
-    {
-        _Py_atomic_store_relaxed(&_PyRuntime.ceval.gil.last_holder,
-                                 (uintptr_t)tstate);
-        ++_PyRuntime.ceval.gil.switch_number;
+    if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder)) {
+        _Py_atomic_store_relaxed(&gil_last_holder, (uintptr_t)tstate);
+        ++gil_switch_number;
     }
 
 #ifdef FORCE_SWITCHING
-    COND_SIGNAL(_PyRuntime.ceval.gil.switch_cond);
-    MUTEX_UNLOCK(_PyRuntime.ceval.gil.switch_mutex);
+    COND_SIGNAL(switch_cond);
+    MUTEX_UNLOCK(switch_mutex);
 #endif
-    if (_Py_atomic_load_relaxed(&_PyRuntime.ceval.gil_drop_request)) {
+    if (_Py_atomic_load_relaxed(&gil_drop_request)) {
         RESET_GIL_DROP_REQUEST();
     }
     if (tstate->async_exc != NULL) {
         _PyEval_SignalAsyncExc();
     }
 
-    MUTEX_UNLOCK(_PyRuntime.ceval.gil.mutex);
+    MUTEX_UNLOCK(gil_mutex);
     errno = err;
 }
 
 void _PyEval_SetSwitchInterval(unsigned long microseconds)
 {
-    _PyRuntime.ceval.gil.interval = microseconds;
+    gil_interval = microseconds;
 }
 
 unsigned long _PyEval_GetSwitchInterval()
 {
-    return _PyRuntime.ceval.gil.interval;
+    return gil_interval;
 }
diff --git a/Python/condvar.h b/Python/condvar.h
index aaa8043..9a71b17 100644
--- a/Python/condvar.h
+++ b/Python/condvar.h
@@ -37,16 +37,27 @@
  *    Condition Variable.
  */
 
-#ifndef _CONDVAR_IMPL_H_
-#define _CONDVAR_IMPL_H_
+#ifndef _CONDVAR_H_
+#define _CONDVAR_H_
 
 #include "Python.h"
-#include "internal/_condvar.h"
+
+#ifndef _POSIX_THREADS
+/* This means pthreads are not implemented in libc headers, hence the macro
+   is not present in unistd.h. But they can still be implemented as an
+   external library (e.g. GNU Pth in pthread emulation). */
+# ifdef HAVE_PTHREAD_H
+#  include <pthread.h> /* _POSIX_THREADS */
+# endif
+#endif
 
 #ifdef _POSIX_THREADS
 /*
  * POSIX support
  */
+#define Py_HAVE_CONDVAR
+
+#include <pthread.h>
 
 #define PyCOND_ADD_MICROSECONDS(tv, interval) \
 do { /* TODO: add overflow and truncation checks */ \
@@ -63,11 +74,13 @@
 #endif
 
 /* The following functions return 0 on success, nonzero on error */
+#define PyMUTEX_T pthread_mutex_t
 #define PyMUTEX_INIT(mut)       pthread_mutex_init((mut), NULL)
 #define PyMUTEX_FINI(mut)       pthread_mutex_destroy(mut)
 #define PyMUTEX_LOCK(mut)       pthread_mutex_lock(mut)
 #define PyMUTEX_UNLOCK(mut)     pthread_mutex_unlock(mut)
 
+#define PyCOND_T pthread_cond_t
 #define PyCOND_INIT(cond)       pthread_cond_init((cond), NULL)
 #define PyCOND_FINI(cond)       pthread_cond_destroy(cond)
 #define PyCOND_SIGNAL(cond)     pthread_cond_signal(cond)
@@ -103,11 +116,45 @@
  * Emulated condition variables ones that work with XP and later, plus
  * example native support on VISTA and onwards.
  */
+#define Py_HAVE_CONDVAR
+
+
+/* include windows if it hasn't been done before */
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* options */
+/* non-emulated condition variables are provided for those that want
+ * to target Windows Vista.  Modify this macro to enable them.
+ */
+#ifndef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1  /* use emulated condition variables */
+#endif
+
+/* fall back to emulation if not targeting Vista */
+#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
+#undef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1
+#endif
+
 
 #if _PY_EMULATED_WIN_CV
 
 /* The mutex is a CriticalSection object and
    The condition variables is emulated with the help of a semaphore.
+   Semaphores are available on Windows XP (2003 server) and later.
+   We use a Semaphore rather than an auto-reset event, because although
+   an auto-reset event might appear to solve the lost-wakeup bug (race
+   condition between releasing the outer lock and waiting) because it
+   maintains state even though a wait hasn't happened, there is still
+   a lost-wakeup problem if more than one thread is interrupted in the
+   critical place.  A semaphore solves that, because its state is counted,
+   not Boolean.
+   Because it is ok to signal a condition variable with no one
+   waiting, we need to keep track of the number of
+   waiting threads.  Otherwise, the semaphore's state could rise
+   without bound.  This also helps reduce the number of "spurious wakeups"
+   that would otherwise happen.
 
    This implementation still has the problem that the threads woken
    with a "signal" aren't necessarily those that are already
@@ -121,6 +168,8 @@
    http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
 */
 
+typedef CRITICAL_SECTION PyMUTEX_T;
+
 Py_LOCAL_INLINE(int)
 PyMUTEX_INIT(PyMUTEX_T *cs)
 {
@@ -149,6 +198,15 @@
     return 0;
 }
 
+/* The ConditionVariable object.  From XP onwards it is easily emulated with
+ * a Semaphore
+ */
+
+typedef struct _PyCOND_T
+{
+    HANDLE sem;
+    int waiting; /* to allow PyCOND_SIGNAL to be a no-op */
+} PyCOND_T;
 
 Py_LOCAL_INLINE(int)
 PyCOND_INIT(PyCOND_T *cv)
@@ -246,7 +304,12 @@
     return 0;
 }
 
-#else /* !_PY_EMULATED_WIN_CV */
+#else
+
+/* Use native Vista primitives if build target is Vista or higher */
+
+/* SRWLOCK is faster and better than CriticalSection */
+typedef SRWLOCK PyMUTEX_T;
 
 Py_LOCAL_INLINE(int)
 PyMUTEX_INIT(PyMUTEX_T *cs)
@@ -276,6 +339,8 @@
 }
 
 
+typedef CONDITION_VARIABLE  PyCOND_T;
+
 Py_LOCAL_INLINE(int)
 PyCOND_INIT(PyCOND_T *cv)
 {
@@ -322,4 +387,4 @@
 
 #endif /* _POSIX_THREADS, NT_THREADS */
 
-#endif /* _CONDVAR_IMPL_H_ */
+#endif /* _CONDVAR_H_ */
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 3f405b1..662405b 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -77,30 +77,6 @@
 extern void _PyGILState_Fini(void);
 #endif /* WITH_THREAD */
 
-_PyRuntimeState _PyRuntime = {0, 0};
-
-void
-_PyRuntime_Initialize(void)
-{
-    /* XXX We only initialize once in the process, which aligns with
-       the static initialization of the former globals now found in
-       _PyRuntime.  However, _PyRuntime *should* be initialized with
-       every Py_Initialize() call, but doing so breaks the runtime.
-       This is because the runtime state is not properly finalized
-       currently. */
-    static int initialized = 0;
-    if (initialized)
-        return;
-    initialized = 1;
-    _PyRuntimeState_Init(&_PyRuntime);
-}
-
-void
-_PyRuntime_Finalize(void)
-{
-    _PyRuntimeState_Fini(&_PyRuntime);
-}
-
 /* Global configuration variable declarations are in pydebug.h */
 /* XXX (ncoghlan): move those declarations to pylifecycle.h? */
 int Py_DebugFlag; /* Needed by parser.c */
@@ -124,6 +100,8 @@
 int Py_LegacyWindowsStdioFlag = 0; /* Uses FileIO instead of WindowsConsoleIO */
 #endif
 
+PyThreadState *_Py_Finalizing = NULL;
+
 /* Hack to force loading of object files */
 int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t) = \
     PyOS_mystrnicmp; /* Python/pystrcmp.o */
@@ -141,17 +119,19 @@
  *
  * Can be called prior to Py_Initialize.
  */
+int _Py_CoreInitialized = 0;
+int _Py_Initialized = 0;
 
 int
 _Py_IsCoreInitialized(void)
 {
-    return _PyRuntime.core_initialized;
+    return _Py_CoreInitialized;
 }
 
 int
 Py_IsInitialized(void)
 {
-    return _PyRuntime.initialized;
+    return _Py_Initialized;
 }
 
 /* Helper to allow an embedding application to override the normal
@@ -564,16 +544,14 @@
     _PyCoreConfig core_config = _PyCoreConfig_INIT;
     _PyMainInterpreterConfig preinit_config = _PyMainInterpreterConfig_INIT;
 
-    _PyRuntime_Initialize();
-
     if (config != NULL) {
         core_config = *config;
     }
 
-    if (_PyRuntime.initialized) {
+    if (_Py_Initialized) {
         Py_FatalError("Py_InitializeCore: main interpreter already initialized");
     }
-    if (_PyRuntime.core_initialized) {
+    if (_Py_CoreInitialized) {
         Py_FatalError("Py_InitializeCore: runtime core already initialized");
     }
 
@@ -586,14 +564,7 @@
      * threads still hanging around from a previous Py_Initialize/Finalize
      * pair :(
      */
-    _PyRuntime.finalizing = NULL;
-
-    if (_PyMem_SetupAllocators(core_config.allocator) < 0) {
-        fprintf(stderr,
-            "Error in PYTHONMALLOC: unknown allocator \"%s\"!\n",
-            core_config.allocator);
-        exit(1);
-    }
+    _Py_Finalizing = NULL;
 
 #ifdef __ANDROID__
     /* Passing "" to setlocale() on Android requests the C locale rather
@@ -635,7 +606,7 @@
         Py_HashRandomizationFlag = 1;
     }
 
-    _PyInterpreterState_Enable(&_PyRuntime);
+    _PyInterpreterState_Init();
     interp = PyInterpreterState_New();
     if (interp == NULL)
         Py_FatalError("Py_InitializeCore: can't make main interpreter");
@@ -727,7 +698,7 @@
     }
 
     /* Only when we get here is the runtime core fully initialized */
-    _PyRuntime.core_initialized = 1;
+    _Py_CoreInitialized = 1;
 }
 
 /* Read configuration settings from standard locations
@@ -768,10 +739,10 @@
     PyInterpreterState *interp;
     PyThreadState *tstate;
 
-    if (!_PyRuntime.core_initialized) {
+    if (!_Py_CoreInitialized) {
         Py_FatalError("Py_InitializeMainInterpreter: runtime core not initialized");
     }
-    if (_PyRuntime.initialized) {
+    if (_Py_Initialized) {
         Py_FatalError("Py_InitializeMainInterpreter: main interpreter already initialized");
     }
 
@@ -792,7 +763,7 @@
          * This means anything which needs support from extension modules
          * or pure Python code in the standard library won't work.
          */
-        _PyRuntime.initialized = 1;
+        _Py_Initialized = 1;
         return 0;
     }
     /* TODO: Report exceptions rather than fatal errors below here */
@@ -837,7 +808,7 @@
         Py_XDECREF(warnings_module);
     }
 
-    _PyRuntime.initialized = 1;
+    _Py_Initialized = 1;
 
     if (!Py_NoSiteFlag)
         initsite(); /* Module site */
@@ -953,7 +924,7 @@
     PyThreadState *tstate;
     int status = 0;
 
-    if (!_PyRuntime.initialized)
+    if (!_Py_Initialized)
         return status;
 
     wait_for_thread_shutdown();
@@ -975,9 +946,9 @@
 
     /* Remaining threads (e.g. daemon threads) will automatically exit
        after taking the GIL (in PyEval_RestoreThread()). */
-    _PyRuntime.finalizing = tstate;
-    _PyRuntime.initialized = 0;
-    _PyRuntime.core_initialized = 0;
+    _Py_Finalizing = tstate;
+    _Py_Initialized = 0;
+    _Py_CoreInitialized = 0;
 
     /* Flush sys.stdout and sys.stderr */
     if (flush_std_files() < 0) {
@@ -1139,7 +1110,6 @@
 #endif
 
     call_ll_exitfuncs();
-    _PyRuntime_Finalize();
     return status;
 }
 
@@ -1169,7 +1139,7 @@
     PyThreadState *tstate, *save_tstate;
     PyObject *bimod, *sysmod;
 
-    if (!_PyRuntime.initialized)
+    if (!_Py_Initialized)
         Py_FatalError("Py_NewInterpreter: call Py_Initialize first");
 
 #ifdef WITH_THREAD
@@ -1884,19 +1854,20 @@
 #  include "pythread.h"
 #endif
 
+static void (*pyexitfunc)(void) = NULL;
 /* For the atexit module. */
 void _Py_PyAtExit(void (*func)(void))
 {
-    _PyRuntime.pyexitfunc = func;
+    pyexitfunc = func;
 }
 
 static void
 call_py_exitfuncs(void)
 {
-    if (_PyRuntime.pyexitfunc == NULL)
+    if (pyexitfunc == NULL)
         return;
 
-    (*_PyRuntime.pyexitfunc)();
+    (*pyexitfunc)();
     PyErr_Clear();
 }
 
@@ -1929,19 +1900,22 @@
 }
 
 #define NEXITFUNCS 32
+static void (*exitfuncs[NEXITFUNCS])(void);
+static int nexitfuncs = 0;
+
 int Py_AtExit(void (*func)(void))
 {
-    if (_PyRuntime.nexitfuncs >= NEXITFUNCS)
+    if (nexitfuncs >= NEXITFUNCS)
         return -1;
-    _PyRuntime.exitfuncs[_PyRuntime.nexitfuncs++] = func;
+    exitfuncs[nexitfuncs++] = func;
     return 0;
 }
 
 static void
 call_ll_exitfuncs(void)
 {
-    while (_PyRuntime.nexitfuncs > 0)
-        (*_PyRuntime.exitfuncs[--_PyRuntime.nexitfuncs])();
+    while (nexitfuncs > 0)
+        (*exitfuncs[--nexitfuncs])();
 
     fflush(stdout);
     fflush(stderr);
diff --git a/Python/pystate.c b/Python/pystate.c
index 2d92637..30a3722 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -34,65 +34,55 @@
 extern "C" {
 #endif
 
-void
-_PyRuntimeState_Init(_PyRuntimeState *runtime)
-{
-    memset(runtime, 0, sizeof(*runtime));
-
-    _PyObject_Initialize(&runtime->obj);
-    _PyMem_Initialize(&runtime->mem);
-    _PyGC_Initialize(&runtime->gc);
-    _PyEval_Initialize(&runtime->ceval);
-
-    runtime->gilstate.check_enabled = 1;
-    runtime->gilstate.autoTLSkey = -1;
+int _PyGILState_check_enabled = 1;
 
 #ifdef WITH_THREAD
-    runtime->interpreters.mutex = PyThread_allocate_lock();
-    if (runtime->interpreters.mutex == NULL)
-        Py_FatalError("Can't initialize threads for interpreter");
-#endif
-    runtime->interpreters.next_id = -1;
-}
+#include "pythread.h"
+static PyThread_type_lock head_mutex = NULL; /* Protects interp->tstate_head */
+#define HEAD_INIT() (void)(head_mutex || (head_mutex = PyThread_allocate_lock()))
+#define HEAD_LOCK() PyThread_acquire_lock(head_mutex, WAIT_LOCK)
+#define HEAD_UNLOCK() PyThread_release_lock(head_mutex)
 
-void
-_PyRuntimeState_Fini(_PyRuntimeState *runtime)
-{
-#ifdef WITH_THREAD
-    if (runtime->interpreters.mutex != NULL) {
-        PyThread_free_lock(runtime->interpreters.mutex);
-        runtime->interpreters.mutex = NULL;
-    }
-#endif
-}
-
-#ifdef WITH_THREAD
-#define HEAD_LOCK() PyThread_acquire_lock(_PyRuntime.interpreters.mutex, \
-                                          WAIT_LOCK)
-#define HEAD_UNLOCK() PyThread_release_lock(_PyRuntime.interpreters.mutex)
+/* The single PyInterpreterState used by this process'
+   GILState implementation
+*/
+/* TODO: Given interp_main, it may be possible to kill this ref */
+static PyInterpreterState *autoInterpreterState = NULL;
+static int autoTLSkey = -1;
 #else
+#define HEAD_INIT() /* Nothing */
 #define HEAD_LOCK() /* Nothing */
 #define HEAD_UNLOCK() /* Nothing */
 #endif
 
+static PyInterpreterState *interp_head = NULL;
+static PyInterpreterState *interp_main = NULL;
+
+/* Assuming the current thread holds the GIL, this is the
+   PyThreadState for the current thread. */
+_Py_atomic_address _PyThreadState_Current = {0};
+PyThreadFrameGetter _PyThreadState_GetFrame = NULL;
+
 #ifdef WITH_THREAD
 static void _PyGILState_NoteThreadState(PyThreadState* tstate);
 #endif
 
+/* _next_interp_id is an auto-numbered sequence of small integers.
+   It gets initialized in _PyInterpreterState_Init(), which is called
+   in Py_Initialize(), and used in PyInterpreterState_New().  A negative
+   interpreter ID indicates an error occurred.  The main interpreter
+   will always have an ID of 0.  Overflow results in a RuntimeError.
+   If that becomes a problem later then we can adjust, e.g. by using
+   a Python int.
+
+   We initialize this to -1 so that the pre-Py_Initialize() value
+   results in an error. */
+static int64_t _next_interp_id = -1;
+
 void
-_PyInterpreterState_Enable(_PyRuntimeState *runtime)
+_PyInterpreterState_Init(void)
 {
-    runtime->interpreters.next_id = 0;
-#ifdef WITH_THREAD
-    /* Since we only call _PyRuntimeState_Init() once per process
-       (see _PyRuntime_Initialize()), we make sure the mutex is
-       initialized here. */
-    if (runtime->interpreters.mutex == NULL) {
-        runtime->interpreters.mutex = PyThread_allocate_lock();
-        if (runtime->interpreters.mutex == NULL)
-            Py_FatalError("Can't initialize threads for interpreter");
-    }
-#endif
+    _next_interp_id = 0;
 }
 
 PyInterpreterState *
@@ -102,16 +92,16 @@
                                  PyMem_RawMalloc(sizeof(PyInterpreterState));
 
     if (interp != NULL) {
+        HEAD_INIT();
+#ifdef WITH_THREAD
+        if (head_mutex == NULL)
+            Py_FatalError("Can't initialize threads for interpreter");
+#endif
         interp->modules_by_index = NULL;
         interp->sysdict = NULL;
         interp->builtins = NULL;
         interp->builtins_copy = NULL;
         interp->tstate_head = NULL;
-        interp->check_interval = 100;
-        interp->warnoptions = NULL;
-        interp->xoptions = NULL;
-        interp->num_threads = 0;
-        interp->pythread_stacksize = 0;
         interp->codec_search_path = NULL;
         interp->codec_search_cache = NULL;
         interp->codec_error_registry = NULL;
@@ -135,19 +125,19 @@
 #endif
 
         HEAD_LOCK();
-        interp->next = _PyRuntime.interpreters.head;
-        if (_PyRuntime.interpreters.main == NULL) {
-            _PyRuntime.interpreters.main = interp;
+        interp->next = interp_head;
+        if (interp_main == NULL) {
+            interp_main = interp;
         }
-        _PyRuntime.interpreters.head = interp;
-        if (_PyRuntime.interpreters.next_id < 0) {
+        interp_head = interp;
+        if (_next_interp_id < 0) {
             /* overflow or Py_Initialize() not called! */
             PyErr_SetString(PyExc_RuntimeError,
                             "failed to get an interpreter ID");
             interp = NULL;
         } else {
-            interp->id = _PyRuntime.interpreters.next_id;
-            _PyRuntime.interpreters.next_id += 1;
+            interp->id = _next_interp_id;
+            _next_interp_id += 1;
         }
         HEAD_UNLOCK();
     }
@@ -199,7 +189,7 @@
     PyInterpreterState **p;
     zapthreads(interp);
     HEAD_LOCK();
-    for (p = &_PyRuntime.interpreters.head; ; p = &(*p)->next) {
+    for (p = &interp_head; ; p = &(*p)->next) {
         if (*p == NULL)
             Py_FatalError(
                 "PyInterpreterState_Delete: invalid interp");
@@ -209,13 +199,19 @@
     if (interp->tstate_head != NULL)
         Py_FatalError("PyInterpreterState_Delete: remaining threads");
     *p = interp->next;
-    if (_PyRuntime.interpreters.main == interp) {
-        _PyRuntime.interpreters.main = NULL;
-        if (_PyRuntime.interpreters.head != NULL)
+    if (interp_main == interp) {
+        interp_main = NULL;
+        if (interp_head != NULL)
             Py_FatalError("PyInterpreterState_Delete: remaining subinterpreters");
     }
     HEAD_UNLOCK();
     PyMem_RawFree(interp);
+#ifdef WITH_THREAD
+    if (interp_head == NULL && head_mutex != NULL) {
+        PyThread_free_lock(head_mutex);
+        head_mutex = NULL;
+    }
+#endif
 }
 
 
@@ -503,11 +499,8 @@
     if (tstate == GET_TSTATE())
         Py_FatalError("PyThreadState_Delete: tstate is still current");
 #ifdef WITH_THREAD
-    if (_PyRuntime.gilstate.autoInterpreterState &&
-        PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate)
-    {
-        PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey);
-    }
+    if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
+        PyThread_delete_key_value(autoTLSkey);
 #endif /* WITH_THREAD */
     tstate_delete_common(tstate);
 }
@@ -522,11 +515,8 @@
         Py_FatalError(
             "PyThreadState_DeleteCurrent: no current tstate");
     tstate_delete_common(tstate);
-    if (_PyRuntime.gilstate.autoInterpreterState &&
-        PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == tstate)
-    {
-        PyThread_delete_key_value(_PyRuntime.gilstate.autoTLSkey);
-    }
+    if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
+        PyThread_delete_key_value(autoTLSkey);
     SET_TSTATE(NULL);
     PyEval_ReleaseLock();
 }
@@ -686,13 +676,13 @@
 PyInterpreterState *
 PyInterpreterState_Head(void)
 {
-    return _PyRuntime.interpreters.head;
+    return interp_head;
 }
 
 PyInterpreterState *
 PyInterpreterState_Main(void)
 {
-    return _PyRuntime.interpreters.main;
+    return interp_main;
 }
 
 PyInterpreterState *
@@ -732,7 +722,7 @@
      * need to grab head_mutex for the duration.
      */
     HEAD_LOCK();
-    for (i = _PyRuntime.interpreters.head; i != NULL; i = i->next) {
+    for (i = interp_head; i != NULL; i = i->next) {
         PyThreadState *t;
         for (t = i->tstate_head; t != NULL; t = t->next) {
             PyObject *id;
@@ -784,11 +774,11 @@
 _PyGILState_Init(PyInterpreterState *i, PyThreadState *t)
 {
     assert(i && t); /* must init with valid states */
-    _PyRuntime.gilstate.autoTLSkey = PyThread_create_key();
-    if (_PyRuntime.gilstate.autoTLSkey == -1)
+    autoTLSkey = PyThread_create_key();
+    if (autoTLSkey == -1)
         Py_FatalError("Could not allocate TLS entry");
-    _PyRuntime.gilstate.autoInterpreterState = i;
-    assert(PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL);
+    autoInterpreterState = i;
+    assert(PyThread_get_key_value(autoTLSkey) == NULL);
     assert(t->gilstate_counter == 0);
 
     _PyGILState_NoteThreadState(t);
@@ -797,15 +787,15 @@
 PyInterpreterState *
 _PyGILState_GetInterpreterStateUnsafe(void)
 {
-    return _PyRuntime.gilstate.autoInterpreterState;
+    return autoInterpreterState;
 }
 
 void
 _PyGILState_Fini(void)
 {
-    PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey);
-    _PyRuntime.gilstate.autoTLSkey = -1;
-    _PyRuntime.gilstate.autoInterpreterState = NULL;
+    PyThread_delete_key(autoTLSkey);
+    autoTLSkey = -1;
+    autoInterpreterState = NULL;
 }
 
 /* Reset the TLS key - called by PyOS_AfterFork_Child().
@@ -816,19 +806,17 @@
 _PyGILState_Reinit(void)
 {
 #ifdef WITH_THREAD
-    _PyRuntime.interpreters.mutex = PyThread_allocate_lock();
-    if (_PyRuntime.interpreters.mutex == NULL)
-        Py_FatalError("Can't initialize threads for interpreter");
+    head_mutex = NULL;
+    HEAD_INIT();
 #endif
     PyThreadState *tstate = PyGILState_GetThisThreadState();
-    PyThread_delete_key(_PyRuntime.gilstate.autoTLSkey);
-    if ((_PyRuntime.gilstate.autoTLSkey = PyThread_create_key()) == -1)
+    PyThread_delete_key(autoTLSkey);
+    if ((autoTLSkey = PyThread_create_key()) == -1)
         Py_FatalError("Could not allocate TLS entry");
 
     /* If the thread had an associated auto thread state, reassociate it with
      * the new key. */
-    if (tstate && PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey,
-                                         (void *)tstate) < 0)
+    if (tstate && PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
         Py_FatalError("Couldn't create autoTLSkey mapping");
 }
 
@@ -843,7 +831,7 @@
     /* If autoTLSkey isn't initialized, this must be the very first
        threadstate created in Py_Initialize().  Don't do anything for now
        (we'll be back here when _PyGILState_Init is called). */
-    if (!_PyRuntime.gilstate.autoInterpreterState)
+    if (!autoInterpreterState)
         return;
 
     /* Stick the thread state for this thread in thread local storage.
@@ -858,13 +846,9 @@
        The first thread state created for that given OS level thread will
        "win", which seems reasonable behaviour.
     */
-    if (PyThread_get_key_value(_PyRuntime.gilstate.autoTLSkey) == NULL) {
-        if ((PyThread_set_key_value(_PyRuntime.gilstate.autoTLSkey,
-                                    (void *)tstate)
-             ) < 0)
-        {
+    if (PyThread_get_key_value(autoTLSkey) == NULL) {
+        if (PyThread_set_key_value(autoTLSkey, (void *)tstate) < 0)
             Py_FatalError("Couldn't create autoTLSkey mapping");
-        }
     }
 
     /* PyGILState_Release must not try to delete this thread state. */
@@ -875,10 +859,9 @@
 PyThreadState *
 PyGILState_GetThisThreadState(void)
 {
-    if (_PyRuntime.gilstate.autoInterpreterState == NULL)
+    if (autoInterpreterState == NULL)
         return NULL;
-    return (PyThreadState *)PyThread_get_key_value(
-                _PyRuntime.gilstate.autoTLSkey);
+    return (PyThreadState *)PyThread_get_key_value(autoTLSkey);
 }
 
 int
@@ -889,7 +872,7 @@
     if (!_PyGILState_check_enabled)
         return 1;
 
-    if (_PyRuntime.gilstate.autoTLSkey == -1)
+    if (autoTLSkey == -1)
         return 1;
 
     tstate = GET_TSTATE();
@@ -909,10 +892,8 @@
        spells out other issues.  Embedders are expected to have
        called Py_Initialize() and usually PyEval_InitThreads().
     */
-    /* Py_Initialize() hasn't been called! */
-    assert(_PyRuntime.gilstate.autoInterpreterState);
-    tcur = (PyThreadState *)PyThread_get_key_value(
-                _PyRuntime.gilstate.autoTLSkey);
+    assert(autoInterpreterState); /* Py_Initialize() hasn't been called! */
+    tcur = (PyThreadState *)PyThread_get_key_value(autoTLSkey);
     if (tcur == NULL) {
         /* At startup, Python has no concrete GIL. If PyGILState_Ensure() is
            called from a new thread for the first time, we need the create the
@@ -920,7 +901,7 @@
         PyEval_InitThreads();
 
         /* Create a new thread state for this thread */
-        tcur = PyThreadState_New(_PyRuntime.gilstate.autoInterpreterState);
+        tcur = PyThreadState_New(autoInterpreterState);
         if (tcur == NULL)
             Py_FatalError("Couldn't create thread-state for new thread");
         /* This is our thread state!  We'll need to delete it in the
@@ -945,7 +926,7 @@
 PyGILState_Release(PyGILState_STATE oldstate)
 {
     PyThreadState *tcur = (PyThreadState *)PyThread_get_key_value(
-                                _PyRuntime.gilstate.autoTLSkey);
+                                                            autoTLSkey);
     if (tcur == NULL)
         Py_FatalError("auto-releasing thread-state, "
                       "but no thread-state for this thread");
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 080c541..852babb 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -519,6 +519,8 @@
 See the profiler chapter in the library manual."
 );
 
+static int _check_interval = 100;
+
 static PyObject *
 sys_setcheckinterval(PyObject *self, PyObject *args)
 {
@@ -527,8 +529,7 @@
                      "are deprecated.  Use sys.setswitchinterval() "
                      "instead.", 1) < 0)
         return NULL;
-    PyInterpreterState *interp = PyThreadState_GET()->interp;
-    if (!PyArg_ParseTuple(args, "i:setcheckinterval", &interp->check_interval))
+    if (!PyArg_ParseTuple(args, "i:setcheckinterval", &_check_interval))
         return NULL;
     Py_RETURN_NONE;
 }
@@ -548,8 +549,7 @@
                      "are deprecated.  Use sys.getswitchinterval() "
                      "instead.", 1) < 0)
         return NULL;
-    PyInterpreterState *interp = PyThreadState_GET()->interp;
-    return PyLong_FromLong(interp->check_interval);
+    return PyLong_FromLong(_check_interval);
 }
 
 PyDoc_STRVAR(getcheckinterval_doc,
@@ -1339,7 +1339,7 @@
 static PyObject *
 sys_is_finalizing(PyObject* self, PyObject* args)
 {
-    return PyBool_FromLong(_Py_IS_FINALIZING());
+    return PyBool_FromLong(_Py_Finalizing != NULL);
 }
 
 PyDoc_STRVAR(is_finalizing_doc,
@@ -1479,24 +1479,11 @@
     return list;
 }
 
-static PyObject *
-get_warnoptions(void)
-{
-    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
-    if (warnoptions == NULL || !PyList_Check(warnoptions)) {
-        Py_XDECREF(warnoptions);
-        warnoptions = PyList_New(0);
-        if (warnoptions == NULL)
-            return NULL;
-        PyThreadState_GET()->interp->warnoptions = warnoptions;
-    }
-    return warnoptions;
-}
+static PyObject *warnoptions = NULL;
 
 void
 PySys_ResetWarnOptions(void)
 {
-    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
     if (warnoptions == NULL || !PyList_Check(warnoptions))
         return;
     PyList_SetSlice(warnoptions, 0, PyList_GET_SIZE(warnoptions), NULL);
@@ -1505,9 +1492,12 @@
 void
 PySys_AddWarnOptionUnicode(PyObject *unicode)
 {
-    PyObject *warnoptions = get_warnoptions();
-    if (warnoptions == NULL)
-        return;
+    if (warnoptions == NULL || !PyList_Check(warnoptions)) {
+        Py_XDECREF(warnoptions);
+        warnoptions = PyList_New(0);
+        if (warnoptions == NULL)
+            return;
+    }
     PyList_Append(warnoptions, unicode);
 }
 
@@ -1525,20 +1515,17 @@
 int
 PySys_HasWarnOptions(void)
 {
-    PyObject *warnoptions = PyThreadState_GET()->interp->warnoptions;
     return (warnoptions != NULL && (PyList_Size(warnoptions) > 0)) ? 1 : 0;
 }
 
+static PyObject *xoptions = NULL;
+
 static PyObject *
 get_xoptions(void)
 {
-    PyObject *xoptions = PyThreadState_GET()->interp->xoptions;
     if (xoptions == NULL || !PyDict_Check(xoptions)) {
         Py_XDECREF(xoptions);
         xoptions = PyDict_New();
-        if (xoptions == NULL)
-            return NULL;
-        PyThreadState_GET()->interp->xoptions = xoptions;
     }
     return xoptions;
 }
@@ -2143,15 +2130,17 @@
     SET_SYS_FROM_STRING_INT_RESULT("base_exec_prefix",
                         PyUnicode_FromWideChar(Py_GetExecPrefix(), -1));
 
-    PyObject *warnoptions = get_warnoptions();
-    if (warnoptions == NULL)
-        return -1;
-    SET_SYS_FROM_STRING_BORROW_INT_RESULT("warnoptions", warnoptions);
+    if (warnoptions == NULL) {
+        warnoptions = PyList_New(0);
+        if (warnoptions == NULL)
+            return -1;
+    }
 
-    PyObject *xoptions = get_xoptions();
-    if (xoptions == NULL)
-        return -1;
-    SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", xoptions);
+    SET_SYS_FROM_STRING_INT_RESULT("warnoptions",
+                                   PyList_GetSlice(warnoptions,
+                                                   0, Py_SIZE(warnoptions)));
+
+    SET_SYS_FROM_STRING_BORROW_INT_RESULT("_xoptions", get_xoptions());
 
     if (PyErr_Occurred())
         return -1;
diff --git a/Python/thread.c b/Python/thread.c
index 6fd594f..4d2f2c3 100644
--- a/Python/thread.c
+++ b/Python/thread.c
@@ -76,6 +76,11 @@
     PyThread__init_thread();
 }
 
+/* Support for runtime thread stack size tuning.
+   A value of 0 means using the platform's default stack size
+   or the size specified by the THREAD_STACK_SIZE macro. */
+static size_t _pythread_stacksize = 0;
+
 #if defined(_POSIX_THREADS)
 #   define PYTHREAD_NAME "pthread"
 #   include "thread_pthread.h"
@@ -91,7 +96,7 @@
 size_t
 PyThread_get_stacksize(void)
 {
-    return PyThreadState_GET()->interp->pythread_stacksize;
+    return _pythread_stacksize;
 }
 
 /* Only platforms defining a THREAD_SET_STACKSIZE() macro
diff --git a/Python/thread_nt.h b/Python/thread_nt.h
index 2f3a71b..47eb4b6 100644
--- a/Python/thread_nt.h
+++ b/Python/thread_nt.h
@@ -189,10 +189,9 @@
         return PYTHREAD_INVALID_THREAD_ID;
     obj->func = func;
     obj->arg = arg;
-    PyThreadState *tstate = PyThreadState_GET();
-    size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0;
     hThread = (HANDLE)_beginthreadex(0,
-                      Py_SAFE_DOWNCAST(stacksize, Py_ssize_t, unsigned int),
+                      Py_SAFE_DOWNCAST(_pythread_stacksize,
+                                       Py_ssize_t, unsigned int),
                       bootstrap, obj,
                       0, &threadID);
     if (hThread == 0) {
@@ -333,13 +332,13 @@
 {
     /* set to default */
     if (size == 0) {
-        PyThreadState_GET()->interp->pythread_stacksize = 0;
+        _pythread_stacksize = 0;
         return 0;
     }
 
     /* valid range? */
     if (size >= THREAD_MIN_STACKSIZE && size < THREAD_MAX_STACKSIZE) {
-        PyThreadState_GET()->interp->pythread_stacksize = size;
+        _pythread_stacksize = size;
         return 0;
     }
 
diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h
index ea05b6f..268dec4 100644
--- a/Python/thread_pthread.h
+++ b/Python/thread_pthread.h
@@ -205,9 +205,8 @@
         return PYTHREAD_INVALID_THREAD_ID;
 #endif
 #if defined(THREAD_STACK_SIZE)
-    PyThreadState *tstate = PyThreadState_GET();
-    size_t stacksize = tstate ? tstate->interp->pythread_stacksize : 0;
-    tss = (stacksize != 0) ? stacksize : THREAD_STACK_SIZE;
+    tss = (_pythread_stacksize != 0) ? _pythread_stacksize
+                                     : THREAD_STACK_SIZE;
     if (tss != 0) {
         if (pthread_attr_setstacksize(&attrs, tss) != 0) {
             pthread_attr_destroy(&attrs);
@@ -579,7 +578,7 @@
 
     /* set to default */
     if (size == 0) {
-        PyThreadState_GET()->interp->pythread_stacksize = 0;
+        _pythread_stacksize = 0;
         return 0;
     }
 
@@ -596,7 +595,7 @@
             rc = pthread_attr_setstacksize(&attrs, size);
             pthread_attr_destroy(&attrs);
             if (rc == 0) {
-                PyThreadState_GET()->interp->pythread_stacksize = size;
+                _pythread_stacksize = size;
                 return 0;
             }
         }