bpo-40522: _PyThreadState_Swap() sets autoTSSkey (GH-19939)

In the experimental isolated subinterpreters build mode,
_PyThreadState_GET() gets the autoTSSkey variable and
_PyThreadState_Swap() sets the autoTSSkey variable.

* Add _PyThreadState_GetTSS()
* _PyRuntimeState_GetThreadState() and _PyThreadState_GET()
  return _PyThreadState_GetTSS()
* PyEval_SaveThread() sets the autoTSSkey variable to the current
  Python thread state rather than NULL.
* eval_frame_handle_pending() no longer checks that the
  _PyThreadState_Swap() result is NULL.
* _PyThreadState_Swap() gets the current Python thread state with
  _PyThreadState_GetTSS() rather than
  _PyRuntimeGILState_GetThreadState().
* PyGILState_Ensure() no longer checks _PyEval_ThreadsInitialized()
  since it cannot access the current interpreter.
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index c82e8db..d96ba31 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -49,8 +49,18 @@
 /* Variable and macro for in-line access to current thread
    and interpreter state */
 
-static inline PyThreadState* _PyRuntimeState_GetThreadState(_PyRuntimeState *runtime) {
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+PyAPI_FUNC(PyThreadState*) _PyThreadState_GetTSS(void);
+#endif
+
+static inline PyThreadState*
+_PyRuntimeState_GetThreadState(_PyRuntimeState *runtime)
+{
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+    return _PyThreadState_GetTSS();
+#else
     return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->gilstate.tstate_current);
+#endif
 }
 
 /* Get the current Python thread state.
@@ -62,8 +72,14 @@
    The caller must hold the GIL.
 
    See also PyThreadState_Get() and PyThreadState_GET(). */
-static inline PyThreadState *_PyThreadState_GET(void) {
+static inline PyThreadState*
+_PyThreadState_GET(void)
+{
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+    return _PyThreadState_GetTSS();
+#else
     return _PyRuntimeState_GetThreadState(&_PyRuntime);
+#endif
 }
 
 /* Redefine PyThreadState_GET() as an alias to _PyThreadState_GET() */
diff --git a/Python/ceval.c b/Python/ceval.c
index 0c08a76..b5854d3 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -380,9 +380,13 @@
     take_gil(tstate);
 
     struct _gilstate_runtime_state *gilstate = &tstate->interp->runtime->gilstate;
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+    (void)_PyThreadState_Swap(gilstate, tstate);
+#else
     if (_PyThreadState_Swap(gilstate, tstate) != NULL) {
         Py_FatalError("non-NULL old thread state");
     }
+#endif
 }
 
 void
@@ -443,7 +447,12 @@
 PyEval_SaveThread(void)
 {
     _PyRuntimeState *runtime = &_PyRuntime;
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+    PyThreadState *old_tstate = _PyThreadState_GET();
+    PyThreadState *tstate = _PyThreadState_Swap(&runtime->gilstate, old_tstate);
+#else
     PyThreadState *tstate = _PyThreadState_Swap(&runtime->gilstate, NULL);
+#endif
     ensure_tstate_not_null(__func__, tstate);
 
     struct _ceval_runtime_state *ceval = &runtime->ceval;
@@ -866,9 +875,13 @@
 
         take_gil(tstate);
 
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+        (void)_PyThreadState_Swap(&runtime->gilstate, tstate);
+#else
         if (_PyThreadState_Swap(&runtime->gilstate, tstate) != NULL) {
             Py_FatalError("orphan tstate");
         }
+#endif
     }
 
     /* Check for asynchronous exception. */
diff --git a/Python/pystate.c b/Python/pystate.c
index dd95750..119fe31 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -956,6 +956,14 @@
 }
 
 
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+PyThreadState*
+_PyThreadState_GetTSS(void) {
+    return PyThread_tss_get(&_PyRuntime.gilstate.autoTSSkey);
+}
+#endif
+
+
 PyThreadState *
 _PyThreadState_UncheckedGet(void)
 {
@@ -975,7 +983,11 @@
 PyThreadState *
 _PyThreadState_Swap(struct _gilstate_runtime_state *gilstate, PyThreadState *newts)
 {
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+    PyThreadState *oldts = _PyThreadState_GetTSS();
+#else
     PyThreadState *oldts = _PyRuntimeGILState_GetThreadState(gilstate);
+#endif
 
     _PyRuntimeGILState_SetThreadState(gilstate, newts);
     /* It should not be possible for more than one thread state
@@ -994,6 +1006,9 @@
         errno = err;
     }
 #endif
+#ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+    PyThread_tss_set(&gilstate->autoTSSkey, newts);
+#endif
     return oldts;
 }
 
@@ -1363,7 +1378,9 @@
 
     /* Ensure that _PyEval_InitThreads() and _PyGILState_Init() have been
        called by Py_Initialize() */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
     assert(_PyEval_ThreadsInitialized(runtime));
+#endif
     assert(gilstate->autoInterpreterState);
 
     PyThreadState *tcur = (PyThreadState *)PyThread_tss_get(&gilstate->autoTSSkey);