bpo-35081: Cleanup pystate.c and pystate.h (GH-10240)

* Remove _PyThreadState_Current
* Replace GET_TSTATE() with PyThreadState_GET()
* Replace GET_INTERP_STATE() with _PyInterpreterState_GET_UNSAFE()
* Replace direct access to _PyThreadState_Current with
  PyThreadState_GET()
* Replace _PyThreadState_Current with
  _PyRuntime.gilstate.tstate_current
* Rename SET_TSTATE() to _PyThreadState_SET(), name more
  consistent with _PyThreadState_GET()
* Update outdated comments
diff --git a/Include/pystate.h b/Include/pystate.h
index 80ee0d1..8860f12 100644
--- a/Include/pystate.h
+++ b/Include/pystate.h
@@ -208,8 +208,8 @@
      * if the thread holds the last reference to the lock, decref'ing the
      * lock will delete the lock, and that may trigger arbitrary Python code
      * if there's a weakref, with a callback, to the lock.  But by this time
-     * _PyThreadState_Current is already NULL, so only the simplest of C code
-     * can be allowed to run (in particular it must not be possible to
+     * _PyRuntime.gilstate.tstate_current is already NULL, so only the simplest
+     * of C code can be allowed to run (in particular it must not be possible to
      * release the GIL).
      * So instead of holding the lock directly, the tstate holds a weakref to
      * the lock:  that's the value of on_delete_data below.  Decref'ing a
@@ -307,9 +307,8 @@
 /* Assuming the current thread holds the GIL, this is the
    PyThreadState for the current thread. */
 #ifdef Py_BUILD_CORE
-#  define _PyThreadState_Current _PyRuntime.gilstate.tstate_current
 #  define PyThreadState_GET() \
-             ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
+             ((PyThreadState*)_Py_atomic_load_relaxed(&_PyRuntime.gilstate.tstate_current))
 #else
 #  define PyThreadState_GET() PyThreadState_Get()
 #endif
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index ee65695..363d902 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1314,8 +1314,8 @@
     /* We can arrive here with a NULL tstate during initialization: try
        running "python -Wi" for an example related to string interning.
        Let's just hope that no exception occurs then...  This must be
-       _PyThreadState_Current and not PyThreadState_GET() because in debug
-       mode, the latter complains if tstate is NULL. */
+       PyThreadState_GET() and not PyThreadState_Get() because the latter
+       abort Python if tstate is NULL. */
     tstate = PyThreadState_GET();
     if (tstate != NULL && tstate->curexc_type != NULL) {
         /* preserve the existing exception */
diff --git a/Python/ceval.c b/Python/ceval.c
index 9a96dc7..6443123 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -188,8 +188,7 @@
        We therefore avoid PyThreadState_GET() which dumps a fatal error
        in debug mode.
     */
-    drop_gil((PyThreadState*)_Py_atomic_load_relaxed(
-        &_PyThreadState_Current));
+    drop_gil(PyThreadState_GET());
 }
 
 void
diff --git a/Python/pystate.c b/Python/pystate.c
index 7b3d3d4..d049811 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -4,12 +4,9 @@
 #include "Python.h"
 #include "internal/pystate.h"
 
-#define GET_TSTATE() \
-    ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
-#define SET_TSTATE(value) \
-    _Py_atomic_store_relaxed(&_PyThreadState_Current, (uintptr_t)(value))
-#define GET_INTERP_STATE() \
-    (GET_TSTATE()->interp)
+#define _PyThreadState_SET(value) \
+    _Py_atomic_store_relaxed(&_PyRuntime.gilstate.tstate_current, \
+                             (uintptr_t)(value))
 
 
 /* --------------------------------------------------------------------------
@@ -309,7 +306,7 @@
 PyInterpreterState *
 _PyInterpreterState_Get(void)
 {
-    PyThreadState *tstate = GET_TSTATE();
+    PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL) {
         Py_FatalError("_PyInterpreterState_Get(): no current thread state");
     }
@@ -508,7 +505,7 @@
 PyState_FindModule(struct PyModuleDef* module)
 {
     Py_ssize_t index = module->m_base.m_index;
-    PyInterpreterState *state = GET_INTERP_STATE();
+    PyInterpreterState *state = _PyInterpreterState_GET_UNSAFE();
     PyObject *res;
     if (module->m_slots) {
         return NULL;
@@ -536,7 +533,7 @@
                         "PyState_AddModule called on module with slots");
         return -1;
     }
-    state = GET_INTERP_STATE();
+    state = _PyInterpreterState_GET_UNSAFE();
     if (!state->modules_by_index) {
         state->modules_by_index = PyList_New(0);
         if (!state->modules_by_index)
@@ -554,7 +551,7 @@
 PyState_AddModule(PyObject* module, struct PyModuleDef* def)
 {
     Py_ssize_t index;
-    PyInterpreterState *state = GET_INTERP_STATE();
+    PyInterpreterState *state = _PyInterpreterState_GET_UNSAFE();
     if (!def) {
         Py_FatalError("PyState_AddModule: Module Definition is NULL");
         return -1;
@@ -581,7 +578,7 @@
                         "PyState_RemoveModule called on module with slots");
         return -1;
     }
-    state = GET_INTERP_STATE();
+    state = _PyInterpreterState_GET_UNSAFE();
     if (index == 0) {
         Py_FatalError("PyState_RemoveModule: Module index invalid.");
         return -1;
@@ -601,7 +598,7 @@
 void
 _PyState_ClearModules(void)
 {
-    PyInterpreterState *state = GET_INTERP_STATE();
+    PyInterpreterState *state = _PyInterpreterState_GET_UNSAFE();
     if (state->modules_by_index) {
         Py_ssize_t i;
         for (i = 0; i < PyList_GET_SIZE(state->modules_by_index); i++) {
@@ -691,7 +688,7 @@
 void
 PyThreadState_Delete(PyThreadState *tstate)
 {
-    if (tstate == GET_TSTATE())
+    if (tstate == PyThreadState_GET())
         Py_FatalError("PyThreadState_Delete: tstate is still current");
     if (_PyRuntime.gilstate.autoInterpreterState &&
         PyThread_tss_get(&_PyRuntime.gilstate.autoTSSkey) == tstate)
@@ -705,7 +702,7 @@
 void
 PyThreadState_DeleteCurrent()
 {
-    PyThreadState *tstate = GET_TSTATE();
+    PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL)
         Py_FatalError(
             "PyThreadState_DeleteCurrent: no current tstate");
@@ -715,7 +712,7 @@
     {
         PyThread_tss_set(&_PyRuntime.gilstate.autoTSSkey, NULL);
     }
-    SET_TSTATE(NULL);
+    _PyThreadState_SET(NULL);
     PyEval_ReleaseLock();
 }
 
@@ -760,14 +757,14 @@
 PyThreadState *
 _PyThreadState_UncheckedGet(void)
 {
-    return GET_TSTATE();
+    return PyThreadState_GET();
 }
 
 
 PyThreadState *
 PyThreadState_Get(void)
 {
-    PyThreadState *tstate = GET_TSTATE();
+    PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL)
         Py_FatalError("PyThreadState_Get: no current thread");
 
@@ -778,9 +775,9 @@
 PyThreadState *
 PyThreadState_Swap(PyThreadState *newts)
 {
-    PyThreadState *oldts = GET_TSTATE();
+    PyThreadState *oldts = PyThreadState_GET();
 
-    SET_TSTATE(newts);
+    _PyThreadState_SET(newts);
     /* It should not be possible for more than one thread state
        to be used for a thread.  Check this the best we can in debug
        builds.
@@ -809,7 +806,7 @@
 PyObject *
 PyThreadState_GetDict(void)
 {
-    PyThreadState *tstate = GET_TSTATE();
+    PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL)
         return NULL;
 
@@ -834,7 +831,7 @@
 int
 PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
 {
-    PyInterpreterState *interp = GET_INTERP_STATE();
+    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
     PyThreadState *p;
 
     /* Although the GIL is held, a few C API functions can be called
@@ -960,7 +957,7 @@
 {
     /* Must be the tstate for this thread */
     assert(PyGILState_GetThisThreadState()==tstate);
-    return tstate == GET_TSTATE();
+    return tstate == PyThreadState_GET();
 }
 
 /* Internal initialization/finalization functions called by
@@ -1087,7 +1084,7 @@
         return 1;
     }
 
-    tstate = GET_TSTATE();
+    tstate = PyThreadState_GET();
     if (tstate == NULL)
         return 0;