bpo-35081: Cleanup pystate.c and pystate.h (GH-10240)

* Remove _PyThreadState_Current
* Replace GET_TSTATE() with PyThreadState_GET()
* Replace GET_INTERP_STATE() with _PyInterpreterState_GET_UNSAFE()
* Replace direct access to _PyThreadState_Current with
  PyThreadState_GET()
* Replace _PyThreadState_Current with
  _PyRuntime.gilstate.tstate_current
* Rename SET_TSTATE() to _PyThreadState_SET(), a name more
  consistent with _PyThreadState_GET()
* Update outdated comments
diff --git a/Python/ceval.c b/Python/ceval.c
index 9a96dc7..6443123 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -188,8 +188,7 @@
        We therefore avoid PyThreadState_GET() which dumps a fatal error
        in debug mode.
     */
-    drop_gil((PyThreadState*)_Py_atomic_load_relaxed(
-        &_PyThreadState_Current));
+    drop_gil(PyThreadState_GET());
 }
 
 void
diff --git a/Python/pystate.c b/Python/pystate.c
index 7b3d3d4..d049811 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -4,12 +4,9 @@
 #include "Python.h"
 #include "internal/pystate.h"
 
-#define GET_TSTATE() \
-    ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
-#define SET_TSTATE(value) \
-    _Py_atomic_store_relaxed(&_PyThreadState_Current, (uintptr_t)(value))
-#define GET_INTERP_STATE() \
-    (GET_TSTATE()->interp)
+#define _PyThreadState_SET(value) \
+    _Py_atomic_store_relaxed(&_PyRuntime.gilstate.tstate_current, \
+                             (uintptr_t)(value))
 
 
 /* --------------------------------------------------------------------------
@@ -309,7 +306,7 @@
 PyInterpreterState *
 _PyInterpreterState_Get(void)
 {
-    PyThreadState *tstate = GET_TSTATE();
+    PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL) {
         Py_FatalError("_PyInterpreterState_Get(): no current thread state");
     }
@@ -508,7 +505,7 @@
 PyState_FindModule(struct PyModuleDef* module)
 {
     Py_ssize_t index = module->m_base.m_index;
-    PyInterpreterState *state = GET_INTERP_STATE();
+    PyInterpreterState *state = _PyInterpreterState_GET_UNSAFE();
     PyObject *res;
     if (module->m_slots) {
         return NULL;
@@ -536,7 +533,7 @@
                         "PyState_AddModule called on module with slots");
         return -1;
     }
-    state = GET_INTERP_STATE();
+    state = _PyInterpreterState_GET_UNSAFE();
     if (!state->modules_by_index) {
         state->modules_by_index = PyList_New(0);
         if (!state->modules_by_index)
@@ -554,7 +551,7 @@
 PyState_AddModule(PyObject* module, struct PyModuleDef* def)
 {
     Py_ssize_t index;
-    PyInterpreterState *state = GET_INTERP_STATE();
+    PyInterpreterState *state = _PyInterpreterState_GET_UNSAFE();
     if (!def) {
         Py_FatalError("PyState_AddModule: Module Definition is NULL");
         return -1;
@@ -581,7 +578,7 @@
                         "PyState_RemoveModule called on module with slots");
         return -1;
     }
-    state = GET_INTERP_STATE();
+    state = _PyInterpreterState_GET_UNSAFE();
     if (index == 0) {
         Py_FatalError("PyState_RemoveModule: Module index invalid.");
         return -1;
@@ -601,7 +598,7 @@
 void
 _PyState_ClearModules(void)
 {
-    PyInterpreterState *state = GET_INTERP_STATE();
+    PyInterpreterState *state = _PyInterpreterState_GET_UNSAFE();
     if (state->modules_by_index) {
         Py_ssize_t i;
         for (i = 0; i < PyList_GET_SIZE(state->modules_by_index); i++) {
@@ -691,7 +688,7 @@
 void
 PyThreadState_Delete(PyThreadState *tstate)
 {
-    if (tstate == GET_TSTATE())
+    if (tstate == PyThreadState_GET())
         Py_FatalError("PyThreadState_Delete: tstate is still current");
     if (_PyRuntime.gilstate.autoInterpreterState &&
         PyThread_tss_get(&_PyRuntime.gilstate.autoTSSkey) == tstate)
@@ -705,7 +702,7 @@
 void
 PyThreadState_DeleteCurrent()
 {
-    PyThreadState *tstate = GET_TSTATE();
+    PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL)
         Py_FatalError(
             "PyThreadState_DeleteCurrent: no current tstate");
@@ -715,7 +712,7 @@
     {
         PyThread_tss_set(&_PyRuntime.gilstate.autoTSSkey, NULL);
     }
-    SET_TSTATE(NULL);
+    _PyThreadState_SET(NULL);
     PyEval_ReleaseLock();
 }
 
@@ -760,14 +757,14 @@
 PyThreadState *
 _PyThreadState_UncheckedGet(void)
 {
-    return GET_TSTATE();
+    return PyThreadState_GET();
 }
 
 
 PyThreadState *
 PyThreadState_Get(void)
 {
-    PyThreadState *tstate = GET_TSTATE();
+    PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL)
         Py_FatalError("PyThreadState_Get: no current thread");
 
@@ -778,9 +775,9 @@
 PyThreadState *
 PyThreadState_Swap(PyThreadState *newts)
 {
-    PyThreadState *oldts = GET_TSTATE();
+    PyThreadState *oldts = PyThreadState_GET();
 
-    SET_TSTATE(newts);
+    _PyThreadState_SET(newts);
     /* It should not be possible for more than one thread state
        to be used for a thread.  Check this the best we can in debug
        builds.
@@ -809,7 +806,7 @@
 PyObject *
 PyThreadState_GetDict(void)
 {
-    PyThreadState *tstate = GET_TSTATE();
+    PyThreadState *tstate = PyThreadState_GET();
     if (tstate == NULL)
         return NULL;
 
@@ -834,7 +831,7 @@
 int
 PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc)
 {
-    PyInterpreterState *interp = GET_INTERP_STATE();
+    PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
     PyThreadState *p;
 
     /* Although the GIL is held, a few C API functions can be called
@@ -960,7 +957,7 @@
 {
     /* Must be the tstate for this thread */
     assert(PyGILState_GetThisThreadState()==tstate);
-    return tstate == GET_TSTATE();
+    return tstate == PyThreadState_GET();
 }
 
 /* Internal initialization/finalization functions called by
@@ -1087,7 +1084,7 @@
         return 1;
     }
 
-    tstate = GET_TSTATE();
+    tstate = PyThreadState_GET();
     if (tstate == NULL)
         return 0;