bpo-39877: _PyRuntimeState.finalizing becomes atomic (GH-18816)

Convert _PyRuntimeState.finalizing field to an atomic variable:

* Rename it to _finalizing
* Change its type to _Py_atomic_address
* Add _PyRuntimeState_GetFinalizing() and _PyRuntimeState_SetFinalizing()
  functions
* Remove _Py_CURRENTLY_FINALIZING() function: replace it with testing
  directly _PyRuntimeState_GetFinalizing() value

Convert _PyRuntimeState_GetThreadState() to static inline function.
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index 405efb9..b5f5095 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -223,8 +223,11 @@
     int initialized;
 
     /* Set by Py_FinalizeEx(). Only reset to NULL if Py_Initialize()
-       is called again. */
-    PyThreadState *finalizing;
+       is called again.
+
+       Use _PyRuntimeState_GetFinalizing() and _PyRuntimeState_SetFinalizing()
+       to access it, don't access it directly. */
+    _Py_atomic_address _finalizing;
 
     struct pyinterpreters {
         PyThread_type_lock mutex;
@@ -279,8 +282,15 @@
 
 PyAPI_FUNC(void) _PyRuntime_Finalize(void);
 
-#define _Py_CURRENTLY_FINALIZING(runtime, tstate) \
-    (runtime->finalizing == tstate)
+static inline PyThreadState*
+_PyRuntimeState_GetFinalizing(_PyRuntimeState *runtime) {
+    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->_finalizing);
+}
+
+static inline void
+_PyRuntimeState_SetFinalizing(_PyRuntimeState *runtime, PyThreadState *tstate) {
+    _Py_atomic_store_relaxed(&runtime->_finalizing, (uintptr_t)tstate);
+}
 
 PyAPI_FUNC(int) _Py_IsMainInterpreter(PyThreadState* tstate);
 
@@ -288,8 +298,9 @@
 /* Variable and macro for in-line access to current thread
    and interpreter state */
 
-#define _PyRuntimeState_GetThreadState(runtime) \
-    ((PyThreadState*)_Py_atomic_load_relaxed(&(runtime)->gilstate.tstate_current))
+static inline PyThreadState* _PyRuntimeState_GetThreadState(_PyRuntimeState *runtime) {
+    return (PyThreadState*)_Py_atomic_load_relaxed(&runtime->gilstate.tstate_current);
+}
 
 /* Get the current Python thread state.
 
diff --git a/Python/ceval.c b/Python/ceval.c
index ef4aac2..20e32e2 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -240,7 +240,8 @@
 {
     _PyRuntimeState *runtime = tstate->interp->runtime;
     /* _Py_Finalizing is protected by the GIL */
-    if (runtime->finalizing != NULL && !_Py_CURRENTLY_FINALIZING(runtime, tstate)) {
+    PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(runtime);
+    if (finalizing != NULL && finalizing != tstate) {
         drop_gil(&runtime->ceval, tstate);
         PyThread_exit_thread();
     }
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index 84ced42..23d74ee 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -103,7 +103,7 @@
 int
 _Py_IsFinalizing(void)
 {
-    return _PyRuntime.finalizing != NULL;
+    return _PyRuntimeState_GetFinalizing(&_PyRuntime) != NULL;
 }
 
 /* Hack to force loading of object files */
@@ -507,7 +507,7 @@
      * threads still hanging around from a previous Py_Initialize/Finalize
      * pair :(
      */
-    runtime->finalizing = NULL;
+    _PyRuntimeState_SetFinalizing(runtime, NULL);
 
     PyStatus status = _Py_HashRandomization_Init(config);
     if (_PyStatus_EXCEPTION(status)) {
@@ -1366,7 +1366,7 @@
 
     /* Remaining threads (e.g. daemon threads) will automatically exit
        after taking the GIL (in PyEval_RestoreThread()). */
-    runtime->finalizing = tstate;
+    _PyRuntimeState_SetFinalizing(runtime, tstate);
     runtime->initialized = 0;
     runtime->core_initialized = 0;
 
@@ -2131,8 +2131,9 @@
 fatal_error_dump_runtime(FILE *stream, _PyRuntimeState *runtime)
 {
     fprintf(stream, "Python runtime state: ");
-    if (runtime->finalizing) {
-        fprintf(stream, "finalizing (tstate=%p)", runtime->finalizing);
+    PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(runtime);
+    if (finalizing) {
+        fprintf(stream, "finalizing (tstate=%p)", finalizing);
     }
     else if (runtime->initialized) {
         fprintf(stream, "initialized");
diff --git a/Python/pystate.c b/Python/pystate.c
index 4001c63..9002669 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -292,7 +292,7 @@
     Py_CLEAR(interp->after_forkers_parent);
     Py_CLEAR(interp->after_forkers_child);
 #endif
-    if (runtime->finalizing == NULL) {
+    if (_PyRuntimeState_GetFinalizing(runtime) == NULL) {
         _PyWarnings_Fini(interp);
     }
     // XXX Once we have one allocator per interpreter (i.e.
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index bfacf31..f086514 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -289,8 +289,9 @@
     /* Must be finalizing to clear hooks */
     _PyRuntimeState *runtime = &_PyRuntime;
     PyThreadState *ts = _PyRuntimeState_GetThreadState(runtime);
-    assert(!ts || _Py_CURRENTLY_FINALIZING(runtime, ts));
-    if (!ts || !_Py_CURRENTLY_FINALIZING(runtime, ts)) {
+    PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(runtime);
+    assert(!ts || finalizing == ts);
+    if (!ts || finalizing != ts) {
         return;
     }