Use Py_uintptr_t for atomic pointers

Issue #26161: Use Py_uintptr_t instead of void* for atomic pointers in
pyatomic.h. Use atomic_uintptr_t when <stdatomic.h> is used.

Using void* causes compilation warnings depending on which implementation of
atomic types is used.
diff --git a/Include/pyatomic.h b/Include/pyatomic.h
index 892a217..89028ef 100644
--- a/Include/pyatomic.h
+++ b/Include/pyatomic.h
@@ -30,7 +30,7 @@
 } _Py_memory_order;
 
 typedef struct _Py_atomic_address {
-    _Atomic void *_value;
+    atomic_uintptr_t _value;
 } _Py_atomic_address;
 
 typedef struct _Py_atomic_int {
@@ -61,7 +61,7 @@
 } _Py_memory_order;
 
 typedef struct _Py_atomic_address {
-    void *_value;
+    Py_uintptr_t _value;
 } _Py_atomic_address;
 
 typedef struct _Py_atomic_int {
@@ -98,7 +98,7 @@
 } _Py_memory_order;
 
 typedef struct _Py_atomic_address {
-    void *_value;
+    Py_uintptr_t _value;
 } _Py_atomic_address;
 
 typedef struct _Py_atomic_int {
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index aafcbc2..8d38ee9 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -111,7 +111,7 @@
 static unsigned long gil_switch_number = 0;
 /* Last PyThreadState holding / having held the GIL. This helps us know
    whether anyone else was scheduled after we dropped the GIL. */
-static _Py_atomic_address gil_last_holder = {NULL};
+static _Py_atomic_address gil_last_holder = {0};
 
 /* This condition variable allows one or several threads to wait until
    the GIL is released. In addition, the mutex also protects the above
@@ -142,7 +142,7 @@
 #ifdef FORCE_SWITCHING
     COND_INIT(switch_cond);
 #endif
-    _Py_atomic_store_relaxed(&gil_last_holder, NULL);
+    _Py_atomic_store_relaxed(&gil_last_holder, 0);
     _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked);
     _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release);
 }
@@ -178,7 +178,7 @@
         /* Sub-interpreter support: threads might have been switched
            under our feet using PyThreadState_Swap(). Fix the GIL last
            holder variable so that our heuristics work. */
-        _Py_atomic_store_relaxed(&gil_last_holder, tstate);
+        _Py_atomic_store_relaxed(&gil_last_holder, (Py_uintptr_t)tstate);
     }
 
     MUTEX_LOCK(gil_mutex);
@@ -240,7 +240,7 @@
     _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1);
 
     if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil_last_holder)) {
-        _Py_atomic_store_relaxed(&gil_last_holder, tstate);
+        _Py_atomic_store_relaxed(&gil_last_holder, (Py_uintptr_t)tstate);
         ++gil_switch_number;
     }
 
diff --git a/Python/pystate.c b/Python/pystate.c
index 6d0696d..853e5c7 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -3,11 +3,13 @@
 
 #include "Python.h"
 
-#ifndef Py_BUILD_CORE
-/* ensure that PyThreadState_GET() is a macro, not an alias to
- * PyThreadState_Get() */
-#  error "pystate.c must be compiled with Py_BUILD_CORE defined"
-#endif
+#define GET_TSTATE() \
+    ((PyThreadState*)_Py_atomic_load_relaxed(&_PyThreadState_Current))
+#define SET_TSTATE(value) \
+    _Py_atomic_store_relaxed(&_PyThreadState_Current, (Py_uintptr_t)(value))
+#define GET_INTERP_STATE() \
+    (GET_TSTATE()->interp)
+
 
 /* --------------------------------------------------------------------------
 CAUTION
@@ -54,7 +56,7 @@
 
 /* Assuming the current thread holds the GIL, this is the
    PyThreadState for the current thread. */
-_Py_atomic_address _PyThreadState_Current = {NULL};
+_Py_atomic_address _PyThreadState_Current = {0};
 PyThreadFrameGetter _PyThreadState_GetFrame = NULL;
 
 #ifdef WITH_THREAD
@@ -260,7 +262,7 @@
 PyState_FindModule(struct PyModuleDef* module)
 {
     Py_ssize_t index = module->m_base.m_index;
-    PyInterpreterState *state = PyThreadState_GET()->interp;
+    PyInterpreterState *state = GET_INTERP_STATE();
     PyObject *res;
     if (module->m_slots) {
         return NULL;
@@ -284,7 +286,7 @@
                         "PyState_AddModule called on module with slots");
         return -1;
     }
-    state = PyThreadState_GET()->interp;
+    state = GET_INTERP_STATE();
     if (!def)
         return -1;
     if (!state->modules_by_index) {
@@ -304,7 +306,7 @@
 PyState_AddModule(PyObject* module, struct PyModuleDef* def)
 {
     Py_ssize_t index;
-    PyInterpreterState *state = PyThreadState_GET()->interp;
+    PyInterpreterState *state = GET_INTERP_STATE();
     if (!def) {
         Py_FatalError("PyState_AddModule: Module Definition is NULL");
         return -1;
@@ -331,7 +333,7 @@
                         "PyState_RemoveModule called on module with slots");
         return -1;
     }
-    state = PyThreadState_GET()->interp;
+    state = GET_INTERP_STATE();
     if (index == 0) {
         Py_FatalError("PyState_RemoveModule: Module index invalid.");
         return -1;
@@ -351,7 +353,7 @@
 void
 _PyState_ClearModules(void)
 {
-    PyInterpreterState *state = PyThreadState_GET()->interp;
+    PyInterpreterState *state = GET_INTERP_STATE();
     if (state->modules_by_index) {
         Py_ssize_t i;
         for (i = 0; i < PyList_GET_SIZE(state->modules_by_index); i++) {
@@ -429,7 +431,7 @@
 void
 PyThreadState_Delete(PyThreadState *tstate)
 {
-    if (tstate == PyThreadState_GET())
+    if (tstate == GET_TSTATE())
         Py_FatalError("PyThreadState_Delete: tstate is still current");
 #ifdef WITH_THREAD
     if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
@@ -443,11 +445,11 @@
 void
 PyThreadState_DeleteCurrent()
 {
-    PyThreadState *tstate = PyThreadState_GET();
+    PyThreadState *tstate = GET_TSTATE();
     if (tstate == NULL)
         Py_FatalError(
             "PyThreadState_DeleteCurrent: no current tstate");
-    _Py_atomic_store_relaxed(&_PyThreadState_Current, NULL);
+    SET_TSTATE(NULL);
     if (autoInterpreterState && PyThread_get_key_value(autoTLSkey) == tstate)
         PyThread_delete_key_value(autoTLSkey);
     tstate_delete_common(tstate);
@@ -496,14 +498,14 @@
 PyThreadState *
 _PyThreadState_UncheckedGet(void)
 {
-    return PyThreadState_GET();
+    return GET_TSTATE();
 }
 
 
 PyThreadState *
 PyThreadState_Get(void)
 {
-    PyThreadState *tstate = PyThreadState_GET();
+    PyThreadState *tstate = GET_TSTATE();
     if (tstate == NULL)
         Py_FatalError("PyThreadState_Get: no current thread");
 
@@ -514,9 +516,9 @@
 PyThreadState *
 PyThreadState_Swap(PyThreadState *newts)
 {
-    PyThreadState *oldts = PyThreadState_GET();
+    PyThreadState *oldts = GET_TSTATE();
 
-    _Py_atomic_store_relaxed(&_PyThreadState_Current, newts);
+    SET_TSTATE(newts);
     /* It should not be possible for more than one thread state
        to be used for a thread.  Check this the best we can in debug
        builds.
@@ -545,7 +547,7 @@
 PyObject *
 PyThreadState_GetDict(void)
 {
-    PyThreadState *tstate = PyThreadState_GET();
+    PyThreadState *tstate = GET_TSTATE();
     if (tstate == NULL)
         return NULL;
 
@@ -569,8 +571,7 @@
 
 int
 PyThreadState_SetAsyncExc(long id, PyObject *exc) {
-    PyThreadState *tstate = PyThreadState_GET();
-    PyInterpreterState *interp = tstate->interp;
+    PyInterpreterState *interp = GET_INTERP_STATE();
     PyThreadState *p;
 
     /* Although the GIL is held, a few C API functions can be called
@@ -691,7 +692,7 @@
 {
     /* Must be the tstate for this thread */
     assert(PyGILState_GetThisThreadState()==tstate);
-    return tstate == PyThreadState_GET();
+    return tstate == GET_TSTATE();
 }
 
 /* Internal initialization/finalization functions called by
@@ -783,7 +784,7 @@
 int
 PyGILState_Check(void)
 {
-    PyThreadState *tstate = PyThreadState_GET();
+    PyThreadState *tstate = GET_TSTATE();
     return tstate && (tstate == PyGILState_GetThisThreadState());
 }