bpo-39465: Add pycore_atomic_funcs.h header (GH-20766)

Add pycore_atomic_funcs.h internal header file: similar to
pycore_atomic.h, but variables don't need to be declared as atomic.

Add _Py_atomic_size_get() and _Py_atomic_size_set() functions.
diff --git a/Include/internal/pycore_atomic.h b/Include/internal/pycore_atomic.h
index 1d5c562..3d42e54 100644
--- a/Include/internal/pycore_atomic.h
+++ b/Include/internal/pycore_atomic.h
@@ -11,8 +11,8 @@
 #include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
 #include "pyconfig.h"
 
-#if defined(HAVE_STD_ATOMIC)
-#include <stdatomic.h>
+#ifdef HAVE_STD_ATOMIC
+#  include <stdatomic.h>
 #endif
 
 
@@ -62,7 +62,7 @@
 #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
     atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)
 
-/* Use builtin atomic operations in GCC >= 4.7 */
+// Use builtin atomic operations in GCC >= 4.7 and clang
 #elif defined(HAVE_BUILTIN_ATOMIC)
 
 typedef enum _Py_memory_order {
diff --git a/Include/internal/pycore_atomic_funcs.h b/Include/internal/pycore_atomic_funcs.h
new file mode 100644
index 0000000..a708789
--- /dev/null
+++ b/Include/internal/pycore_atomic_funcs.h
@@ -0,0 +1,94 @@
+/* Atomic functions: similar to pycore_atomic.h, but variables
+   don't need to be declared as atomic.
+
+   Py_ssize_t type:
+
+   * value = _Py_atomic_size_get(&var)
+   * _Py_atomic_size_set(&var, value)
+
+   Use sequentially-consistent ordering (__ATOMIC_SEQ_CST memory order):
+   enforce total ordering with all other atomic functions.
+*/
+#ifndef Py_ATOMIC_FUNC_H
+#define Py_ATOMIC_FUNC_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+#  error "this header requires Py_BUILD_CORE define"
+#endif
+
+#if defined(_MSC_VER)
+#  include <intrin.h>             // _InterlockedExchange()
+#endif
+
+
+// Use builtin atomic operations in GCC >= 4.7 and clang
+#ifdef HAVE_BUILTIN_ATOMIC
+
+static inline Py_ssize_t _Py_atomic_size_get(Py_ssize_t *var)
+{
+    return __atomic_load_n(var, __ATOMIC_SEQ_CST);
+}
+
+static inline void _Py_atomic_size_set(Py_ssize_t *var, Py_ssize_t value)
+{
+    __atomic_store_n(var, value, __ATOMIC_SEQ_CST);
+}
+
+#elif defined(_MSC_VER)
+
+static inline Py_ssize_t _Py_atomic_size_get(Py_ssize_t *var)
+{   // MSVC branch: read *var through an Interlocked compare-exchange intrinsic.
+#if SIZEOF_VOID_P == 8  // 8-byte pointers: use the 64-bit intrinsic
+    Py_BUILD_ASSERT(sizeof(__int64) == sizeof(*var));  // the pointer cast below requires equal sizes
+    volatile __int64 *volatile_var = (volatile __int64 *)var;
+    __int64 old;
+    do {  // loop until the compare-exchange reports the same value that was just read
+        old = *volatile_var;
+    } while(_InterlockedCompareExchange64(volatile_var, old, old) != old);
+#else  // otherwise: use the long-sized intrinsic
+    Py_BUILD_ASSERT(sizeof(long) == sizeof(*var));  // the pointer cast below requires equal sizes
+    volatile long *volatile_var = (volatile long *)var;
+    long old;
+    do {  // loop until the compare-exchange reports the same value that was just read
+        old = *volatile_var;
+    } while(_InterlockedCompareExchange(volatile_var, old, old) != old);
+#endif
+    return old;  // value confirmed by the last compare-exchange
+}
+
+static inline void _Py_atomic_size_set(Py_ssize_t *var, Py_ssize_t value)
+{   // MSVC branch: store value into *var through an Interlocked exchange intrinsic.
+#if SIZEOF_VOID_P == 8  // 8-byte pointers: use the 64-bit intrinsic
+    Py_BUILD_ASSERT(sizeof(__int64) == sizeof(*var));  // the pointer cast below requires equal sizes
+    volatile __int64 *volatile_var = (volatile __int64 *)var;
+    _InterlockedExchange64(volatile_var, value);  // the returned previous value is ignored
+#else  // otherwise: use the long-sized intrinsic
+    Py_BUILD_ASSERT(sizeof(long) == sizeof(*var));  // the pointer cast below requires equal sizes
+    volatile long *volatile_var = (volatile long *)var;
+    _InterlockedExchange(volatile_var, value);  // the returned previous value is ignored
+#endif
+}
+
+#else
+// Fallback using volatile accesses: no atomicity or ordering guarantee
+
+static inline Py_ssize_t _Py_atomic_size_get(Py_ssize_t *var)
+{
+    volatile Py_ssize_t *volatile_var = (volatile Py_ssize_t *)var;
+    return *volatile_var;
+}
+
+static inline void _Py_atomic_size_set(Py_ssize_t *var, Py_ssize_t value)
+{
+    volatile Py_ssize_t *volatile_var = (volatile Py_ssize_t *)var;
+    *volatile_var = value;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* Py_ATOMIC_FUNC_H */
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 69ed2519..5c93b0b 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -1111,6 +1111,7 @@
 		$(srcdir)/Include/internal/pycore_abstract.h \
 		$(srcdir)/Include/internal/pycore_accu.h \
 		$(srcdir)/Include/internal/pycore_atomic.h \
+		$(srcdir)/Include/internal/pycore_atomic_funcs.h \
 		$(srcdir)/Include/internal/pycore_bitutils.h \
 		$(srcdir)/Include/internal/pycore_bytes_methods.h \
 		$(srcdir)/Include/internal/pycore_call.h \
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index df4725e..ab6c596 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -12,6 +12,7 @@
 #define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
+#include "pycore_atomic_funcs.h" // _Py_atomic_size_get()
 #include "pycore_bitutils.h"     // _Py_bswap32()
 #include "pycore_gc.h"           // PyGC_Head
 #include "pycore_hashtable.h"    // _Py_hashtable_new()
@@ -267,6 +268,17 @@
 }
 
 
+static PyObject*
+test_atomic_funcs(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    // Test _Py_atomic_size_get() and _Py_atomic_size_set()
+    Py_ssize_t var = 1;
+    _Py_atomic_size_set(&var, 2);
+    assert(_Py_atomic_size_get(&var) == 2);
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
@@ -276,6 +288,7 @@
     {"test_hashtable", test_hashtable, METH_NOARGS},
     {"get_config", test_get_config, METH_NOARGS},
     {"set_config", test_set_config, METH_O},
+    {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index bbceb02..fd27dea 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -168,6 +168,7 @@
     <ClInclude Include="..\Include\internal\pycore_abstract.h" />
     <ClInclude Include="..\Include\internal\pycore_accu.h" />
     <ClInclude Include="..\Include\internal\pycore_atomic.h" />
+    <ClInclude Include="..\Include\internal\pycore_atomic_funcs.h" />
     <ClInclude Include="..\Include\internal\pycore_bitutils.h" />
     <ClInclude Include="..\Include\internal\pycore_bytes_methods.h" />
     <ClInclude Include="..\Include\internal\pycore_call.h" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index ee1aa90..75a653d 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -486,6 +486,9 @@
     <ClInclude Include="..\Include\internal\pycore_atomic.h">
       <Filter>Include\internal</Filter>
     </ClInclude>
+    <ClInclude Include="..\Include\internal\pycore_atomic_funcs.h">
+      <Filter>Include\internal</Filter>
+    </ClInclude>
     <ClInclude Include="..\Include\internal\pycore_bitutils.h">
       <Filter>Include\internal</Filter>
     </ClInclude>
diff --git a/configure b/configure
index f07edff..530c04a 100755
--- a/configure
+++ b/configure
@@ -15429,6 +15429,7 @@
 
 fi
 
+
 EXT_SUFFIX=.${SOABI}${SHLIB_SUFFIX}
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking LDVERSION" >&5
@@ -17095,16 +17096,17 @@
 
 fi
 
-# Check for GCC >= 4.7 __atomic builtins
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for GCC >= 4.7 __atomic builtins" >&5
-$as_echo_n "checking for GCC >= 4.7 __atomic builtins... " >&6; }
+# Check for GCC >= 4.7 and clang __atomic builtin functions
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for builtin __atomic_load_n and __atomic_store_n functions" >&5
+$as_echo_n "checking for builtin __atomic_load_n and __atomic_store_n functions... " >&6; }
 cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
-    volatile int val = 1;
+    int val;
     int main() {
-      __atomic_load_n(&val, __ATOMIC_SEQ_CST);
+      __atomic_store_n(&val, 1, __ATOMIC_SEQ_CST);
+      (void)__atomic_load_n(&val, __ATOMIC_SEQ_CST);
       return 0;
     }
 
diff --git a/configure.ac b/configure.ac
index ee5573c..39eadfe 100644
--- a/configure.ac
+++ b/configure.ac
@@ -5586,14 +5586,15 @@
               [Has stdatomic.h with atomic_int and atomic_uintptr_t])
 fi
 
-# Check for GCC >= 4.7 __atomic builtins
-AC_MSG_CHECKING(for GCC >= 4.7 __atomic builtins)
+# Check for GCC >= 4.7 and clang __atomic builtin functions
+AC_MSG_CHECKING(for builtin __atomic_load_n and __atomic_store_n functions)
 AC_LINK_IFELSE(
 [
   AC_LANG_SOURCE([[
-    volatile int val = 1;
+    int val;
     int main() {
-      __atomic_load_n(&val, __ATOMIC_SEQ_CST);
+      __atomic_store_n(&val, 1, __ATOMIC_SEQ_CST);
+      (void)__atomic_load_n(&val, __ATOMIC_SEQ_CST);
       return 0;
     }
   ]])
@@ -5602,7 +5603,7 @@
 AC_MSG_RESULT($have_builtin_atomic)
 
 if test "$have_builtin_atomic" = yes; then
-    AC_DEFINE(HAVE_BUILTIN_ATOMIC, 1, [Has builtin atomics])
+    AC_DEFINE(HAVE_BUILTIN_ATOMIC, 1, [Has builtin __atomic_load_n() and __atomic_store_n() functions])
 fi
 
 # ensurepip option
diff --git a/pyconfig.h.in b/pyconfig.h.in
index 6ff5fc9..045cbd5 100644
--- a/pyconfig.h.in
+++ b/pyconfig.h.in
@@ -115,7 +115,7 @@
 /* Define if `unsetenv` does not return an int. */
 #undef HAVE_BROKEN_UNSETENV
 
-/* Has builtin atomics */
+/* Has builtin __atomic_load_n() and __atomic_store_n() functions */
 #undef HAVE_BUILTIN_ATOMIC
 
 /* Define to 1 if you have the 'chflags' function. */
@@ -287,6 +287,9 @@
 /* Define to 1 if you have the `dup3' function. */
 #undef HAVE_DUP3
 
+/* Define if you have the '_dyld_shared_cache_contains_path' function. */
+#undef HAVE_DYLD_SHARED_CACHE_CONTAINS_PATH
+
 /* Defined when any dynamic module loading is enabled. */
 #undef HAVE_DYNAMIC_LOADING
 
@@ -787,9 +790,6 @@
 /* Define if you have the 'prlimit' functions. */
 #undef HAVE_PRLIMIT
 
-/* Define if you have the '_dyld_shared_cache_contains_path' function. */
-#undef HAVE_DYLD_SHARED_CACHE_CONTAINS_PATH
-
 /* Define to 1 if you have the <process.h> header file. */
 #undef HAVE_PROCESS_H