bpo-40302: Add pycore_byteswap.h header file (GH-19552)

Add a new internal pycore_byteswap.h header file with the following
functions:

* _Py_bswap16()
* _Py_bswap32()
* _Py_bswap64()

Use these functions in _ctypes, sha256 and sha512 modules,
and also use in the UTF-32 encoder.

sha256, sha512 and _ctypes modules are now built with the internal
C API.
diff --git a/Modules/Setup b/Modules/Setup
index 40266a1..9dcca13 100644
--- a/Modules/Setup
+++ b/Modules/Setup
@@ -247,8 +247,8 @@
 # The _sha module implements the SHA checksum algorithms.
 # (NIST's Secure Hash Algorithms.)
 #_sha1 sha1module.c
-#_sha256 sha256module.c
-#_sha512 sha512module.c
+#_sha256 sha256module.c -DPy_BUILD_CORE_BUILTIN
+#_sha512 sha512module.c -DPy_BUILD_CORE_BUILTIN
 #_sha3 _sha3/sha3module.c
 
 # _blake module
diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c
index 2060d15..a72682d 100644
--- a/Modules/_ctypes/cfield.c
+++ b/Modules/_ctypes/cfield.c
@@ -1,4 +1,5 @@
 #include "Python.h"
+#include "pycore_byteswap.h"      // _Py_bswap32()
 
 #include <ffi.h>
 #ifdef MS_WIN32
@@ -448,46 +449,32 @@
      ( ( (type)x & ~(BIT_MASK(type, size) << LOW_BIT(size)) ) | ( ((type)v & BIT_MASK(type, size)) << LOW_BIT(size) ) ) \
      : (type)v)
 
-/* byte swapping macros */
-#define SWAP_2(v)                               \
-    ( ( (v >> 8) & 0x00FF) |                    \
-      ( (v << 8) & 0xFF00) )
-
-#define SWAP_4(v)                       \
-    ( ( (v & 0x000000FF) << 24 ) |  \
-      ( (v & 0x0000FF00) <<  8 ) |  \
-      ( (v & 0x00FF0000) >>  8 ) |  \
-      ( ((v >> 24) & 0xFF)) )
-
-#ifdef _MSC_VER
-#define SWAP_8(v)                               \
-    ( ( (v & 0x00000000000000FFL) << 56 ) |  \
-      ( (v & 0x000000000000FF00L) << 40 ) |  \
-      ( (v & 0x0000000000FF0000L) << 24 ) |  \
-      ( (v & 0x00000000FF000000L) <<  8 ) |  \
-      ( (v & 0x000000FF00000000L) >>  8 ) |  \
-      ( (v & 0x0000FF0000000000L) >> 24 ) |  \
-      ( (v & 0x00FF000000000000L) >> 40 ) |  \
-      ( ((v >> 56) & 0xFF)) )
+#if SIZEOF_SHORT == 2
+#  define SWAP_SHORT _Py_bswap16
 #else
-#define SWAP_8(v)                               \
-    ( ( (v & 0x00000000000000FFLL) << 56 ) |  \
-      ( (v & 0x000000000000FF00LL) << 40 ) |  \
-      ( (v & 0x0000000000FF0000LL) << 24 ) |  \
-      ( (v & 0x00000000FF000000LL) <<  8 ) |  \
-      ( (v & 0x000000FF00000000LL) >>  8 ) |  \
-      ( (v & 0x0000FF0000000000LL) >> 24 ) |  \
-      ( (v & 0x00FF000000000000LL) >> 40 ) |  \
-      ( ((v >> 56) & 0xFF)) )
+#  error "unsupported short size"
 #endif
 
-#define SWAP_INT SWAP_4
+#if SIZEOF_INT == 4
+#  define SWAP_INT _Py_bswap32
+#else
+#  error "unsupported int size"
+#endif
 
 #if SIZEOF_LONG == 4
-# define SWAP_LONG SWAP_4
+#  define SWAP_LONG _Py_bswap32
 #elif SIZEOF_LONG == 8
-# define SWAP_LONG SWAP_8
+#  define SWAP_LONG _Py_bswap64
+#else
+#  error "unsupported long size"
 #endif
+
+#if SIZEOF_LONG_LONG == 8
+#  define SWAP_LONG_LONG _Py_bswap64
+#else
+#  error "unsupported long long size"
+#endif
+
 /*****************************************************************
  * The setter methods return an object which must be kept alive, to keep the
  * data valid which has been stored in the memory block.  The ctypes object
@@ -569,12 +556,13 @@
 {
     long val;
     short field;
-    if (get_long(value, &val) < 0)
+    if (get_long(value, &val) < 0) {
         return NULL;
+    }
     memcpy(&field, ptr, sizeof(field));
-    field = SWAP_2(field);
+    field = SWAP_SHORT(field);
     field = SET(short, field, val, size);
-    field = SWAP_2(field);
+    field = SWAP_SHORT(field);
     memcpy(ptr, &field, sizeof(field));
     _RET(value);
 }
@@ -593,7 +581,7 @@
 {
     short val;
     memcpy(&val, ptr, sizeof(val));
-    val = SWAP_2(val);
+    val = SWAP_SHORT(val);
     GET_BITFIELD(val, size);
     return PyLong_FromLong(val);
 }
@@ -616,12 +604,13 @@
 {
     unsigned long val;
     unsigned short field;
-    if (get_ulong(value, &val) < 0)
+    if (get_ulong(value, &val) < 0) {
         return NULL;
+    }
     memcpy(&field, ptr, sizeof(field));
-    field = SWAP_2(field);
+    field = SWAP_SHORT(field);
     field = SET(unsigned short, field, val, size);
-    field = SWAP_2(field);
+    field = SWAP_SHORT(field);
     memcpy(ptr, &field, sizeof(field));
     _RET(value);
 }
@@ -641,7 +630,7 @@
 {
     unsigned short val;
     memcpy(&val, ptr, sizeof(val));
-    val = SWAP_2(val);
+    val = SWAP_SHORT(val);
     GET_BITFIELD(val, size);
     return PyLong_FromLong(val);
 }
@@ -664,8 +653,9 @@
 {
     long val;
     int field;
-    if (get_long(value, &val) < 0)
+    if (get_long(value, &val) < 0) {
         return NULL;
+    }
     memcpy(&field, ptr, sizeof(field));
     field = SWAP_INT(field);
     field = SET(int, field, val, size);
@@ -757,8 +747,9 @@
 {
     unsigned long val;
     unsigned int field;
-    if (get_ulong(value, &val) < 0)
+    if (get_ulong(value, &val) < 0) {
         return  NULL;
+    }
     memcpy(&field, ptr, sizeof(field));
     field = SWAP_INT(field);
     field = SET(unsigned int, field, (unsigned int)val, size);
@@ -805,8 +796,9 @@
 {
     long val;
     long field;
-    if (get_long(value, &val) < 0)
+    if (get_long(value, &val) < 0) {
         return NULL;
+    }
     memcpy(&field, ptr, sizeof(field));
     field = SWAP_LONG(field);
     field = SET(long, field, val, size);
@@ -853,8 +845,9 @@
 {
     unsigned long val;
     unsigned long field;
-    if (get_ulong(value, &val) < 0)
+    if (get_ulong(value, &val) < 0) {
         return  NULL;
+    }
     memcpy(&field, ptr, sizeof(field));
     field = SWAP_LONG(field);
     field = SET(unsigned long, field, val, size);
@@ -901,12 +894,13 @@
 {
     long long val;
     long long field;
-    if (get_longlong(value, &val) < 0)
+    if (get_longlong(value, &val) < 0) {
         return NULL;
+    }
     memcpy(&field, ptr, sizeof(field));
-    field = SWAP_8(field);
+    field = SWAP_LONG_LONG(field);
     field = SET(long long, field, val, size);
-    field = SWAP_8(field);
+    field = SWAP_LONG_LONG(field);
     memcpy(ptr, &field, sizeof(field));
     _RET(value);
 }
@@ -925,7 +919,7 @@
 {
     long long val;
     memcpy(&val, ptr, sizeof(val));
-    val = SWAP_8(val);
+    val = SWAP_LONG_LONG(val);
     GET_BITFIELD(val, size);
     return PyLong_FromLongLong(val);
 }
@@ -948,12 +942,13 @@
 {
     unsigned long long val;
     unsigned long long field;
-    if (get_ulonglong(value, &val) < 0)
+    if (get_ulonglong(value, &val) < 0) {
         return NULL;
+    }
     memcpy(&field, ptr, sizeof(field));
-    field = SWAP_8(field);
+    field = SWAP_LONG_LONG(field);
     field = SET(unsigned long long, field, val, size);
-    field = SWAP_8(field);
+    field = SWAP_LONG_LONG(field);
     memcpy(ptr, &field, sizeof(field));
     _RET(value);
 }
@@ -972,7 +967,7 @@
 {
     unsigned long long val;
     memcpy(&val, ptr, sizeof(val));
-    val = SWAP_8(val);
+    val = SWAP_LONG_LONG(val);
     GET_BITFIELD(val, size);
     return PyLong_FromUnsignedLongLong(val);
 }
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 8352f6e..9330e26 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -9,6 +9,7 @@
 #define PY_SSIZE_T_CLEAN
 
 #include "Python.h"
+#include "pycore_byteswap.h"     // _Py_bswap32()
 #include "pycore_initconfig.h"   // _Py_GetConfigsAsDict()
 #include "pycore_gc.h"           // PyGC_Head
 
@@ -21,7 +22,7 @@
 
 
 static PyObject*
-get_recursion_depth(PyObject *self, PyObject *args)
+get_recursion_depth(PyObject *self, PyObject *Py_UNUSED(args))
 {
     PyThreadState *tstate = PyThreadState_Get();
 
@@ -30,9 +31,38 @@
 }
 
 
+static PyObject*
+test_bswap(PyObject *self, PyObject *Py_UNUSED(args))
+{
+    uint16_t u16 = _Py_bswap16(UINT16_C(0x3412));
+    if (u16 != UINT16_C(0x1234)) {
+        PyErr_Format(PyExc_AssertionError,
+                     "_Py_bswap16(0x3412) returns %u", u16);
+        return NULL;
+    }
+
+    uint32_t u32 = _Py_bswap32(UINT32_C(0x78563412));
+    if (u32 != UINT32_C(0x12345678)) {
+        PyErr_Format(PyExc_AssertionError,
+                     "_Py_bswap32(0x78563412) returns %lu", u32);
+        return NULL;
+    }
+
+    uint64_t u64 = _Py_bswap64(UINT64_C(0xEFCDAB9078563412));
+    if (u64 != UINT64_C(0x1234567890ABCDEF)) {
+        PyErr_Format(PyExc_AssertionError,
+                     "_Py_bswap64(0xEFCDAB9078563412) returns %llu", u64);
+        return NULL;
+    }
+
+    Py_RETURN_NONE;
+}
+
+
 static PyMethodDef TestMethods[] = {
     {"get_configs", get_configs, METH_NOARGS},
     {"get_recursion_depth", get_recursion_depth, METH_NOARGS},
+    {"test_bswap", test_bswap, METH_NOARGS},
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/Modules/sha256module.c b/Modules/sha256module.c
index 7310826..e0ff9b2 100644
--- a/Modules/sha256module.c
+++ b/Modules/sha256module.c
@@ -17,6 +17,7 @@
 /* SHA objects */
 
 #include "Python.h"
+#include "pycore_byteswap.h"      // _Py_bswap32()
 #include "structmember.h"         // PyMemberDef
 #include "hashlib.h"
 #include "pystrhex.h"
@@ -30,12 +31,7 @@
 /* Some useful types */
 
 typedef unsigned char SHA_BYTE;
-
-#if SIZEOF_INT == 4
-typedef unsigned int SHA_INT32; /* 32-bit integer */
-#else
-/* not defined. compilation will die. */
-#endif
+typedef uint32_t SHA_INT32;  /* 32-bit integer */
 
 /* The SHA block size and message digest sizes, in bytes */
 
@@ -61,14 +57,9 @@
 #if PY_LITTLE_ENDIAN
 static void longReverse(SHA_INT32 *buffer, int byteCount)
 {
-    SHA_INT32 value;
-
     byteCount /= sizeof(*buffer);
-    while (byteCount--) {
-        value = *buffer;
-        value = ( ( value & 0xFF00FF00L ) >> 8  ) | \
-                ( ( value & 0x00FF00FFL ) << 8 );
-        *buffer++ = ( value << 16 ) | ( value >> 16 );
+    for (; byteCount--; buffer++) {
+        *buffer = _Py_bswap32(*buffer);
     }
 }
 #endif
diff --git a/Modules/sha512module.c b/Modules/sha512module.c
index 38d303d..780f8e7 100644
--- a/Modules/sha512module.c
+++ b/Modules/sha512module.c
@@ -17,6 +17,7 @@
 /* SHA objects */
 
 #include "Python.h"
+#include "pycore_byteswap.h"      // _Py_bswap32()
 #include "structmember.h"         // PyMemberDef
 #include "hashlib.h"
 #include "pystrhex.h"
@@ -30,13 +31,8 @@
 /* Some useful types */
 
 typedef unsigned char SHA_BYTE;
-
-#if SIZEOF_INT == 4
-typedef unsigned int SHA_INT32; /* 32-bit integer */
-typedef unsigned long long SHA_INT64;        /* 64-bit integer */
-#else
-/* not defined. compilation will die. */
-#endif
+typedef uint32_t SHA_INT32;  /* 32-bit integer */
+typedef uint64_t SHA_INT64;  /* 64-bit integer */
 
 /* The SHA block size and message digest sizes, in bytes */
 
@@ -62,22 +58,9 @@
 #if PY_LITTLE_ENDIAN
 static void longReverse(SHA_INT64 *buffer, int byteCount)
 {
-    SHA_INT64 value;
-
     byteCount /= sizeof(*buffer);
-    while (byteCount--) {
-        value = *buffer;
-
-                ((unsigned char*)buffer)[0] = (unsigned char)(value >> 56) & 0xff;
-                ((unsigned char*)buffer)[1] = (unsigned char)(value >> 48) & 0xff;
-                ((unsigned char*)buffer)[2] = (unsigned char)(value >> 40) & 0xff;
-                ((unsigned char*)buffer)[3] = (unsigned char)(value >> 32) & 0xff;
-                ((unsigned char*)buffer)[4] = (unsigned char)(value >> 24) & 0xff;
-                ((unsigned char*)buffer)[5] = (unsigned char)(value >> 16) & 0xff;
-                ((unsigned char*)buffer)[6] = (unsigned char)(value >>  8) & 0xff;
-                ((unsigned char*)buffer)[7] = (unsigned char)(value      ) & 0xff;
-
-                buffer++;
+    for (; byteCount--; buffer++) {
+        *buffer = _Py_bswap64(*buffer);
     }
 }
 #endif