bpo-40302: Add pycore_byteswap.h header file (GH-19552)

Add a new internal pycore_byteswap.h header file with the following
functions:

* _Py_bswap16()
* _Py_bswap32()
* _Py_bswap64()

Use these functions in _ctypes, sha256 and sha512 modules,
and also use in the UTF-32 encoder.

sha256, sha512 and _ctypes modules are now built with the internal
C API.
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
index cd7aa69..208e8fe 100644
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -4,6 +4,8 @@
 # error "codecs.h is specific to Unicode"
 #endif
 
+#include "pycore_byteswap.h"      // _Py_bswap32()
+
 /* Mask to quickly check whether a C 'long' contains a
    non-ASCII, UTF8-encoded char. */
 #if (SIZEOF_LONG == 8)
@@ -732,24 +734,28 @@
 #endif
 }
 
+static inline uint32_t
+STRINGLIB(SWAB4)(STRINGLIB_CHAR ch)
+{
+    uint32_t word = ch;
 #if STRINGLIB_SIZEOF_CHAR == 1
-# define SWAB4(CH, tmp)  ((CH) << 24) /* high bytes are zero */
+    /* high bytes are zero */
+    return (word << 24);
 #elif STRINGLIB_SIZEOF_CHAR == 2
-# define SWAB4(CH, tmp)  (tmp = (CH), \
-            ((tmp & 0x00FFu) << 24) + ((tmp & 0xFF00u) << 8))
-            /* high bytes are zero */
+    /* high bytes are zero */
+    return ((word & 0x00FFu) << 24) + ((word & 0xFF00u) << 8);
 #else
-# define SWAB4(CH, tmp)  (tmp = (CH), \
-            tmp = ((tmp & 0x00FF00FFu) << 8) + ((tmp >> 8) & 0x00FF00FFu), \
-            ((tmp & 0x0000FFFFu) << 16) + ((tmp >> 16) & 0x0000FFFFu))
+    return _Py_bswap32(word);
 #endif
+}
+
 Py_LOCAL_INLINE(Py_ssize_t)
 STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,
                         Py_ssize_t len,
-                        PY_UINT32_T **outptr,
+                        uint32_t **outptr,
                         int native_ordering)
 {
-    PY_UINT32_T *out = *outptr;
+    uint32_t *out = *outptr;
     const STRINGLIB_CHAR *end = in + len;
     if (native_ordering) {
         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
@@ -783,7 +789,6 @@
         const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
         while (in < unrolled_end) {
 #if STRINGLIB_SIZEOF_CHAR > 1
-            Py_UCS4 ch1, ch2, ch3, ch4;
             /* check if any character is a surrogate character */
             if (((in[0] ^ 0xd800) &
                  (in[1] ^ 0xd800) &
@@ -791,10 +796,10 @@
                  (in[3] ^ 0xd800) & 0xf800) == 0)
                 break;
 #endif
-            out[0] = SWAB4(in[0], ch1);
-            out[1] = SWAB4(in[1], ch2);
-            out[2] = SWAB4(in[2], ch3);
-            out[3] = SWAB4(in[3], ch4);
+            out[0] = STRINGLIB(SWAB4)(in[0]);
+            out[1] = STRINGLIB(SWAB4)(in[1]);
+            out[2] = STRINGLIB(SWAB4)(in[2]);
+            out[3] = STRINGLIB(SWAB4)(in[3]);
             in += 4; out += 4;
         }
         while (in < end) {
@@ -805,7 +810,7 @@
                 goto fail;
             }
 #endif
-            *out++ = SWAB4(ch, ch);
+            *out++ = STRINGLIB(SWAB4)(ch);
         }
     }
     *outptr = out;
@@ -816,6 +821,5 @@
     return len - (end - in + 1);
 #endif
 }
-#undef SWAB4
 
 #endif