bpo-38252: Use 8-byte step to detect ASCII sequence in 64-bit Windows build (GH-16334)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index c4e73eb..f963deb 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5025,21 +5025,21 @@
#include "stringlib/codecs.h"
#include "stringlib/undef.h"
-/* Mask to quickly check whether a C 'long' contains a
+/* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */
-#if (SIZEOF_LONG == 8)
-# define ASCII_CHAR_MASK 0x8080808080808080UL
-#elif (SIZEOF_LONG == 4)
-# define ASCII_CHAR_MASK 0x80808080UL
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
#else
-# error C 'long' size should be either 4 or 8!
+# error C 'size_t' size should be either 4 or 8!
#endif
static Py_ssize_t
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
{
const char *p = start;
- const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
+ const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
/*
* Issue #17237: m68k is a bit different from most architectures in
@@ -5049,21 +5049,21 @@
* version" will even speed up m68k.
*/
#if !defined(__m68k__)
-#if SIZEOF_LONG <= SIZEOF_VOID_P
- assert(_Py_IS_ALIGNED(dest, SIZEOF_LONG));
- if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
+ assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
+ if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Fast path, see in STRINGLIB(utf8_decode) for
an explanation. */
/* Help allocation */
const char *_p = p;
Py_UCS1 * q = dest;
while (_p < aligned_end) {
- unsigned long value = *(const unsigned long *) _p;
+ size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK)
break;
- *((unsigned long *)q) = value;
- _p += SIZEOF_LONG;
- q += SIZEOF_LONG;
+ *((size_t *)q) = value;
+ _p += SIZEOF_SIZE_T;
+ q += SIZEOF_SIZE_T;
}
p = _p;
while (p < end) {
@@ -5078,14 +5078,14 @@
while (p < end) {
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
for an explanation. */
- if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
+ if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Help allocation */
const char *_p = p;
while (_p < aligned_end) {
- unsigned long value = *(const unsigned long *) _p;
+ size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK)
break;
- _p += SIZEOF_LONG;
+ _p += SIZEOF_SIZE_T;
}
p = _p;
if (_p == end)