delta encoding of upper/lower/title makes a glorious return (#12736)
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
index 0ebdedb..a10d319 100644
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -27,9 +27,13 @@
#define EXTENDED_CASE_MASK 0x4000
typedef struct {
- const Py_UCS4 upper;
- const Py_UCS4 lower;
- const Py_UCS4 title;
+ /*
+ Each of these is either a signed delta that is added to the character or,
+ when EXTENDED_CASE_MASK is set in flags, an offset in _PyUnicode_ExtendedCase.
+ */
+ const int upper;
+ const int lower;
+ const int title;
const unsigned char decimal;
const unsigned char digit;
const unsigned short flags;
@@ -60,7 +64,7 @@
{
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
- return ctype->title ? ctype->title : ch;
+ return ch + ctype->title;
}
/* Returns 1 for Unicode characters having the category 'Lt', 0
@@ -186,7 +190,7 @@
if (ctype->flags & EXTENDED_CASE_MASK)
return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF];
- return ctype->upper ? ctype->upper : ch;
+ return ch + ctype->upper;
}
/* Returns the lowercase Unicode characters corresponding to ch or just
@@ -198,7 +202,7 @@
if (ctype->flags & EXTENDED_CASE_MASK)
return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF];
- return ctype->lower ? ctype->lower : ch;
+ return ch + ctype->lower;
}
int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res)
@@ -213,7 +217,7 @@
res[i] = _PyUnicode_ExtendedCase[index + i];
return n;
}
- res[0] = ctype->lower ? ctype->lower : ch;
+ res[0] = ch + ctype->lower;
return 1;
}
@@ -229,7 +233,7 @@
res[i] = _PyUnicode_ExtendedCase[index + i];
return n;
}
- res[0] = ctype->title ? ctype->title : ch;
+ res[0] = ch + ctype->title;
return 1;
}
@@ -245,7 +249,7 @@
res[i] = _PyUnicode_ExtendedCase[index + i];
return n;
}
- res[0] = ctype->upper ? ctype->upper : ch;
+ res[0] = ch + ctype->upper;
return 1;
}