/*
   Unicode character type helpers.

   Written by Marc-Andre Lemburg (mal@lemburg.com).
   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)

   Copyright (c) Corporation for National Research Initiatives.

*/
| 10 | |
| 11 | #include "Python.h" |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 12 | |
/* Bit flags tested against the `flags` member of a type record. */
#define ALPHA_MASK          0x0001  /* tested by _PyUnicode_IsAlpha */
#define DECIMAL_MASK        0x0002  /* `decimal` member holds a value */
#define DIGIT_MASK          0x0004  /* `digit` member holds a value */
#define LOWER_MASK          0x0008  /* tested by _PyUnicode_IsLowercase */
#define LINEBREAK_MASK      0x0010  /* not tested in this part of the file */
#define SPACE_MASK          0x0020  /* not tested in this part of the file */
#define TITLE_MASK          0x0040  /* tested by _PyUnicode_IsTitlecase */
#define UPPER_MASK          0x0080  /* tested by _PyUnicode_IsUppercase */
#define XID_START_MASK      0x0100  /* tested by _PyUnicode_IsXidStart */
#define XID_CONTINUE_MASK   0x0200  /* tested by _PyUnicode_IsXidContinue */
#define PRINTABLE_MASK      0x0400  /* tested by _PyUnicode_IsPrintable */
#define NUMERIC_MASK        0x0800  /* tested by _PyUnicode_IsNumeric */
#define CASE_IGNORABLE_MASK 0x1000  /* tested by _PyUnicode_IsCaseIgnorable */
#define CASED_MASK          0x2000  /* tested by _PyUnicode_IsCased */
/* upper/lower/title are offsets into _PyUnicode_ExtendedCase, not deltas */
#define EXTENDED_CASE_MASK  0x4000
Jack Jansen | 56cdce3 | 2000-07-06 13:57:38 +0000 | [diff] [blame] | 28 | |
/* Per-character type information.  The member order must not change:
   NOTE(review): the table in unicodetype_db.h presumably initializes these
   records positionally -- confirm before reordering. */
typedef struct {
    /* Case mappings.  Each value is either a delta to add to the character
       or, for records flagged EXTENDED_CASE_MASK, an offset into
       _PyUnicode_ExtendedCase. */
    const int upper;
    const int lower;
    const int title;

    /* Note if more flag space is needed, decimal and digit could be
       unified. */
    const unsigned char decimal;    /* meaningful when DECIMAL_MASK is set */
    const unsigned char digit;      /* meaningful when DIGIT_MASK is set */

    const unsigned short flags;     /* bitwise OR of the *_MASK constants */
} _PyUnicode_TypeRecord;
| 42 | |
| 43 | #include "unicodetype_db.h" |
| 44 | |
| 45 | static const _PyUnicode_TypeRecord * |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 46 | gettyperecord(Py_UCS4 code) |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 47 | { |
| 48 | int index; |
| 49 | |
Martin v. Löwis | 9def6a3 | 2002-10-18 16:11:54 +0000 | [diff] [blame] | 50 | if (code >= 0x110000) |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 51 | index = 0; |
Hye-Shik Chang | 7db07e6 | 2003-12-29 01:36:01 +0000 | [diff] [blame] | 52 | else |
Hye-Shik Chang | 7db07e6 | 2003-12-29 01:36:01 +0000 | [diff] [blame] | 53 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 54 | index = index1[(code>>SHIFT)]; |
| 55 | index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))]; |
| 56 | } |
Fredrik Lundh | ee13dba | 2001-06-26 20:36:12 +0000 | [diff] [blame] | 57 | |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 58 | return &_PyUnicode_TypeRecords[index]; |
| 59 | } |
Jack Jansen | 56cdce3 | 2000-07-06 13:57:38 +0000 | [diff] [blame] | 60 | |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 61 | /* Returns the titlecase Unicode characters corresponding to ch or just |
| 62 | ch if no titlecase mapping is known. */ |
| 63 | |
Antoine Pitrou | 9ed5f27 | 2013-08-13 20:18:52 +0200 | [diff] [blame] | 64 | Py_UCS4 _PyUnicode_ToTitlecase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 65 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 66 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 67 | |
Benjamin Peterson | c6630b9 | 2012-01-15 21:33:32 -0500 | [diff] [blame] | 68 | if (ctype->flags & EXTENDED_CASE_MASK) |
| 69 | return _PyUnicode_ExtendedCase[ctype->title & 0xFFFF]; |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 70 | return ch + ctype->title; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 71 | } |
| 72 | |
| 73 | /* Returns 1 for Unicode characters having the category 'Lt', 0 |
| 74 | otherwise. */ |
| 75 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 76 | int _PyUnicode_IsTitlecase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 77 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 78 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 79 | |
| 80 | return (ctype->flags & TITLE_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 81 | } |
| 82 | |
Martin v. Löwis | 13c3e38 | 2007-08-14 22:37:03 +0000 | [diff] [blame] | 83 | /* Returns 1 for Unicode characters having the XID_Start property, 0 |
| 84 | otherwise. */ |
| 85 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 86 | int _PyUnicode_IsXidStart(Py_UCS4 ch) |
Martin v. Löwis | 13c3e38 | 2007-08-14 22:37:03 +0000 | [diff] [blame] | 87 | { |
| 88 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 89 | |
| 90 | return (ctype->flags & XID_START_MASK) != 0; |
| 91 | } |
| 92 | |
| 93 | /* Returns 1 for Unicode characters having the XID_Continue property, |
| 94 | 0 otherwise. */ |
| 95 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 96 | int _PyUnicode_IsXidContinue(Py_UCS4 ch) |
Martin v. Löwis | 13c3e38 | 2007-08-14 22:37:03 +0000 | [diff] [blame] | 97 | { |
| 98 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 99 | |
| 100 | return (ctype->flags & XID_CONTINUE_MASK) != 0; |
| 101 | } |
| 102 | |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 103 | /* Returns the integer decimal (0-9) for Unicode characters having |
| 104 | this property, -1 otherwise. */ |
| 105 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 106 | int _PyUnicode_ToDecimalDigit(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 107 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 108 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 109 | |
| 110 | return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 111 | } |
| 112 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 113 | int _PyUnicode_IsDecimalDigit(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 114 | { |
| 115 | if (_PyUnicode_ToDecimalDigit(ch) < 0) |
Alexander Belopolsky | f0f4514 | 2010-08-11 17:31:17 +0000 | [diff] [blame] | 116 | return 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 117 | return 1; |
| 118 | } |
| 119 | |
| 120 | /* Returns the integer digit (0-9) for Unicode characters having |
| 121 | this property, -1 otherwise. */ |
| 122 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 123 | int _PyUnicode_ToDigit(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 124 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 125 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 126 | |
| 127 | return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 128 | } |
| 129 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 130 | int _PyUnicode_IsDigit(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 131 | { |
| 132 | if (_PyUnicode_ToDigit(ch) < 0) |
Alexander Belopolsky | f0f4514 | 2010-08-11 17:31:17 +0000 | [diff] [blame] | 133 | return 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 134 | return 1; |
| 135 | } |
| 136 | |
| 137 | /* Returns the numeric value as double for Unicode characters having |
| 138 | this property, -1.0 otherwise. */ |
| 139 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 140 | int _PyUnicode_IsNumeric(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 141 | { |
Amaury Forgeot d'Arc | 7d52079 | 2009-10-06 21:03:20 +0000 | [diff] [blame] | 142 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 143 | |
| 144 | return (ctype->flags & NUMERIC_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 145 | } |
| 146 | |
Georg Brandl | 559e5d7 | 2008-06-11 18:37:52 +0000 | [diff] [blame] | 147 | /* Returns 1 for Unicode characters to be hex-escaped when repr()ed, |
| 148 | 0 otherwise. |
| 149 | All characters except those characters defined in the Unicode character |
| 150 | database as following categories are considered printable. |
| 151 | * Cc (Other, Control) |
| 152 | * Cf (Other, Format) |
| 153 | * Cs (Other, Surrogate) |
| 154 | * Co (Other, Private Use) |
| 155 | * Cn (Other, Not Assigned) |
| 156 | * Zl Separator, Line ('\u2028', LINE SEPARATOR) |
| 157 | * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) |
| 158 | * Zs (Separator, Space) other than ASCII space('\x20'). |
| 159 | */ |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 160 | int _PyUnicode_IsPrintable(Py_UCS4 ch) |
Georg Brandl | 559e5d7 | 2008-06-11 18:37:52 +0000 | [diff] [blame] | 161 | { |
| 162 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 163 | |
Georg Brandl | d52429f | 2008-07-04 15:55:02 +0000 | [diff] [blame] | 164 | return (ctype->flags & PRINTABLE_MASK) != 0; |
Georg Brandl | 559e5d7 | 2008-06-11 18:37:52 +0000 | [diff] [blame] | 165 | } |
| 166 | |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 167 | /* Returns 1 for Unicode characters having the category 'Ll', 0 |
| 168 | otherwise. */ |
| 169 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 170 | int _PyUnicode_IsLowercase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 171 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 172 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 173 | |
| 174 | return (ctype->flags & LOWER_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 175 | } |
| 176 | |
| 177 | /* Returns 1 for Unicode characters having the category 'Lu', 0 |
| 178 | otherwise. */ |
| 179 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 180 | int _PyUnicode_IsUppercase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 181 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 182 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 183 | |
| 184 | return (ctype->flags & UPPER_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 185 | } |
| 186 | |
| 187 | /* Returns the uppercase Unicode characters corresponding to ch or just |
| 188 | ch if no uppercase mapping is known. */ |
| 189 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 190 | Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 191 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 192 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 193 | |
| 194 | if (ctype->flags & EXTENDED_CASE_MASK) |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 195 | return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF]; |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 196 | return ch + ctype->upper; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 197 | } |
| 198 | |
| 199 | /* Returns the lowercase Unicode characters corresponding to ch or just |
| 200 | ch if no lowercase mapping is known. */ |
| 201 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 202 | Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 203 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 204 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 205 | |
| 206 | if (ctype->flags & EXTENDED_CASE_MASK) |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 207 | return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF]; |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 208 | return ch + ctype->lower; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 209 | } |
| 210 | |
| 211 | int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res) |
| 212 | { |
| 213 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 214 | |
| 215 | if (ctype->flags & EXTENDED_CASE_MASK) { |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 216 | int index = ctype->lower & 0xFFFF; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 217 | int n = ctype->lower >> 24; |
| 218 | int i; |
| 219 | for (i = 0; i < n; i++) |
| 220 | res[i] = _PyUnicode_ExtendedCase[index + i]; |
| 221 | return n; |
| 222 | } |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 223 | res[0] = ch + ctype->lower; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 224 | return 1; |
| 225 | } |
| 226 | |
| 227 | int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res) |
| 228 | { |
| 229 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 230 | |
| 231 | if (ctype->flags & EXTENDED_CASE_MASK) { |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 232 | int index = ctype->title & 0xFFFF; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 233 | int n = ctype->title >> 24; |
| 234 | int i; |
| 235 | for (i = 0; i < n; i++) |
| 236 | res[i] = _PyUnicode_ExtendedCase[index + i]; |
| 237 | return n; |
| 238 | } |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 239 | res[0] = ch + ctype->title; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 240 | return 1; |
| 241 | } |
| 242 | |
| 243 | int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res) |
| 244 | { |
| 245 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 246 | |
| 247 | if (ctype->flags & EXTENDED_CASE_MASK) { |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 248 | int index = ctype->upper & 0xFFFF; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 249 | int n = ctype->upper >> 24; |
| 250 | int i; |
| 251 | for (i = 0; i < n; i++) |
| 252 | res[i] = _PyUnicode_ExtendedCase[index + i]; |
| 253 | return n; |
| 254 | } |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 255 | res[0] = ch + ctype->upper; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 256 | return 1; |
| 257 | } |
| 258 | |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 259 | int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res) |
| 260 | { |
| 261 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 262 | |
| 263 | if (ctype->flags & EXTENDED_CASE_MASK && (ctype->lower >> 20) & 7) { |
| 264 | int index = (ctype->lower & 0xFFFF) + (ctype->lower >> 24); |
| 265 | int n = (ctype->lower >> 20) & 7; |
| 266 | int i; |
| 267 | for (i = 0; i < n; i++) |
| 268 | res[i] = _PyUnicode_ExtendedCase[index + i]; |
| 269 | return n; |
| 270 | } |
| 271 | return _PyUnicode_ToLowerFull(ch, res); |
| 272 | } |
| 273 | |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 274 | int _PyUnicode_IsCased(Py_UCS4 ch) |
| 275 | { |
| 276 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 277 | |
| 278 | return (ctype->flags & CASED_MASK) != 0; |
| 279 | } |
| 280 | |
| 281 | int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch) |
| 282 | { |
| 283 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 284 | |
| 285 | return (ctype->flags & CASE_IGNORABLE_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 286 | } |
| 287 | |
Marc-André Lemburg | f3938f5 | 2000-07-05 09:48:59 +0000 | [diff] [blame] | 288 | /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt', |
| 289 | 'Lo' or 'Lm', 0 otherwise. */ |
| 290 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 291 | int _PyUnicode_IsAlpha(Py_UCS4 ch) |
Marc-André Lemburg | f3938f5 | 2000-07-05 09:48:59 +0000 | [diff] [blame] | 292 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 293 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
Marc-André Lemburg | f3938f5 | 2000-07-05 09:48:59 +0000 | [diff] [blame] | 294 | |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 295 | return (ctype->flags & ALPHA_MASK) != 0; |
Marc-André Lemburg | f3938f5 | 2000-07-05 09:48:59 +0000 | [diff] [blame] | 296 | } |
| 297 | |