Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 1 | /* |
| 2 | Unicode character type helpers. |
| 3 | |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 4 | Written by Marc-Andre Lemburg (mal@lemburg.com). |
| 5 | Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 6 | |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 7 | Copyright (c) Corporation for National Research Initiatives. |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 8 | |
| 9 | */ |
| 10 | |
| 11 | #include "Python.h" |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 12 | |
/* Bit flags stored in _PyUnicode_TypeRecord.flags.  The field is an
   unsigned short, so every mask has to fit in 16 bits. */
#define ALPHA_MASK          (1 << 0)    /* tested by _PyUnicode_IsAlpha */
#define DECIMAL_MASK        (1 << 1)    /* record's 'decimal' field is valid */
#define DIGIT_MASK          (1 << 2)    /* record's 'digit' field is valid */
#define LOWER_MASK          (1 << 3)    /* tested by _PyUnicode_IsLowercase */
#define LINEBREAK_MASK      (1 << 4)
#define SPACE_MASK          (1 << 5)
#define TITLE_MASK          (1 << 6)    /* tested by _PyUnicode_IsTitlecase */
#define UPPER_MASK          (1 << 7)    /* tested by _PyUnicode_IsUppercase */
#define XID_START_MASK      (1 << 8)    /* tested by _PyUnicode_IsXidStart */
#define XID_CONTINUE_MASK   (1 << 9)    /* tested by _PyUnicode_IsXidContinue */
#define PRINTABLE_MASK      (1 << 10)   /* tested by _PyUnicode_IsPrintable */
#define NUMERIC_MASK        (1 << 11)   /* tested by _PyUnicode_IsNumeric */
#define CASE_IGNORABLE_MASK (1 << 12)   /* tested by _PyUnicode_IsCaseIgnorable */
#define CASED_MASK          (1 << 13)   /* tested by _PyUnicode_IsCased */
/* upper/lower/title hold packed offsets into _PyUnicode_ExtendedCase
   instead of deltas to the character. */
#define EXTENDED_CASE_MASK  (1 << 14)
Jack Jansen | 56cdce3 | 2000-07-06 13:57:38 +0000 | [diff] [blame] | 28 | |
/* One entry of the character type database.  Field order matters to any
   positional initializers of this struct, so do not reorder the members. */
typedef struct {
    /* Case mappings.  Each value is either a delta to add to the
       character, or (when the record is flagged as extended case) a
       packed offset into _PyUnicode_ExtendedCase. */
    const int upper;
    const int lower;
    const int title;
    /* Should more flag bits ever be required, 'decimal' and 'digit'
       could be merged into a single field to make room. */
    const unsigned char decimal;    /* decimal digit value */
    const unsigned char digit;      /* digit value */
    const unsigned short flags;     /* bitwise OR of the *_MASK constants */
} _PyUnicode_TypeRecord;
| 42 | |
| 43 | #include "unicodetype_db.h" |
| 44 | |
| 45 | static const _PyUnicode_TypeRecord * |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 46 | gettyperecord(Py_UCS4 code) |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 47 | { |
| 48 | int index; |
| 49 | |
Martin v. Löwis | 9def6a3 | 2002-10-18 16:11:54 +0000 | [diff] [blame] | 50 | if (code >= 0x110000) |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 51 | index = 0; |
Hye-Shik Chang | 7db07e6 | 2003-12-29 01:36:01 +0000 | [diff] [blame] | 52 | else |
Hye-Shik Chang | 7db07e6 | 2003-12-29 01:36:01 +0000 | [diff] [blame] | 53 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 54 | index = index1[(code>>SHIFT)]; |
| 55 | index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))]; |
| 56 | } |
Fredrik Lundh | ee13dba | 2001-06-26 20:36:12 +0000 | [diff] [blame] | 57 | |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 58 | return &_PyUnicode_TypeRecords[index]; |
| 59 | } |
Jack Jansen | 56cdce3 | 2000-07-06 13:57:38 +0000 | [diff] [blame] | 60 | |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 61 | /* Returns the titlecase Unicode characters corresponding to ch or just |
| 62 | ch if no titlecase mapping is known. */ |
| 63 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 64 | Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 65 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 66 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 67 | |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 68 | return ch + ctype->title; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 69 | } |
| 70 | |
| 71 | /* Returns 1 for Unicode characters having the category 'Lt', 0 |
| 72 | otherwise. */ |
| 73 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 74 | int _PyUnicode_IsTitlecase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 75 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 76 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 77 | |
| 78 | return (ctype->flags & TITLE_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 79 | } |
| 80 | |
Martin v. Löwis | 13c3e38 | 2007-08-14 22:37:03 +0000 | [diff] [blame] | 81 | /* Returns 1 for Unicode characters having the XID_Start property, 0 |
| 82 | otherwise. */ |
| 83 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 84 | int _PyUnicode_IsXidStart(Py_UCS4 ch) |
Martin v. Löwis | 13c3e38 | 2007-08-14 22:37:03 +0000 | [diff] [blame] | 85 | { |
| 86 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 87 | |
| 88 | return (ctype->flags & XID_START_MASK) != 0; |
| 89 | } |
| 90 | |
| 91 | /* Returns 1 for Unicode characters having the XID_Continue property, |
| 92 | 0 otherwise. */ |
| 93 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 94 | int _PyUnicode_IsXidContinue(Py_UCS4 ch) |
Martin v. Löwis | 13c3e38 | 2007-08-14 22:37:03 +0000 | [diff] [blame] | 95 | { |
| 96 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 97 | |
| 98 | return (ctype->flags & XID_CONTINUE_MASK) != 0; |
| 99 | } |
| 100 | |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 101 | /* Returns the integer decimal (0-9) for Unicode characters having |
| 102 | this property, -1 otherwise. */ |
| 103 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 104 | int _PyUnicode_ToDecimalDigit(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 105 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 106 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 107 | |
| 108 | return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 109 | } |
| 110 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 111 | int _PyUnicode_IsDecimalDigit(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 112 | { |
| 113 | if (_PyUnicode_ToDecimalDigit(ch) < 0) |
Alexander Belopolsky | f0f4514 | 2010-08-11 17:31:17 +0000 | [diff] [blame] | 114 | return 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 115 | return 1; |
| 116 | } |
| 117 | |
| 118 | /* Returns the integer digit (0-9) for Unicode characters having |
| 119 | this property, -1 otherwise. */ |
| 120 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 121 | int _PyUnicode_ToDigit(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 122 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 123 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 124 | |
| 125 | return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 126 | } |
| 127 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 128 | int _PyUnicode_IsDigit(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 129 | { |
| 130 | if (_PyUnicode_ToDigit(ch) < 0) |
Alexander Belopolsky | f0f4514 | 2010-08-11 17:31:17 +0000 | [diff] [blame] | 131 | return 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 132 | return 1; |
| 133 | } |
| 134 | |
| 135 | /* Returns the numeric value as double for Unicode characters having |
| 136 | this property, -1.0 otherwise. */ |
| 137 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 138 | int _PyUnicode_IsNumeric(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 139 | { |
Amaury Forgeot d'Arc | 7d52079 | 2009-10-06 21:03:20 +0000 | [diff] [blame] | 140 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 141 | |
| 142 | return (ctype->flags & NUMERIC_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 143 | } |
| 144 | |
Georg Brandl | 559e5d7 | 2008-06-11 18:37:52 +0000 | [diff] [blame] | 145 | /* Returns 1 for Unicode characters to be hex-escaped when repr()ed, |
| 146 | 0 otherwise. |
| 147 | All characters except those characters defined in the Unicode character |
| 148 | database as following categories are considered printable. |
| 149 | * Cc (Other, Control) |
| 150 | * Cf (Other, Format) |
| 151 | * Cs (Other, Surrogate) |
| 152 | * Co (Other, Private Use) |
| 153 | * Cn (Other, Not Assigned) |
| 154 | * Zl Separator, Line ('\u2028', LINE SEPARATOR) |
| 155 | * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) |
| 156 | * Zs (Separator, Space) other than ASCII space('\x20'). |
| 157 | */ |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 158 | int _PyUnicode_IsPrintable(Py_UCS4 ch) |
Georg Brandl | 559e5d7 | 2008-06-11 18:37:52 +0000 | [diff] [blame] | 159 | { |
| 160 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 161 | |
Georg Brandl | d52429f | 2008-07-04 15:55:02 +0000 | [diff] [blame] | 162 | return (ctype->flags & PRINTABLE_MASK) != 0; |
Georg Brandl | 559e5d7 | 2008-06-11 18:37:52 +0000 | [diff] [blame] | 163 | } |
| 164 | |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 165 | /* Returns 1 for Unicode characters having the category 'Ll', 0 |
| 166 | otherwise. */ |
| 167 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 168 | int _PyUnicode_IsLowercase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 169 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 170 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 171 | |
| 172 | return (ctype->flags & LOWER_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 173 | } |
| 174 | |
| 175 | /* Returns 1 for Unicode characters having the category 'Lu', 0 |
| 176 | otherwise. */ |
| 177 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 178 | int _PyUnicode_IsUppercase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 179 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 180 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 181 | |
| 182 | return (ctype->flags & UPPER_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 183 | } |
| 184 | |
| 185 | /* Returns the uppercase Unicode characters corresponding to ch or just |
| 186 | ch if no uppercase mapping is known. */ |
| 187 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 188 | Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 189 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 190 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 191 | |
| 192 | if (ctype->flags & EXTENDED_CASE_MASK) |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 193 | return _PyUnicode_ExtendedCase[ctype->upper & 0xFFFF]; |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 194 | return ch + ctype->upper; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 195 | } |
| 196 | |
| 197 | /* Returns the lowercase Unicode characters corresponding to ch or just |
| 198 | ch if no lowercase mapping is known. */ |
| 199 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 200 | Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch) |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 201 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 202 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 203 | |
| 204 | if (ctype->flags & EXTENDED_CASE_MASK) |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 205 | return _PyUnicode_ExtendedCase[ctype->lower & 0xFFFF]; |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 206 | return ch + ctype->lower; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 207 | } |
| 208 | |
| 209 | int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res) |
| 210 | { |
| 211 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 212 | |
| 213 | if (ctype->flags & EXTENDED_CASE_MASK) { |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 214 | int index = ctype->lower & 0xFFFF; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 215 | int n = ctype->lower >> 24; |
| 216 | int i; |
| 217 | for (i = 0; i < n; i++) |
| 218 | res[i] = _PyUnicode_ExtendedCase[index + i]; |
| 219 | return n; |
| 220 | } |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 221 | res[0] = ch + ctype->lower; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 222 | return 1; |
| 223 | } |
| 224 | |
| 225 | int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res) |
| 226 | { |
| 227 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 228 | |
| 229 | if (ctype->flags & EXTENDED_CASE_MASK) { |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 230 | int index = ctype->title & 0xFFFF; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 231 | int n = ctype->title >> 24; |
| 232 | int i; |
| 233 | for (i = 0; i < n; i++) |
| 234 | res[i] = _PyUnicode_ExtendedCase[index + i]; |
| 235 | return n; |
| 236 | } |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 237 | res[0] = ch + ctype->title; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 238 | return 1; |
| 239 | } |
| 240 | |
| 241 | int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res) |
| 242 | { |
| 243 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 244 | |
| 245 | if (ctype->flags & EXTENDED_CASE_MASK) { |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 246 | int index = ctype->upper & 0xFFFF; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 247 | int n = ctype->upper >> 24; |
| 248 | int i; |
| 249 | for (i = 0; i < n; i++) |
| 250 | res[i] = _PyUnicode_ExtendedCase[index + i]; |
| 251 | return n; |
| 252 | } |
Benjamin Peterson | ad9c569 | 2012-01-15 21:19:20 -0500 | [diff] [blame] | 253 | res[0] = ch + ctype->upper; |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 254 | return 1; |
| 255 | } |
| 256 | |
Benjamin Peterson | d5890c8 | 2012-01-14 13:23:30 -0500 | [diff] [blame] | 257 | int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res) |
| 258 | { |
| 259 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 260 | |
| 261 | if (ctype->flags & EXTENDED_CASE_MASK && (ctype->lower >> 20) & 7) { |
| 262 | int index = (ctype->lower & 0xFFFF) + (ctype->lower >> 24); |
| 263 | int n = (ctype->lower >> 20) & 7; |
| 264 | int i; |
| 265 | for (i = 0; i < n; i++) |
| 266 | res[i] = _PyUnicode_ExtendedCase[index + i]; |
| 267 | return n; |
| 268 | } |
| 269 | return _PyUnicode_ToLowerFull(ch, res); |
| 270 | } |
| 271 | |
Benjamin Peterson | b2bf01d | 2012-01-11 18:17:06 -0500 | [diff] [blame] | 272 | int _PyUnicode_IsCased(Py_UCS4 ch) |
| 273 | { |
| 274 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 275 | |
| 276 | return (ctype->flags & CASED_MASK) != 0; |
| 277 | } |
| 278 | |
| 279 | int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch) |
| 280 | { |
| 281 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
| 282 | |
| 283 | return (ctype->flags & CASE_IGNORABLE_MASK) != 0; |
Guido van Rossum | 603484d | 2000-03-10 22:52:46 +0000 | [diff] [blame] | 284 | } |
| 285 | |
Marc-André Lemburg | f3938f5 | 2000-07-05 09:48:59 +0000 | [diff] [blame] | 286 | /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt', |
| 287 | 'Lo' or 'Lm', 0 otherwise. */ |
| 288 | |
Amaury Forgeot d'Arc | 324ac65 | 2010-08-18 20:44:58 +0000 | [diff] [blame] | 289 | int _PyUnicode_IsAlpha(Py_UCS4 ch) |
Marc-André Lemburg | f3938f5 | 2000-07-05 09:48:59 +0000 | [diff] [blame] | 290 | { |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 291 | const _PyUnicode_TypeRecord *ctype = gettyperecord(ch); |
Marc-André Lemburg | f3938f5 | 2000-07-05 09:48:59 +0000 | [diff] [blame] | 292 | |
Fredrik Lundh | 9e7dd4c | 2000-09-25 21:48:13 +0000 | [diff] [blame] | 293 | return (ctype->flags & ALPHA_MASK) != 0; |
Marc-André Lemburg | f3938f5 | 2000-07-05 09:48:59 +0000 | [diff] [blame] | 294 | } |
| 295 | |