blob: 2afafb8da25c13bf067e70846d26aa51291c39a8 [file] [log] [blame]
Guido van Rossum603484d2000-03-10 22:52:46 +00001/*
2 Unicode character type helpers.
3
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +00004 Written by Marc-Andre Lemburg (mal@lemburg.com).
5 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
Guido van Rossum603484d2000-03-10 22:52:46 +00006
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +00007 Copyright (c) Corporation for National Research Initiatives.
Guido van Rossum603484d2000-03-10 22:52:46 +00008
9*/
10
11#include "Python.h"
Guido van Rossum603484d2000-03-10 22:52:46 +000012#include "unicodeobject.h"
13
/* Bit flags tested against the `flags` member of a _PyUnicode_TypeRecord.
   Each flag occupies one bit; the numeric values are 0x01 .. 0x100.
   LINEBREAK_MASK and SPACE_MASK are defined here but not tested by the
   predicates in this file, which use explicit code point lists instead. */
#define ALPHA_MASK      (1 << 0)    /* 0x01 */
#define DECIMAL_MASK    (1 << 1)    /* 0x02: `decimal` member is valid */
#define DIGIT_MASK      (1 << 2)    /* 0x04: `digit` member is valid */
#define LOWER_MASK      (1 << 3)    /* 0x08 */
#define LINEBREAK_MASK  (1 << 4)    /* 0x10 */
#define SPACE_MASK      (1 << 5)    /* 0x20 */
#define TITLE_MASK      (1 << 6)    /* 0x40 */
#define UPPER_MASK      (1 << 7)    /* 0x80 */
/* 0x100: upper/lower/title hold the mapped character itself rather than
   a delta to add to the input character. */
#define NODELTA_MASK    (1 << 8)
Jack Jansen56cdce32000-07-06 13:57:38 +000023
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000024typedef struct {
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000025 const Py_UNICODE upper;
26 const Py_UNICODE lower;
27 const Py_UNICODE title;
28 const unsigned char decimal;
29 const unsigned char digit;
Hye-Shik Chang974ed7c2004-06-02 16:49:17 +000030 const unsigned short flags;
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000031} _PyUnicode_TypeRecord;
32
33#include "unicodetype_db.h"
34
35static const _PyUnicode_TypeRecord *
Fredrik Lundhee13dba2001-06-26 20:36:12 +000036gettyperecord(Py_UNICODE code)
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000037{
38 int index;
39
Hye-Shik Chang7db07e62003-12-29 01:36:01 +000040#ifdef Py_UNICODE_WIDE
Martin v. Löwis9def6a32002-10-18 16:11:54 +000041 if (code >= 0x110000)
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000042 index = 0;
Hye-Shik Chang7db07e62003-12-29 01:36:01 +000043 else
44#endif
45 {
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000046 index = index1[(code>>SHIFT)];
47 index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
48 }
Fredrik Lundhee13dba2001-06-26 20:36:12 +000049
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000050 return &_PyUnicode_TypeRecords[index];
51}
Jack Jansen56cdce32000-07-06 13:57:38 +000052
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000053/* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
54 type 'B', 0 otherwise. */
Guido van Rossum603484d2000-03-10 22:52:46 +000055
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000056int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000057{
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000058 switch (ch) {
59 case 0x000A: /* LINE FEED */
60 case 0x000D: /* CARRIAGE RETURN */
61 case 0x001C: /* FILE SEPARATOR */
62 case 0x001D: /* GROUP SEPARATOR */
63 case 0x001E: /* RECORD SEPARATOR */
64 case 0x0085: /* NEXT LINE */
65 case 0x2028: /* LINE SEPARATOR */
66 case 0x2029: /* PARAGRAPH SEPARATOR */
67 return 1;
68 default:
69 return 0;
70 }
Guido van Rossum603484d2000-03-10 22:52:46 +000071}
72
73/* Returns the titlecase Unicode characters corresponding to ch or just
74 ch if no titlecase mapping is known. */
75
Martin v. Löwisce9b5a52001-06-27 06:28:56 +000076Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000077{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000078 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwis99f27792009-04-26 00:53:18 +000079 int delta = ctype->title;
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000080
Martin v. Löwis24329ba2008-09-10 13:38:12 +000081 if (ctype->flags & NODELTA_MASK)
82 return delta;
83
Martin v. Löwisedf368c2002-10-18 16:40:36 +000084 if (delta >= 32768)
85 delta -= 65536;
86
87 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +000088}
89
90/* Returns 1 for Unicode characters having the category 'Lt', 0
91 otherwise. */
92
Fredrik Lundh72b06852001-06-27 22:08:26 +000093int _PyUnicode_IsTitlecase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000094{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000095 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
96
97 return (ctype->flags & TITLE_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +000098}
99
100/* Returns the integer decimal (0-9) for Unicode characters having
101 this property, -1 otherwise. */
102
Fredrik Lundh72b06852001-06-27 22:08:26 +0000103int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000104{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000105 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
106
107 return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
Guido van Rossum603484d2000-03-10 22:52:46 +0000108}
109
Fredrik Lundh72b06852001-06-27 22:08:26 +0000110int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000111{
112 if (_PyUnicode_ToDecimalDigit(ch) < 0)
113 return 0;
114 return 1;
115}
116
117/* Returns the integer digit (0-9) for Unicode characters having
118 this property, -1 otherwise. */
119
Fredrik Lundh72b06852001-06-27 22:08:26 +0000120int _PyUnicode_ToDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000121{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000122 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
123
124 return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
Guido van Rossum603484d2000-03-10 22:52:46 +0000125}
126
Fredrik Lundh72b06852001-06-27 22:08:26 +0000127int _PyUnicode_IsDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000128{
129 if (_PyUnicode_ToDigit(ch) < 0)
130 return 0;
131 return 1;
132}
133
134/* Returns the numeric value as double for Unicode characters having
135 this property, -1.0 otherwise. */
136
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000137/* TODO: replace with unicodetype_db.h table */
138
Fredrik Lundh72b06852001-06-27 22:08:26 +0000139double _PyUnicode_ToNumeric(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000140{
141 switch (ch) {
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000142 case 0x0F33:
143 return (double) -1 / 2;
144 case 0x17F0:
Guido van Rossum603484d2000-03-10 22:52:46 +0000145 case 0x3007:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000146#ifdef Py_UNICODE_WIDE
147 case 0x1018A:
148#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000149 return (double) 0;
150 case 0x09F4:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000151 case 0x17F1:
Guido van Rossum603484d2000-03-10 22:52:46 +0000152 case 0x215F:
153 case 0x2160:
154 case 0x2170:
155 case 0x3021:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000156 case 0x3192:
157 case 0x3220:
Guido van Rossum603484d2000-03-10 22:52:46 +0000158 case 0x3280:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000159#ifdef Py_UNICODE_WIDE
160 case 0x10107:
161 case 0x10142:
162 case 0x10158:
163 case 0x10159:
164 case 0x1015A:
165 case 0x10320:
166 case 0x103D1:
167#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000168 return (double) 1;
169 case 0x00BD:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000170 case 0x0F2A:
171 case 0x2CFD:
172#ifdef Py_UNICODE_WIDE
173 case 0x10141:
174 case 0x10175:
175 case 0x10176:
176#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000177 return (double) 1 / 2;
178 case 0x2153:
179 return (double) 1 / 3;
180 case 0x00BC:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000181#ifdef Py_UNICODE_WIDE
182 case 0x10140:
183#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000184 return (double) 1 / 4;
185 case 0x2155:
186 return (double) 1 / 5;
187 case 0x2159:
188 return (double) 1 / 6;
189 case 0x215B:
190 return (double) 1 / 8;
191 case 0x0BF0:
192 case 0x1372:
193 case 0x2169:
194 case 0x2179:
195 case 0x2469:
196 case 0x247D:
197 case 0x2491:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000198 case 0x24FE:
Guido van Rossum603484d2000-03-10 22:52:46 +0000199 case 0x277F:
200 case 0x2789:
201 case 0x2793:
202 case 0x3038:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000203 case 0x3229:
Guido van Rossum603484d2000-03-10 22:52:46 +0000204 case 0x3289:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000205#ifdef Py_UNICODE_WIDE
206 case 0x10110:
207 case 0x10149:
208 case 0x10150:
209 case 0x10157:
210 case 0x10160:
211 case 0x10161:
212 case 0x10162:
213 case 0x10163:
214 case 0x10164:
215 case 0x10322:
216 case 0x103D3:
217 case 0x10A44:
218#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000219 return (double) 10;
220 case 0x0BF1:
221 case 0x137B:
222 case 0x216D:
223 case 0x217D:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000224#ifdef Py_UNICODE_WIDE
225 case 0x10119:
226 case 0x1014B:
227 case 0x10152:
228 case 0x1016A:
229 case 0x103D5:
230 case 0x10A46:
231#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000232 return (double) 100;
233 case 0x0BF2:
234 case 0x216F:
235 case 0x217F:
236 case 0x2180:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000237#ifdef Py_UNICODE_WIDE
238 case 0x10122:
239 case 0x1014D:
240 case 0x10154:
241 case 0x10171:
242 case 0x10A47:
243#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000244 return (double) 1000;
245 case 0x137C:
246 case 0x2182:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000247#ifdef Py_UNICODE_WIDE
248 case 0x1012B:
249 case 0x10155:
250#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000251 return (double) 10000;
252 case 0x216A:
253 case 0x217A:
254 case 0x246A:
255 case 0x247E:
256 case 0x2492:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000257 case 0x24EB:
Guido van Rossum603484d2000-03-10 22:52:46 +0000258 return (double) 11;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000259 case 0x0F2F:
260 return (double) 11 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000261 case 0x216B:
262 case 0x217B:
263 case 0x246B:
264 case 0x247F:
265 case 0x2493:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000266 case 0x24EC:
Guido van Rossum603484d2000-03-10 22:52:46 +0000267 return (double) 12;
268 case 0x246C:
269 case 0x2480:
270 case 0x2494:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000271 case 0x24ED:
Guido van Rossum603484d2000-03-10 22:52:46 +0000272 return (double) 13;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000273 case 0x0F30:
274 return (double) 13 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000275 case 0x246D:
276 case 0x2481:
277 case 0x2495:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000278 case 0x24EE:
Guido van Rossum603484d2000-03-10 22:52:46 +0000279 return (double) 14;
280 case 0x246E:
281 case 0x2482:
282 case 0x2496:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000283 case 0x24EF:
Guido van Rossum603484d2000-03-10 22:52:46 +0000284 return (double) 15;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000285 case 0x0F31:
286 return (double) 15 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000287 case 0x09F9:
288 case 0x246F:
289 case 0x2483:
290 case 0x2497:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000291 case 0x24F0:
Guido van Rossum603484d2000-03-10 22:52:46 +0000292 return (double) 16;
293 case 0x16EE:
294 case 0x2470:
295 case 0x2484:
296 case 0x2498:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000297 case 0x24F1:
Guido van Rossum603484d2000-03-10 22:52:46 +0000298 return (double) 17;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000299 case 0x0F32:
300 return (double) 17 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000301 case 0x16EF:
302 case 0x2471:
303 case 0x2485:
304 case 0x2499:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000305 case 0x24F2:
Guido van Rossum603484d2000-03-10 22:52:46 +0000306 return (double) 18;
307 case 0x16F0:
308 case 0x2472:
309 case 0x2486:
310 case 0x249A:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000311 case 0x24F3:
Guido van Rossum603484d2000-03-10 22:52:46 +0000312 return (double) 19;
313 case 0x09F5:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000314 case 0x17F2:
Guido van Rossum603484d2000-03-10 22:52:46 +0000315 case 0x2161:
316 case 0x2171:
317 case 0x3022:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000318 case 0x3193:
319 case 0x3221:
Guido van Rossum603484d2000-03-10 22:52:46 +0000320 case 0x3281:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000321#ifdef Py_UNICODE_WIDE
322 case 0x10108:
323 case 0x1015B:
324 case 0x1015C:
325 case 0x1015D:
326 case 0x1015E:
327 case 0x103D2:
328#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000329 return (double) 2;
330 case 0x2154:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000331#ifdef Py_UNICODE_WIDE
332 case 0x10177:
333#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000334 return (double) 2 / 3;
335 case 0x2156:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000336 return (double) 2 / 5;
Guido van Rossum603484d2000-03-10 22:52:46 +0000337 case 0x1373:
338 case 0x2473:
339 case 0x2487:
340 case 0x249B:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000341 case 0x24F4:
Guido van Rossum603484d2000-03-10 22:52:46 +0000342 case 0x3039:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000343#ifdef Py_UNICODE_WIDE
344 case 0x10111:
345 case 0x103D4:
346 case 0x10A45:
347#endif
348 return (double) 20;
349#ifdef Py_UNICODE_WIDE
350 case 0x1011A:
351 return (double) 200;
352 case 0x10123:
353 return (double) 2000;
354 case 0x1012C:
355 return (double) 20000;
356#endif
357 case 0x3251:
358 return (double) 21;
359 case 0x3252:
360 return (double) 22;
361 case 0x3253:
362 return (double) 23;
363 case 0x3254:
364 return (double) 24;
365 case 0x3255:
366 return (double) 25;
367 case 0x3256:
368 return (double) 26;
369 case 0x3257:
370 return (double) 27;
371 case 0x3258:
372 return (double) 28;
373 case 0x3259:
374 return (double) 29;
Guido van Rossum603484d2000-03-10 22:52:46 +0000375 case 0x09F6:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000376 case 0x17F3:
Guido van Rossum603484d2000-03-10 22:52:46 +0000377 case 0x2162:
378 case 0x2172:
379 case 0x3023:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000380 case 0x3194:
381 case 0x3222:
Guido van Rossum603484d2000-03-10 22:52:46 +0000382 case 0x3282:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000383#ifdef Py_UNICODE_WIDE
384 case 0x10109:
385#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000386 return (double) 3;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000387 case 0x0F2B:
388 return (double) 3 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000389 case 0x00BE:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000390#ifdef Py_UNICODE_WIDE
391 case 0x10178:
392#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000393 return (double) 3 / 4;
394 case 0x2157:
395 return (double) 3 / 5;
396 case 0x215C:
397 return (double) 3 / 8;
398 case 0x1374:
399 case 0x303A:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000400 case 0x325A:
401#ifdef Py_UNICODE_WIDE
402 case 0x10112:
403 case 0x10165:
404#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000405 return (double) 30;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000406#ifdef Py_UNICODE_WIDE
407 case 0x1011B:
408 case 0x1016B:
409 return (double) 300;
410 case 0x10124:
411 return (double) 3000;
412 case 0x1012D:
413 return (double) 30000;
414#endif
415 case 0x325B:
416 return (double) 31;
417 case 0x325C:
418 return (double) 32;
419 case 0x325D:
420 return (double) 33;
421 case 0x325E:
422 return (double) 34;
423 case 0x325F:
424 return (double) 35;
425 case 0x32B1:
426 return (double) 36;
427 case 0x32B2:
428 return (double) 37;
429 case 0x32B3:
430 return (double) 38;
431 case 0x32B4:
432 return (double) 39;
Guido van Rossum603484d2000-03-10 22:52:46 +0000433 case 0x09F7:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000434 case 0x17F4:
Guido van Rossum603484d2000-03-10 22:52:46 +0000435 case 0x2163:
436 case 0x2173:
437 case 0x3024:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000438 case 0x3195:
439 case 0x3223:
Guido van Rossum603484d2000-03-10 22:52:46 +0000440 case 0x3283:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000441#ifdef Py_UNICODE_WIDE
442 case 0x1010A:
443#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000444 return (double) 4;
445 case 0x2158:
446 return (double) 4 / 5;
447 case 0x1375:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000448 case 0x32B5:
449#ifdef Py_UNICODE_WIDE
450 case 0x10113:
451#endif
452 return (double) 40;
453#ifdef Py_UNICODE_WIDE
454 case 0x1011C:
455 return (double) 400;
456 case 0x10125:
457 return (double) 4000;
458 case 0x1012E:
459 return (double) 40000;
460#endif
461 case 0x32B6:
462 return (double) 41;
463 case 0x32B7:
464 return (double) 42;
465 case 0x32B8:
466 return (double) 43;
467 case 0x32B9:
468 return (double) 44;
469 case 0x32BA:
470 return (double) 45;
471 case 0x32BB:
472 return (double) 46;
473 case 0x32BC:
474 return (double) 47;
475 case 0x32BD:
476 return (double) 48;
477 case 0x32BE:
478 return (double) 49;
479 case 0x17F5:
Guido van Rossum603484d2000-03-10 22:52:46 +0000480 case 0x2164:
481 case 0x2174:
482 case 0x3025:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000483 case 0x3224:
Guido van Rossum603484d2000-03-10 22:52:46 +0000484 case 0x3284:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000485#ifdef Py_UNICODE_WIDE
486 case 0x1010B:
487 case 0x10143:
488 case 0x10148:
489 case 0x1014F:
490 case 0x1015F:
491 case 0x10173:
492 case 0x10321:
493#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000494 return (double) 5;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000495 case 0x0F2C:
496 return (double) 5 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000497 case 0x215A:
498 return (double) 5 / 6;
499 case 0x215D:
500 return (double) 5 / 8;
501 case 0x1376:
502 case 0x216C:
503 case 0x217C:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000504 case 0x32BF:
505#ifdef Py_UNICODE_WIDE
506 case 0x10114:
507 case 0x10144:
508 case 0x1014A:
509 case 0x10151:
510 case 0x10166:
511 case 0x10167:
512 case 0x10168:
513 case 0x10169:
514 case 0x10174:
515 case 0x10323:
516#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000517 return (double) 50;
518 case 0x216E:
519 case 0x217E:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000520#ifdef Py_UNICODE_WIDE
521 case 0x1011D:
522 case 0x10145:
523 case 0x1014C:
524 case 0x10153:
525 case 0x1016C:
526 case 0x1016D:
527 case 0x1016E:
528 case 0x1016F:
529 case 0x10170:
530#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000531 return (double) 500;
532 case 0x2181:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000533#ifdef Py_UNICODE_WIDE
534 case 0x10126:
535 case 0x10146:
536 case 0x1014E:
537 case 0x10172:
538#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000539 return (double) 5000;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000540#ifdef Py_UNICODE_WIDE
541 case 0x1012F:
542 case 0x10147:
543 case 0x10156:
544 return (double) 50000;
545#endif
546 case 0x17F6:
Guido van Rossum603484d2000-03-10 22:52:46 +0000547 case 0x2165:
548 case 0x2175:
549 case 0x3026:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000550 case 0x3225:
Guido van Rossum603484d2000-03-10 22:52:46 +0000551 case 0x3285:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000552#ifdef Py_UNICODE_WIDE
553 case 0x1010C:
554#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000555 return (double) 6;
556 case 0x1377:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000557#ifdef Py_UNICODE_WIDE
558 case 0x10115:
559#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000560 return (double) 60;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000561#ifdef Py_UNICODE_WIDE
562 case 0x1011E:
563 return (double) 600;
564 case 0x10127:
565 return (double) 6000;
566 case 0x10130:
567 return (double) 60000;
568#endif
569 case 0x17F7:
Guido van Rossum603484d2000-03-10 22:52:46 +0000570 case 0x2166:
571 case 0x2176:
572 case 0x3027:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000573 case 0x3226:
Guido van Rossum603484d2000-03-10 22:52:46 +0000574 case 0x3286:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000575#ifdef Py_UNICODE_WIDE
576 case 0x1010D:
577#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000578 return (double) 7;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000579 case 0x0F2D:
580 return (double) 7 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000581 case 0x215E:
582 return (double) 7 / 8;
583 case 0x1378:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000584#ifdef Py_UNICODE_WIDE
585 case 0x10116:
586#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000587 return (double) 70;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000588#ifdef Py_UNICODE_WIDE
589 case 0x1011F:
590 return (double) 700;
591 case 0x10128:
592 return (double) 7000;
593 case 0x10131:
594 return (double) 70000;
595#endif
596 case 0x17F8:
Guido van Rossum603484d2000-03-10 22:52:46 +0000597 case 0x2167:
598 case 0x2177:
599 case 0x3028:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000600 case 0x3227:
Guido van Rossum603484d2000-03-10 22:52:46 +0000601 case 0x3287:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000602#ifdef Py_UNICODE_WIDE
603 case 0x1010E:
604#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000605 return (double) 8;
606 case 0x1379:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000607#ifdef Py_UNICODE_WIDE
608 case 0x10117:
609#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000610 return (double) 80;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000611#ifdef Py_UNICODE_WIDE
612 case 0x10120:
613 return (double) 800;
614 case 0x10129:
615 return (double) 8000;
616 case 0x10132:
617 return (double) 80000;
618#endif
619 case 0x17F9:
Guido van Rossum603484d2000-03-10 22:52:46 +0000620 case 0x2168:
621 case 0x2178:
622 case 0x3029:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000623 case 0x3228:
Guido van Rossum603484d2000-03-10 22:52:46 +0000624 case 0x3288:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000625#ifdef Py_UNICODE_WIDE
626 case 0x1010F:
627#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000628 return (double) 9;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000629 case 0x0F2E:
630 return (double) 9 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000631 case 0x137A:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000632#ifdef Py_UNICODE_WIDE
633 case 0x10118:
634#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000635 return (double) 90;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000636#ifdef Py_UNICODE_WIDE
637 case 0x10121:
638 case 0x1034A:
639 return (double) 900;
640 case 0x1012A:
641 return (double) 9000;
642 case 0x10133:
643 return (double) 90000;
644#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000645 default:
646 return (double) _PyUnicode_ToDigit(ch);
647 }
648}
649
Fredrik Lundh72b06852001-06-27 22:08:26 +0000650int _PyUnicode_IsNumeric(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000651{
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000652 return _PyUnicode_ToNumeric(ch) != -1.0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000653}
654
655#ifndef WANT_WCTYPE_FUNCTIONS
656
Guido van Rossumdc742b32000-04-11 15:39:02 +0000657/* Returns 1 for Unicode characters having the bidirectional type
658 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
Guido van Rossum603484d2000-03-10 22:52:46 +0000659
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +0000660int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000661{
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +0000662 switch (ch) {
663 case 0x0009: /* HORIZONTAL TABULATION */
664 case 0x000A: /* LINE FEED */
665 case 0x000B: /* VERTICAL TABULATION */
666 case 0x000C: /* FORM FEED */
667 case 0x000D: /* CARRIAGE RETURN */
668 case 0x001C: /* FILE SEPARATOR */
669 case 0x001D: /* GROUP SEPARATOR */
670 case 0x001E: /* RECORD SEPARATOR */
671 case 0x001F: /* UNIT SEPARATOR */
672 case 0x0020: /* SPACE */
673 case 0x0085: /* NEXT LINE */
674 case 0x00A0: /* NO-BREAK SPACE */
675 case 0x1680: /* OGHAM SPACE MARK */
676 case 0x2000: /* EN QUAD */
677 case 0x2001: /* EM QUAD */
678 case 0x2002: /* EN SPACE */
679 case 0x2003: /* EM SPACE */
680 case 0x2004: /* THREE-PER-EM SPACE */
681 case 0x2005: /* FOUR-PER-EM SPACE */
682 case 0x2006: /* SIX-PER-EM SPACE */
683 case 0x2007: /* FIGURE SPACE */
684 case 0x2008: /* PUNCTUATION SPACE */
685 case 0x2009: /* THIN SPACE */
686 case 0x200A: /* HAIR SPACE */
687 case 0x200B: /* ZERO WIDTH SPACE */
688 case 0x2028: /* LINE SEPARATOR */
689 case 0x2029: /* PARAGRAPH SEPARATOR */
690 case 0x202F: /* NARROW NO-BREAK SPACE */
691 case 0x205F: /* MEDIUM MATHEMATICAL SPACE */
692 case 0x3000: /* IDEOGRAPHIC SPACE */
693 return 1;
694 default:
695 return 0;
696 }
Guido van Rossum603484d2000-03-10 22:52:46 +0000697}
698
699/* Returns 1 for Unicode characters having the category 'Ll', 0
700 otherwise. */
701
Fredrik Lundh72b06852001-06-27 22:08:26 +0000702int _PyUnicode_IsLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000703{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000704 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
705
706 return (ctype->flags & LOWER_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000707}
708
709/* Returns 1 for Unicode characters having the category 'Lu', 0
710 otherwise. */
711
Fredrik Lundh72b06852001-06-27 22:08:26 +0000712int _PyUnicode_IsUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000713{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000714 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
715
716 return (ctype->flags & UPPER_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000717}
718
719/* Returns the uppercase Unicode characters corresponding to ch or just
720 ch if no uppercase mapping is known. */
721
Fredrik Lundh72b06852001-06-27 22:08:26 +0000722Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000723{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000724 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwisedf368c2002-10-18 16:40:36 +0000725 int delta = ctype->upper;
Martin v. Löwis24329ba2008-09-10 13:38:12 +0000726 if (ctype->flags & NODELTA_MASK)
727 return delta;
Martin v. Löwisedf368c2002-10-18 16:40:36 +0000728 if (delta >= 32768)
729 delta -= 65536;
730 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +0000731}
732
733/* Returns the lowercase Unicode characters corresponding to ch or just
734 ch if no lowercase mapping is known. */
735
Fredrik Lundh72b06852001-06-27 22:08:26 +0000736Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000737{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000738 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwisedf368c2002-10-18 16:40:36 +0000739 int delta = ctype->lower;
Martin v. Löwis24329ba2008-09-10 13:38:12 +0000740 if (ctype->flags & NODELTA_MASK)
741 return delta;
Martin v. Löwisedf368c2002-10-18 16:40:36 +0000742 if (delta >= 32768)
743 delta -= 65536;
744 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +0000745}
746
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000747/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
748 'Lo' or 'Lm', 0 otherwise. */
749
Fredrik Lundh72b06852001-06-27 22:08:26 +0000750int _PyUnicode_IsAlpha(Py_UNICODE ch)
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000751{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000752 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000753
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000754 return (ctype->flags & ALPHA_MASK) != 0;
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000755}
756
Guido van Rossum603484d2000-03-10 22:52:46 +0000757#else
758
759/* Export the interfaces using the wchar_t type for portability
760 reasons: */
761
Fredrik Lundh72b06852001-06-27 22:08:26 +0000762int _PyUnicode_IsWhitespace(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000763{
764 return iswspace(ch);
765}
766
Fredrik Lundh72b06852001-06-27 22:08:26 +0000767int _PyUnicode_IsLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000768{
769 return iswlower(ch);
770}
771
Fredrik Lundh72b06852001-06-27 22:08:26 +0000772int _PyUnicode_IsUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000773{
774 return iswupper(ch);
775}
776
Fredrik Lundh72b06852001-06-27 22:08:26 +0000777Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000778{
779 return towlower(ch);
780}
781
Fredrik Lundh72b06852001-06-27 22:08:26 +0000782Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000783{
784 return towupper(ch);
785}
786
Fredrik Lundh72b06852001-06-27 22:08:26 +0000787int _PyUnicode_IsAlpha(Py_UNICODE ch)
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000788{
789 return iswalpha(ch);
790}
791
Guido van Rossum603484d2000-03-10 22:52:46 +0000792#endif