blob: 73def09dbb88cc887fedc73ae97433abfe791d87 [file] [log] [blame]
Guido van Rossum603484d2000-03-10 22:52:46 +00001/*
2 Unicode character type helpers.
3
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +00004 Written by Marc-Andre Lemburg (mal@lemburg.com).
5 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
Guido van Rossum603484d2000-03-10 22:52:46 +00006
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +00007 Copyright (c) Corporation for National Research Initiatives.
Guido van Rossum603484d2000-03-10 22:52:46 +00008
9*/
10
11#include "Python.h"
Guido van Rossum603484d2000-03-10 22:52:46 +000012#include "unicodeobject.h"
13
/* Bit flags stored in _PyUnicode_TypeRecord.flags.  One bit per property;
   the numeric values are unchanged, only written as shifts. */
#define ALPHA_MASK      (1 << 0)    /* tested by _PyUnicode_IsAlpha() */
#define DECIMAL_MASK    (1 << 1)    /* record carries a decimal value */
#define DIGIT_MASK      (1 << 2)    /* record carries a digit value */
#define LOWER_MASK      (1 << 3)    /* tested by _PyUnicode_IsLowercase() */
#define LINEBREAK_MASK  (1 << 4)    /* not tested by the code in this file */
#define SPACE_MASK      (1 << 5)    /* not tested by the code in this file */
#define TITLE_MASK      (1 << 6)    /* tested by _PyUnicode_IsTitlecase() */
#define UPPER_MASK      (1 << 7)    /* tested by _PyUnicode_IsUppercase() */
Jack Jansen56cdce32000-07-06 13:57:38 +000022
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000023typedef struct {
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000024 const Py_UNICODE upper;
25 const Py_UNICODE lower;
26 const Py_UNICODE title;
27 const unsigned char decimal;
28 const unsigned char digit;
Hye-Shik Chang974ed7c2004-06-02 16:49:17 +000029 const unsigned short flags;
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000030} _PyUnicode_TypeRecord;
31
32#include "unicodetype_db.h"
33
34static const _PyUnicode_TypeRecord *
Fredrik Lundhee13dba2001-06-26 20:36:12 +000035gettyperecord(Py_UNICODE code)
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000036{
37 int index;
38
Hye-Shik Chang7db07e62003-12-29 01:36:01 +000039#ifdef Py_UNICODE_WIDE
Martin v. Löwis9def6a32002-10-18 16:11:54 +000040 if (code >= 0x110000)
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000041 index = 0;
Hye-Shik Chang7db07e62003-12-29 01:36:01 +000042 else
43#endif
44 {
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000045 index = index1[(code>>SHIFT)];
46 index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
47 }
Fredrik Lundhee13dba2001-06-26 20:36:12 +000048
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000049 return &_PyUnicode_TypeRecords[index];
50}
Jack Jansen56cdce32000-07-06 13:57:38 +000051
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000052/* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
53 type 'B', 0 otherwise. */
Guido van Rossum603484d2000-03-10 22:52:46 +000054
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000055int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000056{
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000057 switch (ch) {
58 case 0x000A: /* LINE FEED */
59 case 0x000D: /* CARRIAGE RETURN */
60 case 0x001C: /* FILE SEPARATOR */
61 case 0x001D: /* GROUP SEPARATOR */
62 case 0x001E: /* RECORD SEPARATOR */
63 case 0x0085: /* NEXT LINE */
64 case 0x2028: /* LINE SEPARATOR */
65 case 0x2029: /* PARAGRAPH SEPARATOR */
66 return 1;
67 default:
68 return 0;
69 }
Guido van Rossum603484d2000-03-10 22:52:46 +000070}
71
72/* Returns the titlecase Unicode characters corresponding to ch or just
73 ch if no titlecase mapping is known. */
74
Martin v. Löwisce9b5a52001-06-27 06:28:56 +000075Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000076{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000077 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwisedf368c2002-10-18 16:40:36 +000078 int delta;
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000079
80 if (ctype->title)
Martin v. Löwisedf368c2002-10-18 16:40:36 +000081 delta = ctype->title;
Martin v. Löwisce9b5a52001-06-27 06:28:56 +000082 else
Martin v. Löwisedf368c2002-10-18 16:40:36 +000083 delta = ctype->upper;
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000084
Martin v. Löwisedf368c2002-10-18 16:40:36 +000085 if (delta >= 32768)
86 delta -= 65536;
87
88 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +000089}
90
91/* Returns 1 for Unicode characters having the category 'Lt', 0
92 otherwise. */
93
Fredrik Lundh72b06852001-06-27 22:08:26 +000094int _PyUnicode_IsTitlecase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000095{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000096 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
97
98 return (ctype->flags & TITLE_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +000099}
100
101/* Returns the integer decimal (0-9) for Unicode characters having
102 this property, -1 otherwise. */
103
Fredrik Lundh72b06852001-06-27 22:08:26 +0000104int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000105{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000106 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
107
108 return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
Guido van Rossum603484d2000-03-10 22:52:46 +0000109}
110
Fredrik Lundh72b06852001-06-27 22:08:26 +0000111int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000112{
113 if (_PyUnicode_ToDecimalDigit(ch) < 0)
114 return 0;
115 return 1;
116}
117
118/* Returns the integer digit (0-9) for Unicode characters having
119 this property, -1 otherwise. */
120
Fredrik Lundh72b06852001-06-27 22:08:26 +0000121int _PyUnicode_ToDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000122{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000123 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
124
125 return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
Guido van Rossum603484d2000-03-10 22:52:46 +0000126}
127
Fredrik Lundh72b06852001-06-27 22:08:26 +0000128int _PyUnicode_IsDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000129{
130 if (_PyUnicode_ToDigit(ch) < 0)
131 return 0;
132 return 1;
133}
134
135/* Returns the numeric value as double for Unicode characters having
136 this property, -1.0 otherwise. */
137
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000138/* TODO: replace with unicodetype_db.h table */
139
Fredrik Lundh72b06852001-06-27 22:08:26 +0000140double _PyUnicode_ToNumeric(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000141{
142 switch (ch) {
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000143 case 0x0F33:
144 return (double) -1 / 2;
145 case 0x17F0:
Guido van Rossum603484d2000-03-10 22:52:46 +0000146 case 0x3007:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000147#ifdef Py_UNICODE_WIDE
148 case 0x1018A:
149#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000150 return (double) 0;
151 case 0x09F4:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000152 case 0x17F1:
Guido van Rossum603484d2000-03-10 22:52:46 +0000153 case 0x215F:
154 case 0x2160:
155 case 0x2170:
156 case 0x3021:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000157 case 0x3192:
158 case 0x3220:
Guido van Rossum603484d2000-03-10 22:52:46 +0000159 case 0x3280:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000160#ifdef Py_UNICODE_WIDE
161 case 0x10107:
162 case 0x10142:
163 case 0x10158:
164 case 0x10159:
165 case 0x1015A:
166 case 0x10320:
167 case 0x103D1:
168#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000169 return (double) 1;
170 case 0x00BD:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000171 case 0x0F2A:
172 case 0x2CFD:
173#ifdef Py_UNICODE_WIDE
174 case 0x10141:
175 case 0x10175:
176 case 0x10176:
177#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000178 return (double) 1 / 2;
179 case 0x2153:
180 return (double) 1 / 3;
181 case 0x00BC:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000182#ifdef Py_UNICODE_WIDE
183 case 0x10140:
184#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000185 return (double) 1 / 4;
186 case 0x2155:
187 return (double) 1 / 5;
188 case 0x2159:
189 return (double) 1 / 6;
190 case 0x215B:
191 return (double) 1 / 8;
192 case 0x0BF0:
193 case 0x1372:
194 case 0x2169:
195 case 0x2179:
196 case 0x2469:
197 case 0x247D:
198 case 0x2491:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000199 case 0x24FE:
Guido van Rossum603484d2000-03-10 22:52:46 +0000200 case 0x277F:
201 case 0x2789:
202 case 0x2793:
203 case 0x3038:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000204 case 0x3229:
Guido van Rossum603484d2000-03-10 22:52:46 +0000205 case 0x3289:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000206#ifdef Py_UNICODE_WIDE
207 case 0x10110:
208 case 0x10149:
209 case 0x10150:
210 case 0x10157:
211 case 0x10160:
212 case 0x10161:
213 case 0x10162:
214 case 0x10163:
215 case 0x10164:
216 case 0x10322:
217 case 0x103D3:
218 case 0x10A44:
219#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000220 return (double) 10;
221 case 0x0BF1:
222 case 0x137B:
223 case 0x216D:
224 case 0x217D:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000225#ifdef Py_UNICODE_WIDE
226 case 0x10119:
227 case 0x1014B:
228 case 0x10152:
229 case 0x1016A:
230 case 0x103D5:
231 case 0x10A46:
232#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000233 return (double) 100;
234 case 0x0BF2:
235 case 0x216F:
236 case 0x217F:
237 case 0x2180:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000238#ifdef Py_UNICODE_WIDE
239 case 0x10122:
240 case 0x1014D:
241 case 0x10154:
242 case 0x10171:
243 case 0x10A47:
244#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000245 return (double) 1000;
246 case 0x137C:
247 case 0x2182:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000248#ifdef Py_UNICODE_WIDE
249 case 0x1012B:
250 case 0x10155:
251#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000252 return (double) 10000;
253 case 0x216A:
254 case 0x217A:
255 case 0x246A:
256 case 0x247E:
257 case 0x2492:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000258 case 0x24EB:
Guido van Rossum603484d2000-03-10 22:52:46 +0000259 return (double) 11;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000260 case 0x0F2F:
261 return (double) 11 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000262 case 0x216B:
263 case 0x217B:
264 case 0x246B:
265 case 0x247F:
266 case 0x2493:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000267 case 0x24EC:
Guido van Rossum603484d2000-03-10 22:52:46 +0000268 return (double) 12;
269 case 0x246C:
270 case 0x2480:
271 case 0x2494:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000272 case 0x24ED:
Guido van Rossum603484d2000-03-10 22:52:46 +0000273 return (double) 13;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000274 case 0x0F30:
275 return (double) 13 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000276 case 0x246D:
277 case 0x2481:
278 case 0x2495:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000279 case 0x24EE:
Guido van Rossum603484d2000-03-10 22:52:46 +0000280 return (double) 14;
281 case 0x246E:
282 case 0x2482:
283 case 0x2496:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000284 case 0x24EF:
Guido van Rossum603484d2000-03-10 22:52:46 +0000285 return (double) 15;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000286 case 0x0F31:
287 return (double) 15 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000288 case 0x09F9:
289 case 0x246F:
290 case 0x2483:
291 case 0x2497:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000292 case 0x24F0:
Guido van Rossum603484d2000-03-10 22:52:46 +0000293 return (double) 16;
294 case 0x16EE:
295 case 0x2470:
296 case 0x2484:
297 case 0x2498:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000298 case 0x24F1:
Guido van Rossum603484d2000-03-10 22:52:46 +0000299 return (double) 17;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000300 case 0x0F32:
301 return (double) 17 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000302 case 0x16EF:
303 case 0x2471:
304 case 0x2485:
305 case 0x2499:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000306 case 0x24F2:
Guido van Rossum603484d2000-03-10 22:52:46 +0000307 return (double) 18;
308 case 0x16F0:
309 case 0x2472:
310 case 0x2486:
311 case 0x249A:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000312 case 0x24F3:
Guido van Rossum603484d2000-03-10 22:52:46 +0000313 return (double) 19;
314 case 0x09F5:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000315 case 0x17F2:
Guido van Rossum603484d2000-03-10 22:52:46 +0000316 case 0x2161:
317 case 0x2171:
318 case 0x3022:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000319 case 0x3193:
320 case 0x3221:
Guido van Rossum603484d2000-03-10 22:52:46 +0000321 case 0x3281:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000322#ifdef Py_UNICODE_WIDE
323 case 0x10108:
324 case 0x1015B:
325 case 0x1015C:
326 case 0x1015D:
327 case 0x1015E:
328 case 0x103D2:
329#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000330 return (double) 2;
331 case 0x2154:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000332#ifdef Py_UNICODE_WIDE
333 case 0x10177:
334#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000335 return (double) 2 / 3;
336 case 0x2156:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000337 return (double) 2 / 5;
Guido van Rossum603484d2000-03-10 22:52:46 +0000338 case 0x1373:
339 case 0x2473:
340 case 0x2487:
341 case 0x249B:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000342 case 0x24F4:
Guido van Rossum603484d2000-03-10 22:52:46 +0000343 case 0x3039:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000344#ifdef Py_UNICODE_WIDE
345 case 0x10111:
346 case 0x103D4:
347 case 0x10A45:
348#endif
349 return (double) 20;
350#ifdef Py_UNICODE_WIDE
351 case 0x1011A:
352 return (double) 200;
353 case 0x10123:
354 return (double) 2000;
355 case 0x1012C:
356 return (double) 20000;
357#endif
358 case 0x3251:
359 return (double) 21;
360 case 0x3252:
361 return (double) 22;
362 case 0x3253:
363 return (double) 23;
364 case 0x3254:
365 return (double) 24;
366 case 0x3255:
367 return (double) 25;
368 case 0x3256:
369 return (double) 26;
370 case 0x3257:
371 return (double) 27;
372 case 0x3258:
373 return (double) 28;
374 case 0x3259:
375 return (double) 29;
Guido van Rossum603484d2000-03-10 22:52:46 +0000376 case 0x09F6:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000377 case 0x17F3:
Guido van Rossum603484d2000-03-10 22:52:46 +0000378 case 0x2162:
379 case 0x2172:
380 case 0x3023:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000381 case 0x3194:
382 case 0x3222:
Guido van Rossum603484d2000-03-10 22:52:46 +0000383 case 0x3282:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000384#ifdef Py_UNICODE_WIDE
385 case 0x10109:
386#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000387 return (double) 3;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000388 case 0x0F2B:
389 return (double) 3 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000390 case 0x00BE:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000391#ifdef Py_UNICODE_WIDE
392 case 0x10178:
393#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000394 return (double) 3 / 4;
395 case 0x2157:
396 return (double) 3 / 5;
397 case 0x215C:
398 return (double) 3 / 8;
399 case 0x1374:
400 case 0x303A:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000401 case 0x325A:
402#ifdef Py_UNICODE_WIDE
403 case 0x10112:
404 case 0x10165:
405#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000406 return (double) 30;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000407#ifdef Py_UNICODE_WIDE
408 case 0x1011B:
409 case 0x1016B:
410 return (double) 300;
411 case 0x10124:
412 return (double) 3000;
413 case 0x1012D:
414 return (double) 30000;
415#endif
416 case 0x325B:
417 return (double) 31;
418 case 0x325C:
419 return (double) 32;
420 case 0x325D:
421 return (double) 33;
422 case 0x325E:
423 return (double) 34;
424 case 0x325F:
425 return (double) 35;
426 case 0x32B1:
427 return (double) 36;
428 case 0x32B2:
429 return (double) 37;
430 case 0x32B3:
431 return (double) 38;
432 case 0x32B4:
433 return (double) 39;
Guido van Rossum603484d2000-03-10 22:52:46 +0000434 case 0x09F7:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000435 case 0x17F4:
Guido van Rossum603484d2000-03-10 22:52:46 +0000436 case 0x2163:
437 case 0x2173:
438 case 0x3024:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000439 case 0x3195:
440 case 0x3223:
Guido van Rossum603484d2000-03-10 22:52:46 +0000441 case 0x3283:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000442#ifdef Py_UNICODE_WIDE
443 case 0x1010A:
444#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000445 return (double) 4;
446 case 0x2158:
447 return (double) 4 / 5;
448 case 0x1375:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000449 case 0x32B5:
450#ifdef Py_UNICODE_WIDE
451 case 0x10113:
452#endif
453 return (double) 40;
454#ifdef Py_UNICODE_WIDE
455 case 0x1011C:
456 return (double) 400;
457 case 0x10125:
458 return (double) 4000;
459 case 0x1012E:
460 return (double) 40000;
461#endif
462 case 0x32B6:
463 return (double) 41;
464 case 0x32B7:
465 return (double) 42;
466 case 0x32B8:
467 return (double) 43;
468 case 0x32B9:
469 return (double) 44;
470 case 0x32BA:
471 return (double) 45;
472 case 0x32BB:
473 return (double) 46;
474 case 0x32BC:
475 return (double) 47;
476 case 0x32BD:
477 return (double) 48;
478 case 0x32BE:
479 return (double) 49;
480 case 0x17F5:
Guido van Rossum603484d2000-03-10 22:52:46 +0000481 case 0x2164:
482 case 0x2174:
483 case 0x3025:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000484 case 0x3224:
Guido van Rossum603484d2000-03-10 22:52:46 +0000485 case 0x3284:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000486#ifdef Py_UNICODE_WIDE
487 case 0x1010B:
488 case 0x10143:
489 case 0x10148:
490 case 0x1014F:
491 case 0x1015F:
492 case 0x10173:
493 case 0x10321:
494#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000495 return (double) 5;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000496 case 0x0F2C:
497 return (double) 5 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000498 case 0x215A:
499 return (double) 5 / 6;
500 case 0x215D:
501 return (double) 5 / 8;
502 case 0x1376:
503 case 0x216C:
504 case 0x217C:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000505 case 0x32BF:
506#ifdef Py_UNICODE_WIDE
507 case 0x10114:
508 case 0x10144:
509 case 0x1014A:
510 case 0x10151:
511 case 0x10166:
512 case 0x10167:
513 case 0x10168:
514 case 0x10169:
515 case 0x10174:
516 case 0x10323:
517#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000518 return (double) 50;
519 case 0x216E:
520 case 0x217E:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000521#ifdef Py_UNICODE_WIDE
522 case 0x1011D:
523 case 0x10145:
524 case 0x1014C:
525 case 0x10153:
526 case 0x1016C:
527 case 0x1016D:
528 case 0x1016E:
529 case 0x1016F:
530 case 0x10170:
531#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000532 return (double) 500;
533 case 0x2181:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000534#ifdef Py_UNICODE_WIDE
535 case 0x10126:
536 case 0x10146:
537 case 0x1014E:
538 case 0x10172:
539#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000540 return (double) 5000;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000541#ifdef Py_UNICODE_WIDE
542 case 0x1012F:
543 case 0x10147:
544 case 0x10156:
545 return (double) 50000;
546#endif
547 case 0x17F6:
Guido van Rossum603484d2000-03-10 22:52:46 +0000548 case 0x2165:
549 case 0x2175:
550 case 0x3026:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000551 case 0x3225:
Guido van Rossum603484d2000-03-10 22:52:46 +0000552 case 0x3285:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000553#ifdef Py_UNICODE_WIDE
554 case 0x1010C:
555#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000556 return (double) 6;
557 case 0x1377:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000558#ifdef Py_UNICODE_WIDE
559 case 0x10115:
560#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000561 return (double) 60;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000562#ifdef Py_UNICODE_WIDE
563 case 0x1011E:
564 return (double) 600;
565 case 0x10127:
566 return (double) 6000;
567 case 0x10130:
568 return (double) 60000;
569#endif
570 case 0x17F7:
Guido van Rossum603484d2000-03-10 22:52:46 +0000571 case 0x2166:
572 case 0x2176:
573 case 0x3027:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000574 case 0x3226:
Guido van Rossum603484d2000-03-10 22:52:46 +0000575 case 0x3286:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000576#ifdef Py_UNICODE_WIDE
577 case 0x1010D:
578#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000579 return (double) 7;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000580 case 0x0F2D:
581 return (double) 7 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000582 case 0x215E:
583 return (double) 7 / 8;
584 case 0x1378:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000585#ifdef Py_UNICODE_WIDE
586 case 0x10116:
587#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000588 return (double) 70;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000589#ifdef Py_UNICODE_WIDE
590 case 0x1011F:
591 return (double) 700;
592 case 0x10128:
593 return (double) 7000;
594 case 0x10131:
595 return (double) 70000;
596#endif
597 case 0x17F8:
Guido van Rossum603484d2000-03-10 22:52:46 +0000598 case 0x2167:
599 case 0x2177:
600 case 0x3028:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000601 case 0x3227:
Guido van Rossum603484d2000-03-10 22:52:46 +0000602 case 0x3287:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000603#ifdef Py_UNICODE_WIDE
604 case 0x1010E:
605#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000606 return (double) 8;
607 case 0x1379:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000608#ifdef Py_UNICODE_WIDE
609 case 0x10117:
610#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000611 return (double) 80;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000612#ifdef Py_UNICODE_WIDE
613 case 0x10120:
614 return (double) 800;
615 case 0x10129:
616 return (double) 8000;
617 case 0x10132:
618 return (double) 80000;
619#endif
620 case 0x17F9:
Guido van Rossum603484d2000-03-10 22:52:46 +0000621 case 0x2168:
622 case 0x2178:
623 case 0x3029:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000624 case 0x3228:
Guido van Rossum603484d2000-03-10 22:52:46 +0000625 case 0x3288:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000626#ifdef Py_UNICODE_WIDE
627 case 0x1010F:
628#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000629 return (double) 9;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000630 case 0x0F2E:
631 return (double) 9 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000632 case 0x137A:
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000633#ifdef Py_UNICODE_WIDE
634 case 0x10118:
635#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000636 return (double) 90;
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000637#ifdef Py_UNICODE_WIDE
638 case 0x10121:
639 case 0x1034A:
640 return (double) 900;
641 case 0x1012A:
642 return (double) 9000;
643 case 0x10133:
644 return (double) 90000;
645#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000646 default:
647 return (double) _PyUnicode_ToDigit(ch);
648 }
649}
650
Fredrik Lundh72b06852001-06-27 22:08:26 +0000651int _PyUnicode_IsNumeric(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000652{
Martin v. Löwisd004fc82006-05-27 08:36:52 +0000653 return _PyUnicode_ToNumeric(ch) != -1.0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000654}
655
656#ifndef WANT_WCTYPE_FUNCTIONS
657
Guido van Rossumdc742b32000-04-11 15:39:02 +0000658/* Returns 1 for Unicode characters having the bidirectional type
659 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
Guido van Rossum603484d2000-03-10 22:52:46 +0000660
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +0000661int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000662{
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +0000663 switch (ch) {
664 case 0x0009: /* HORIZONTAL TABULATION */
665 case 0x000A: /* LINE FEED */
666 case 0x000B: /* VERTICAL TABULATION */
667 case 0x000C: /* FORM FEED */
668 case 0x000D: /* CARRIAGE RETURN */
669 case 0x001C: /* FILE SEPARATOR */
670 case 0x001D: /* GROUP SEPARATOR */
671 case 0x001E: /* RECORD SEPARATOR */
672 case 0x001F: /* UNIT SEPARATOR */
673 case 0x0020: /* SPACE */
674 case 0x0085: /* NEXT LINE */
675 case 0x00A0: /* NO-BREAK SPACE */
676 case 0x1680: /* OGHAM SPACE MARK */
677 case 0x2000: /* EN QUAD */
678 case 0x2001: /* EM QUAD */
679 case 0x2002: /* EN SPACE */
680 case 0x2003: /* EM SPACE */
681 case 0x2004: /* THREE-PER-EM SPACE */
682 case 0x2005: /* FOUR-PER-EM SPACE */
683 case 0x2006: /* SIX-PER-EM SPACE */
684 case 0x2007: /* FIGURE SPACE */
685 case 0x2008: /* PUNCTUATION SPACE */
686 case 0x2009: /* THIN SPACE */
687 case 0x200A: /* HAIR SPACE */
688 case 0x200B: /* ZERO WIDTH SPACE */
689 case 0x2028: /* LINE SEPARATOR */
690 case 0x2029: /* PARAGRAPH SEPARATOR */
691 case 0x202F: /* NARROW NO-BREAK SPACE */
692 case 0x205F: /* MEDIUM MATHEMATICAL SPACE */
693 case 0x3000: /* IDEOGRAPHIC SPACE */
694 return 1;
695 default:
696 return 0;
697 }
Guido van Rossum603484d2000-03-10 22:52:46 +0000698}
699
700/* Returns 1 for Unicode characters having the category 'Ll', 0
701 otherwise. */
702
Fredrik Lundh72b06852001-06-27 22:08:26 +0000703int _PyUnicode_IsLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000704{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000705 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
706
707 return (ctype->flags & LOWER_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000708}
709
710/* Returns 1 for Unicode characters having the category 'Lu', 0
711 otherwise. */
712
Fredrik Lundh72b06852001-06-27 22:08:26 +0000713int _PyUnicode_IsUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000714{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000715 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
716
717 return (ctype->flags & UPPER_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000718}
719
720/* Returns the uppercase Unicode characters corresponding to ch or just
721 ch if no uppercase mapping is known. */
722
Fredrik Lundh72b06852001-06-27 22:08:26 +0000723Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000724{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000725 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwisedf368c2002-10-18 16:40:36 +0000726 int delta = ctype->upper;
727 if (delta >= 32768)
728 delta -= 65536;
729 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +0000730}
731
732/* Returns the lowercase Unicode characters corresponding to ch or just
733 ch if no lowercase mapping is known. */
734
Fredrik Lundh72b06852001-06-27 22:08:26 +0000735Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000736{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000737 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwisedf368c2002-10-18 16:40:36 +0000738 int delta = ctype->lower;
739 if (delta >= 32768)
740 delta -= 65536;
741 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +0000742}
743
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000744/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
745 'Lo' or 'Lm', 0 otherwise. */
746
Fredrik Lundh72b06852001-06-27 22:08:26 +0000747int _PyUnicode_IsAlpha(Py_UNICODE ch)
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000748{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000749 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000750
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000751 return (ctype->flags & ALPHA_MASK) != 0;
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000752}
753
Guido van Rossum603484d2000-03-10 22:52:46 +0000754#else
755
756/* Export the interfaces using the wchar_t type for portability
757 reasons: */
758
Fredrik Lundh72b06852001-06-27 22:08:26 +0000759int _PyUnicode_IsWhitespace(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000760{
761 return iswspace(ch);
762}
763
Fredrik Lundh72b06852001-06-27 22:08:26 +0000764int _PyUnicode_IsLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000765{
766 return iswlower(ch);
767}
768
Fredrik Lundh72b06852001-06-27 22:08:26 +0000769int _PyUnicode_IsUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000770{
771 return iswupper(ch);
772}
773
Fredrik Lundh72b06852001-06-27 22:08:26 +0000774Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000775{
776 return towlower(ch);
777}
778
Fredrik Lundh72b06852001-06-27 22:08:26 +0000779Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000780{
781 return towupper(ch);
782}
783
Fredrik Lundh72b06852001-06-27 22:08:26 +0000784int _PyUnicode_IsAlpha(Py_UNICODE ch)
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000785{
786 return iswalpha(ch);
787}
788
Guido van Rossum603484d2000-03-10 22:52:46 +0000789#endif