blob: 911c53f65cb9a75474404050b0874447ab5e9b87 [file] [log] [blame]
/*
   Unicode character type helpers.

   Written by Marc-Andre Lemburg (mal@lemburg.com).
   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)

   Copyright (c) Corporation for National Research Initiatives.

*/
10
11#include "Python.h"
Guido van Rossum603484d2000-03-10 22:52:46 +000012#include "unicodeobject.h"
13
/* Bit flags stored in _PyUnicode_TypeRecord.flags. */
#define ALPHA_MASK          0x001   /* tested by _PyUnicode_IsAlpha */
#define DECIMAL_MASK        0x002   /* record's 'decimal' field is valid */
#define DIGIT_MASK          0x004   /* record's 'digit' field is valid */
#define LOWER_MASK          0x008   /* tested by _PyUnicode_IsLowercase */
#define LINEBREAK_MASK      0x010   /* presumably: line break character */
#define SPACE_MASK          0x020   /* presumably: whitespace character */
#define TITLE_MASK          0x040   /* tested by _PyUnicode_IsTitlecase */
#define UPPER_MASK          0x080   /* tested by _PyUnicode_IsUppercase */
#define XID_START_MASK      0x100   /* tested by _PyUnicode_IsXidStart */
#define XID_CONTINUE_MASK   0x200   /* tested by _PyUnicode_IsXidContinue */
Jack Jansen56cdce32000-07-06 13:57:38 +000024
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000025typedef struct {
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000026 const Py_UNICODE upper;
27 const Py_UNICODE lower;
28 const Py_UNICODE title;
29 const unsigned char decimal;
30 const unsigned char digit;
Hye-Shik Chang974ed7c2004-06-02 16:49:17 +000031 const unsigned short flags;
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000032} _PyUnicode_TypeRecord;
33
34#include "unicodetype_db.h"
35
36static const _PyUnicode_TypeRecord *
Fredrik Lundhee13dba2001-06-26 20:36:12 +000037gettyperecord(Py_UNICODE code)
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000038{
39 int index;
40
Hye-Shik Chang7db07e62003-12-29 01:36:01 +000041#ifdef Py_UNICODE_WIDE
Martin v. Löwis9def6a32002-10-18 16:11:54 +000042 if (code >= 0x110000)
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000043 index = 0;
Hye-Shik Chang7db07e62003-12-29 01:36:01 +000044 else
45#endif
46 {
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000047 index = index1[(code>>SHIFT)];
48 index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
49 }
Fredrik Lundhee13dba2001-06-26 20:36:12 +000050
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000051 return &_PyUnicode_TypeRecords[index];
52}
Jack Jansen56cdce32000-07-06 13:57:38 +000053
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000054/* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
55 type 'B', 0 otherwise. */
Guido van Rossum603484d2000-03-10 22:52:46 +000056
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000057int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000058{
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +000059 switch (ch) {
60 case 0x000A: /* LINE FEED */
61 case 0x000D: /* CARRIAGE RETURN */
62 case 0x001C: /* FILE SEPARATOR */
63 case 0x001D: /* GROUP SEPARATOR */
64 case 0x001E: /* RECORD SEPARATOR */
65 case 0x0085: /* NEXT LINE */
66 case 0x2028: /* LINE SEPARATOR */
67 case 0x2029: /* PARAGRAPH SEPARATOR */
68 return 1;
69 default:
70 return 0;
71 }
Guido van Rossum603484d2000-03-10 22:52:46 +000072}
73
74/* Returns the titlecase Unicode characters corresponding to ch or just
75 ch if no titlecase mapping is known. */
76
Martin v. Löwisce9b5a52001-06-27 06:28:56 +000077Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000078{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000079 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwisedf368c2002-10-18 16:40:36 +000080 int delta;
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000081
82 if (ctype->title)
Martin v. Löwisedf368c2002-10-18 16:40:36 +000083 delta = ctype->title;
Martin v. Löwisce9b5a52001-06-27 06:28:56 +000084 else
Martin v. Löwisedf368c2002-10-18 16:40:36 +000085 delta = ctype->upper;
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000086
Martin v. Löwisedf368c2002-10-18 16:40:36 +000087 if (delta >= 32768)
88 delta -= 65536;
89
90 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +000091}
92
93/* Returns 1 for Unicode characters having the category 'Lt', 0
94 otherwise. */
95
Fredrik Lundh72b06852001-06-27 22:08:26 +000096int _PyUnicode_IsTitlecase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +000097{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +000098 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
99
100 return (ctype->flags & TITLE_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000101}
102
Martin v. Löwis13c3e382007-08-14 22:37:03 +0000103/* Returns 1 for Unicode characters having the XID_Start property, 0
104 otherwise. */
105
106int _PyUnicode_IsXidStart(Py_UNICODE ch)
107{
108 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
109
110 return (ctype->flags & XID_START_MASK) != 0;
111}
112
113/* Returns 1 for Unicode characters having the XID_Continue property,
114 0 otherwise. */
115
116int _PyUnicode_IsXidContinue(Py_UNICODE ch)
117{
118 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
119
120 return (ctype->flags & XID_CONTINUE_MASK) != 0;
121}
122
Guido van Rossum603484d2000-03-10 22:52:46 +0000123/* Returns the integer decimal (0-9) for Unicode characters having
124 this property, -1 otherwise. */
125
Fredrik Lundh72b06852001-06-27 22:08:26 +0000126int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000127{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000128 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
129
130 return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
Guido van Rossum603484d2000-03-10 22:52:46 +0000131}
132
Fredrik Lundh72b06852001-06-27 22:08:26 +0000133int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000134{
135 if (_PyUnicode_ToDecimalDigit(ch) < 0)
136 return 0;
137 return 1;
138}
139
140/* Returns the integer digit (0-9) for Unicode characters having
141 this property, -1 otherwise. */
142
Fredrik Lundh72b06852001-06-27 22:08:26 +0000143int _PyUnicode_ToDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000144{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000145 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
146
147 return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
Guido van Rossum603484d2000-03-10 22:52:46 +0000148}
149
Fredrik Lundh72b06852001-06-27 22:08:26 +0000150int _PyUnicode_IsDigit(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000151{
152 if (_PyUnicode_ToDigit(ch) < 0)
153 return 0;
154 return 1;
155}
156
157/* Returns the numeric value as double for Unicode characters having
158 this property, -1.0 otherwise. */
159
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000160/* TODO: replace with unicodetype_db.h table */
161
Fredrik Lundh72b06852001-06-27 22:08:26 +0000162double _PyUnicode_ToNumeric(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000163{
164 switch (ch) {
Thomas Wouters477c8d52006-05-27 19:21:47 +0000165 case 0x0F33:
166 return (double) -1 / 2;
167 case 0x17F0:
Guido van Rossum603484d2000-03-10 22:52:46 +0000168 case 0x3007:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000169#ifdef Py_UNICODE_WIDE
170 case 0x1018A:
171#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000172 return (double) 0;
173 case 0x09F4:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000174 case 0x17F1:
Guido van Rossum603484d2000-03-10 22:52:46 +0000175 case 0x215F:
176 case 0x2160:
177 case 0x2170:
178 case 0x3021:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 case 0x3192:
180 case 0x3220:
Guido van Rossum603484d2000-03-10 22:52:46 +0000181 case 0x3280:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000182#ifdef Py_UNICODE_WIDE
183 case 0x10107:
184 case 0x10142:
185 case 0x10158:
186 case 0x10159:
187 case 0x1015A:
188 case 0x10320:
189 case 0x103D1:
190#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000191 return (double) 1;
192 case 0x00BD:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 0x0F2A:
194 case 0x2CFD:
195#ifdef Py_UNICODE_WIDE
196 case 0x10141:
197 case 0x10175:
198 case 0x10176:
199#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000200 return (double) 1 / 2;
201 case 0x2153:
202 return (double) 1 / 3;
203 case 0x00BC:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000204#ifdef Py_UNICODE_WIDE
205 case 0x10140:
206#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000207 return (double) 1 / 4;
208 case 0x2155:
209 return (double) 1 / 5;
210 case 0x2159:
211 return (double) 1 / 6;
212 case 0x215B:
213 return (double) 1 / 8;
214 case 0x0BF0:
215 case 0x1372:
216 case 0x2169:
217 case 0x2179:
218 case 0x2469:
219 case 0x247D:
220 case 0x2491:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000221 case 0x24FE:
Guido van Rossum603484d2000-03-10 22:52:46 +0000222 case 0x277F:
223 case 0x2789:
224 case 0x2793:
225 case 0x3038:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000226 case 0x3229:
Guido van Rossum603484d2000-03-10 22:52:46 +0000227 case 0x3289:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000228#ifdef Py_UNICODE_WIDE
229 case 0x10110:
230 case 0x10149:
231 case 0x10150:
232 case 0x10157:
233 case 0x10160:
234 case 0x10161:
235 case 0x10162:
236 case 0x10163:
237 case 0x10164:
238 case 0x10322:
239 case 0x103D3:
240 case 0x10A44:
241#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000242 return (double) 10;
243 case 0x0BF1:
244 case 0x137B:
245 case 0x216D:
246 case 0x217D:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000247#ifdef Py_UNICODE_WIDE
248 case 0x10119:
249 case 0x1014B:
250 case 0x10152:
251 case 0x1016A:
252 case 0x103D5:
253 case 0x10A46:
254#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000255 return (double) 100;
256 case 0x0BF2:
257 case 0x216F:
258 case 0x217F:
259 case 0x2180:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000260#ifdef Py_UNICODE_WIDE
261 case 0x10122:
262 case 0x1014D:
263 case 0x10154:
264 case 0x10171:
265 case 0x10A47:
266#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000267 return (double) 1000;
268 case 0x137C:
269 case 0x2182:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000270#ifdef Py_UNICODE_WIDE
271 case 0x1012B:
272 case 0x10155:
273#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000274 return (double) 10000;
275 case 0x216A:
276 case 0x217A:
277 case 0x246A:
278 case 0x247E:
279 case 0x2492:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000280 case 0x24EB:
Guido van Rossum603484d2000-03-10 22:52:46 +0000281 return (double) 11;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000282 case 0x0F2F:
283 return (double) 11 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000284 case 0x216B:
285 case 0x217B:
286 case 0x246B:
287 case 0x247F:
288 case 0x2493:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 case 0x24EC:
Guido van Rossum603484d2000-03-10 22:52:46 +0000290 return (double) 12;
291 case 0x246C:
292 case 0x2480:
293 case 0x2494:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000294 case 0x24ED:
Guido van Rossum603484d2000-03-10 22:52:46 +0000295 return (double) 13;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000296 case 0x0F30:
297 return (double) 13 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000298 case 0x246D:
299 case 0x2481:
300 case 0x2495:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000301 case 0x24EE:
Guido van Rossum603484d2000-03-10 22:52:46 +0000302 return (double) 14;
303 case 0x246E:
304 case 0x2482:
305 case 0x2496:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000306 case 0x24EF:
Guido van Rossum603484d2000-03-10 22:52:46 +0000307 return (double) 15;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000308 case 0x0F31:
309 return (double) 15 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000310 case 0x09F9:
311 case 0x246F:
312 case 0x2483:
313 case 0x2497:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000314 case 0x24F0:
Guido van Rossum603484d2000-03-10 22:52:46 +0000315 return (double) 16;
316 case 0x16EE:
317 case 0x2470:
318 case 0x2484:
319 case 0x2498:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000320 case 0x24F1:
Guido van Rossum603484d2000-03-10 22:52:46 +0000321 return (double) 17;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000322 case 0x0F32:
323 return (double) 17 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000324 case 0x16EF:
325 case 0x2471:
326 case 0x2485:
327 case 0x2499:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000328 case 0x24F2:
Guido van Rossum603484d2000-03-10 22:52:46 +0000329 return (double) 18;
330 case 0x16F0:
331 case 0x2472:
332 case 0x2486:
333 case 0x249A:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000334 case 0x24F3:
Guido van Rossum603484d2000-03-10 22:52:46 +0000335 return (double) 19;
336 case 0x09F5:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000337 case 0x17F2:
Guido van Rossum603484d2000-03-10 22:52:46 +0000338 case 0x2161:
339 case 0x2171:
340 case 0x3022:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000341 case 0x3193:
342 case 0x3221:
Guido van Rossum603484d2000-03-10 22:52:46 +0000343 case 0x3281:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000344#ifdef Py_UNICODE_WIDE
345 case 0x10108:
346 case 0x1015B:
347 case 0x1015C:
348 case 0x1015D:
349 case 0x1015E:
350 case 0x103D2:
351#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000352 return (double) 2;
353 case 0x2154:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000354#ifdef Py_UNICODE_WIDE
355 case 0x10177:
356#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000357 return (double) 2 / 3;
358 case 0x2156:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000359 return (double) 2 / 5;
Guido van Rossum603484d2000-03-10 22:52:46 +0000360 case 0x1373:
361 case 0x2473:
362 case 0x2487:
363 case 0x249B:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000364 case 0x24F4:
Guido van Rossum603484d2000-03-10 22:52:46 +0000365 case 0x3039:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000366#ifdef Py_UNICODE_WIDE
367 case 0x10111:
368 case 0x103D4:
369 case 0x10A45:
370#endif
371 return (double) 20;
372#ifdef Py_UNICODE_WIDE
373 case 0x1011A:
374 return (double) 200;
375 case 0x10123:
376 return (double) 2000;
377 case 0x1012C:
378 return (double) 20000;
379#endif
380 case 0x3251:
381 return (double) 21;
382 case 0x3252:
383 return (double) 22;
384 case 0x3253:
385 return (double) 23;
386 case 0x3254:
387 return (double) 24;
388 case 0x3255:
389 return (double) 25;
390 case 0x3256:
391 return (double) 26;
392 case 0x3257:
393 return (double) 27;
394 case 0x3258:
395 return (double) 28;
396 case 0x3259:
397 return (double) 29;
Guido van Rossum603484d2000-03-10 22:52:46 +0000398 case 0x09F6:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000399 case 0x17F3:
Guido van Rossum603484d2000-03-10 22:52:46 +0000400 case 0x2162:
401 case 0x2172:
402 case 0x3023:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000403 case 0x3194:
404 case 0x3222:
Guido van Rossum603484d2000-03-10 22:52:46 +0000405 case 0x3282:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000406#ifdef Py_UNICODE_WIDE
407 case 0x10109:
408#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000409 return (double) 3;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000410 case 0x0F2B:
411 return (double) 3 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000412 case 0x00BE:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000413#ifdef Py_UNICODE_WIDE
414 case 0x10178:
415#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000416 return (double) 3 / 4;
417 case 0x2157:
418 return (double) 3 / 5;
419 case 0x215C:
420 return (double) 3 / 8;
421 case 0x1374:
422 case 0x303A:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000423 case 0x325A:
424#ifdef Py_UNICODE_WIDE
425 case 0x10112:
426 case 0x10165:
427#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000428 return (double) 30;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000429#ifdef Py_UNICODE_WIDE
430 case 0x1011B:
431 case 0x1016B:
432 return (double) 300;
433 case 0x10124:
434 return (double) 3000;
435 case 0x1012D:
436 return (double) 30000;
437#endif
438 case 0x325B:
439 return (double) 31;
440 case 0x325C:
441 return (double) 32;
442 case 0x325D:
443 return (double) 33;
444 case 0x325E:
445 return (double) 34;
446 case 0x325F:
447 return (double) 35;
448 case 0x32B1:
449 return (double) 36;
450 case 0x32B2:
451 return (double) 37;
452 case 0x32B3:
453 return (double) 38;
454 case 0x32B4:
455 return (double) 39;
Guido van Rossum603484d2000-03-10 22:52:46 +0000456 case 0x09F7:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000457 case 0x17F4:
Guido van Rossum603484d2000-03-10 22:52:46 +0000458 case 0x2163:
459 case 0x2173:
460 case 0x3024:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000461 case 0x3195:
462 case 0x3223:
Guido van Rossum603484d2000-03-10 22:52:46 +0000463 case 0x3283:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000464#ifdef Py_UNICODE_WIDE
465 case 0x1010A:
466#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000467 return (double) 4;
468 case 0x2158:
469 return (double) 4 / 5;
470 case 0x1375:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000471 case 0x32B5:
472#ifdef Py_UNICODE_WIDE
473 case 0x10113:
474#endif
475 return (double) 40;
476#ifdef Py_UNICODE_WIDE
477 case 0x1011C:
478 return (double) 400;
479 case 0x10125:
480 return (double) 4000;
481 case 0x1012E:
482 return (double) 40000;
483#endif
484 case 0x32B6:
485 return (double) 41;
486 case 0x32B7:
487 return (double) 42;
488 case 0x32B8:
489 return (double) 43;
490 case 0x32B9:
491 return (double) 44;
492 case 0x32BA:
493 return (double) 45;
494 case 0x32BB:
495 return (double) 46;
496 case 0x32BC:
497 return (double) 47;
498 case 0x32BD:
499 return (double) 48;
500 case 0x32BE:
501 return (double) 49;
502 case 0x17F5:
Guido van Rossum603484d2000-03-10 22:52:46 +0000503 case 0x2164:
504 case 0x2174:
505 case 0x3025:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000506 case 0x3224:
Guido van Rossum603484d2000-03-10 22:52:46 +0000507 case 0x3284:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000508#ifdef Py_UNICODE_WIDE
509 case 0x1010B:
510 case 0x10143:
511 case 0x10148:
512 case 0x1014F:
513 case 0x1015F:
514 case 0x10173:
515 case 0x10321:
516#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000517 return (double) 5;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000518 case 0x0F2C:
519 return (double) 5 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000520 case 0x215A:
521 return (double) 5 / 6;
522 case 0x215D:
523 return (double) 5 / 8;
524 case 0x1376:
525 case 0x216C:
526 case 0x217C:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000527 case 0x32BF:
528#ifdef Py_UNICODE_WIDE
529 case 0x10114:
530 case 0x10144:
531 case 0x1014A:
532 case 0x10151:
533 case 0x10166:
534 case 0x10167:
535 case 0x10168:
536 case 0x10169:
537 case 0x10174:
538 case 0x10323:
539#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000540 return (double) 50;
541 case 0x216E:
542 case 0x217E:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000543#ifdef Py_UNICODE_WIDE
544 case 0x1011D:
545 case 0x10145:
546 case 0x1014C:
547 case 0x10153:
548 case 0x1016C:
549 case 0x1016D:
550 case 0x1016E:
551 case 0x1016F:
552 case 0x10170:
553#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000554 return (double) 500;
555 case 0x2181:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000556#ifdef Py_UNICODE_WIDE
557 case 0x10126:
558 case 0x10146:
559 case 0x1014E:
560 case 0x10172:
561#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000562 return (double) 5000;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000563#ifdef Py_UNICODE_WIDE
564 case 0x1012F:
565 case 0x10147:
566 case 0x10156:
567 return (double) 50000;
568#endif
569 case 0x17F6:
Guido van Rossum603484d2000-03-10 22:52:46 +0000570 case 0x2165:
571 case 0x2175:
572 case 0x3026:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000573 case 0x3225:
Guido van Rossum603484d2000-03-10 22:52:46 +0000574 case 0x3285:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000575#ifdef Py_UNICODE_WIDE
576 case 0x1010C:
577#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000578 return (double) 6;
579 case 0x1377:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000580#ifdef Py_UNICODE_WIDE
581 case 0x10115:
582#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000583 return (double) 60;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000584#ifdef Py_UNICODE_WIDE
585 case 0x1011E:
586 return (double) 600;
587 case 0x10127:
588 return (double) 6000;
589 case 0x10130:
590 return (double) 60000;
591#endif
592 case 0x17F7:
Guido van Rossum603484d2000-03-10 22:52:46 +0000593 case 0x2166:
594 case 0x2176:
595 case 0x3027:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000596 case 0x3226:
Guido van Rossum603484d2000-03-10 22:52:46 +0000597 case 0x3286:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000598#ifdef Py_UNICODE_WIDE
599 case 0x1010D:
600#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000601 return (double) 7;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000602 case 0x0F2D:
603 return (double) 7 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000604 case 0x215E:
605 return (double) 7 / 8;
606 case 0x1378:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000607#ifdef Py_UNICODE_WIDE
608 case 0x10116:
609#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000610 return (double) 70;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000611#ifdef Py_UNICODE_WIDE
612 case 0x1011F:
613 return (double) 700;
614 case 0x10128:
615 return (double) 7000;
616 case 0x10131:
617 return (double) 70000;
618#endif
619 case 0x17F8:
Guido van Rossum603484d2000-03-10 22:52:46 +0000620 case 0x2167:
621 case 0x2177:
622 case 0x3028:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000623 case 0x3227:
Guido van Rossum603484d2000-03-10 22:52:46 +0000624 case 0x3287:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000625#ifdef Py_UNICODE_WIDE
626 case 0x1010E:
627#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000628 return (double) 8;
629 case 0x1379:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000630#ifdef Py_UNICODE_WIDE
631 case 0x10117:
632#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000633 return (double) 80;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000634#ifdef Py_UNICODE_WIDE
635 case 0x10120:
636 return (double) 800;
637 case 0x10129:
638 return (double) 8000;
639 case 0x10132:
640 return (double) 80000;
641#endif
642 case 0x17F9:
Guido van Rossum603484d2000-03-10 22:52:46 +0000643 case 0x2168:
644 case 0x2178:
645 case 0x3029:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000646 case 0x3228:
Guido van Rossum603484d2000-03-10 22:52:46 +0000647 case 0x3288:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000648#ifdef Py_UNICODE_WIDE
649 case 0x1010F:
650#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000651 return (double) 9;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000652 case 0x0F2E:
653 return (double) 9 / 2;
Guido van Rossum603484d2000-03-10 22:52:46 +0000654 case 0x137A:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000655#ifdef Py_UNICODE_WIDE
656 case 0x10118:
657#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000658 return (double) 90;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000659#ifdef Py_UNICODE_WIDE
660 case 0x10121:
661 case 0x1034A:
662 return (double) 900;
663 case 0x1012A:
664 return (double) 9000;
665 case 0x10133:
666 return (double) 90000;
667#endif
Guido van Rossum603484d2000-03-10 22:52:46 +0000668 default:
669 return (double) _PyUnicode_ToDigit(ch);
670 }
671}
672
Fredrik Lundh72b06852001-06-27 22:08:26 +0000673int _PyUnicode_IsNumeric(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000674{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000675 return _PyUnicode_ToNumeric(ch) != -1.0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000676}
677
678#ifndef WANT_WCTYPE_FUNCTIONS
679
Guido van Rossumdc742b32000-04-11 15:39:02 +0000680/* Returns 1 for Unicode characters having the bidirectional type
681 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
Guido van Rossum603484d2000-03-10 22:52:46 +0000682
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +0000683int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000684{
Marc-André Lemburg2cb94ab2005-10-20 19:06:35 +0000685 switch (ch) {
686 case 0x0009: /* HORIZONTAL TABULATION */
687 case 0x000A: /* LINE FEED */
688 case 0x000B: /* VERTICAL TABULATION */
689 case 0x000C: /* FORM FEED */
690 case 0x000D: /* CARRIAGE RETURN */
691 case 0x001C: /* FILE SEPARATOR */
692 case 0x001D: /* GROUP SEPARATOR */
693 case 0x001E: /* RECORD SEPARATOR */
694 case 0x001F: /* UNIT SEPARATOR */
695 case 0x0020: /* SPACE */
696 case 0x0085: /* NEXT LINE */
697 case 0x00A0: /* NO-BREAK SPACE */
698 case 0x1680: /* OGHAM SPACE MARK */
699 case 0x2000: /* EN QUAD */
700 case 0x2001: /* EM QUAD */
701 case 0x2002: /* EN SPACE */
702 case 0x2003: /* EM SPACE */
703 case 0x2004: /* THREE-PER-EM SPACE */
704 case 0x2005: /* FOUR-PER-EM SPACE */
705 case 0x2006: /* SIX-PER-EM SPACE */
706 case 0x2007: /* FIGURE SPACE */
707 case 0x2008: /* PUNCTUATION SPACE */
708 case 0x2009: /* THIN SPACE */
709 case 0x200A: /* HAIR SPACE */
710 case 0x200B: /* ZERO WIDTH SPACE */
711 case 0x2028: /* LINE SEPARATOR */
712 case 0x2029: /* PARAGRAPH SEPARATOR */
713 case 0x202F: /* NARROW NO-BREAK SPACE */
714 case 0x205F: /* MEDIUM MATHEMATICAL SPACE */
715 case 0x3000: /* IDEOGRAPHIC SPACE */
716 return 1;
717 default:
718 return 0;
719 }
Guido van Rossum603484d2000-03-10 22:52:46 +0000720}
721
722/* Returns 1 for Unicode characters having the category 'Ll', 0
723 otherwise. */
724
Fredrik Lundh72b06852001-06-27 22:08:26 +0000725int _PyUnicode_IsLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000726{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000727 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
728
729 return (ctype->flags & LOWER_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000730}
731
732/* Returns 1 for Unicode characters having the category 'Lu', 0
733 otherwise. */
734
Fredrik Lundh72b06852001-06-27 22:08:26 +0000735int _PyUnicode_IsUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000736{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000737 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
738
739 return (ctype->flags & UPPER_MASK) != 0;
Guido van Rossum603484d2000-03-10 22:52:46 +0000740}
741
742/* Returns the uppercase Unicode characters corresponding to ch or just
743 ch if no uppercase mapping is known. */
744
Fredrik Lundh72b06852001-06-27 22:08:26 +0000745Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000746{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000747 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwisedf368c2002-10-18 16:40:36 +0000748 int delta = ctype->upper;
749 if (delta >= 32768)
750 delta -= 65536;
751 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +0000752}
753
754/* Returns the lowercase Unicode characters corresponding to ch or just
755 ch if no lowercase mapping is known. */
756
Fredrik Lundh72b06852001-06-27 22:08:26 +0000757Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000758{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000759 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Martin v. Löwisedf368c2002-10-18 16:40:36 +0000760 int delta = ctype->lower;
761 if (delta >= 32768)
762 delta -= 65536;
763 return ch + delta;
Guido van Rossum603484d2000-03-10 22:52:46 +0000764}
765
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000766/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
767 'Lo' or 'Lm', 0 otherwise. */
768
Fredrik Lundh72b06852001-06-27 22:08:26 +0000769int _PyUnicode_IsAlpha(Py_UNICODE ch)
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000770{
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000771 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000772
Fredrik Lundh9e7dd4c2000-09-25 21:48:13 +0000773 return (ctype->flags & ALPHA_MASK) != 0;
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000774}
775
Guido van Rossum603484d2000-03-10 22:52:46 +0000776#else
777
/* Export the interfaces using the wchar_t type for portability
   reasons: */
780
Fredrik Lundh72b06852001-06-27 22:08:26 +0000781int _PyUnicode_IsWhitespace(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000782{
783 return iswspace(ch);
784}
785
Fredrik Lundh72b06852001-06-27 22:08:26 +0000786int _PyUnicode_IsLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000787{
788 return iswlower(ch);
789}
790
Fredrik Lundh72b06852001-06-27 22:08:26 +0000791int _PyUnicode_IsUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000792{
793 return iswupper(ch);
794}
795
Fredrik Lundh72b06852001-06-27 22:08:26 +0000796Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000797{
798 return towlower(ch);
799}
800
Fredrik Lundh72b06852001-06-27 22:08:26 +0000801Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
Guido van Rossum603484d2000-03-10 22:52:46 +0000802{
803 return towupper(ch);
804}
805
Fredrik Lundh72b06852001-06-27 22:08:26 +0000806int _PyUnicode_IsAlpha(Py_UNICODE ch)
Marc-André Lemburgf3938f52000-07-05 09:48:59 +0000807{
808 return iswalpha(ch);
809}
810
Guido van Rossum603484d2000-03-10 22:52:46 +0000811#endif