blob: 330b3763e44cfd553f2678ee28cc96a95d1edafc [file] [log] [blame]
/* ------------------------------------------------------------------------

   unicodedata -- Provides access to the Unicode 3.2 data base.

   Data was extracted from the Unicode 3.2 UnicodeData.txt file.

   Written by Marc-Andre Lemburg (mal@lemburg.com).
   Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
   Modified by Martin v. Löwis (martin@v.loewis.de)

   Copyright (c) Corporation for National Research Initiatives.

   ------------------------------------------------------------------------ */
14
15#include "Python.h"
Fredrik Lundh06d12682001-01-24 07:59:11 +000016#include "ucnhash.h"
17
18/* character properties */
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000019
/* One row of the generated character property table.  All fields are
   single bytes; two of them are indexes into name tables. */
typedef struct {
    /* index into _PyUnicode_CategoryNames */
    const unsigned char category;
    /* combining class value 0 - 255 */
    const unsigned char combining;
    /* index into _PyUnicode_BidirectionalNames */
    const unsigned char bidirectional;
    /* true if mirrored in bidir mode */
    const unsigned char mirrored;
} _PyUnicode_DatabaseRecord;
28
29/* data file generated by Tools/unicode/makeunicodedata.py */
30#include "unicodedata_db.h"
31
32static const _PyUnicode_DatabaseRecord*
Fredrik Lundhb95896b2001-02-18 22:06:17 +000033_getrecord(PyUnicodeObject* v)
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000034{
35 int code;
36 int index;
37
38 code = (int) *PyUnicode_AS_UNICODE(v);
39
Martin v. Löwis9def6a32002-10-18 16:11:54 +000040 if (code < 0 || code >= 0x110000)
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000041 index = 0;
42 else {
43 index = index1[(code>>SHIFT)];
44 index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
45 }
46
47 return &_PyUnicode_Database_Records[index];
48}
49
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000050/* --- Module API --------------------------------------------------------- */
51
52static PyObject *
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000053unicodedata_decimal(PyObject *self, PyObject *args)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000054{
55 PyUnicodeObject *v;
56 PyObject *defobj = NULL;
57 long rc;
58
Fredrik Lundh06d12682001-01-24 07:59:11 +000059 if (!PyArg_ParseTuple(args, "O!|O:decimal", &PyUnicode_Type, &v, &defobj))
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000060 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000061 if (PyUnicode_GET_SIZE(v) != 1) {
62 PyErr_SetString(PyExc_TypeError,
63 "need a single Unicode character as parameter");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000064 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000065 }
66 rc = Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v));
67 if (rc < 0) {
68 if (defobj == NULL) {
69 PyErr_SetString(PyExc_ValueError,
70 "not a decimal");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000071 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000072 }
73 else {
74 Py_INCREF(defobj);
75 return defobj;
76 }
77 }
78 return PyInt_FromLong(rc);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000079}
80
81static PyObject *
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000082unicodedata_digit(PyObject *self, PyObject *args)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000083{
84 PyUnicodeObject *v;
85 PyObject *defobj = NULL;
86 long rc;
87
Fredrik Lundh06d12682001-01-24 07:59:11 +000088 if (!PyArg_ParseTuple(args, "O!|O:digit", &PyUnicode_Type, &v, &defobj))
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000089 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000090 if (PyUnicode_GET_SIZE(v) != 1) {
91 PyErr_SetString(PyExc_TypeError,
92 "need a single Unicode character as parameter");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000093 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000094 }
95 rc = Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v));
96 if (rc < 0) {
97 if (defobj == NULL) {
Fredrik Lundh06d12682001-01-24 07:59:11 +000098 PyErr_SetString(PyExc_ValueError, "not a digit");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000099 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000100 }
101 else {
102 Py_INCREF(defobj);
103 return defobj;
104 }
105 }
106 return PyInt_FromLong(rc);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000107}
108
109static PyObject *
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000110unicodedata_numeric(PyObject *self, PyObject *args)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000111{
112 PyUnicodeObject *v;
113 PyObject *defobj = NULL;
114 double rc;
115
Fredrik Lundh06d12682001-01-24 07:59:11 +0000116 if (!PyArg_ParseTuple(args, "O!|O:numeric", &PyUnicode_Type, &v, &defobj))
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000117 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000118 if (PyUnicode_GET_SIZE(v) != 1) {
119 PyErr_SetString(PyExc_TypeError,
120 "need a single Unicode character as parameter");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000121 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000122 }
123 rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v));
124 if (rc < 0) {
125 if (defobj == NULL) {
Fredrik Lundh06d12682001-01-24 07:59:11 +0000126 PyErr_SetString(PyExc_ValueError, "not a numeric character");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000127 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000128 }
129 else {
130 Py_INCREF(defobj);
131 return defobj;
132 }
133 }
134 return PyFloat_FromDouble(rc);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000135}
136
137static PyObject *
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000138unicodedata_category(PyObject *self, PyObject *args)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000139{
140 PyUnicodeObject *v;
141 int index;
142
143 if (!PyArg_ParseTuple(args, "O!:category",
144 &PyUnicode_Type, &v))
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000145 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000146 if (PyUnicode_GET_SIZE(v) != 1) {
147 PyErr_SetString(PyExc_TypeError,
148 "need a single Unicode character as parameter");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000149 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000150 }
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000151 index = (int) _getrecord(v)->category;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000152 return PyString_FromString(_PyUnicode_CategoryNames[index]);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000153}
154
155static PyObject *
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000156unicodedata_bidirectional(PyObject *self, PyObject *args)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000157{
158 PyUnicodeObject *v;
159 int index;
160
161 if (!PyArg_ParseTuple(args, "O!:bidirectional",
162 &PyUnicode_Type, &v))
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000163 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000164 if (PyUnicode_GET_SIZE(v) != 1) {
165 PyErr_SetString(PyExc_TypeError,
166 "need a single Unicode character as parameter");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000167 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000168 }
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000169 index = (int) _getrecord(v)->bidirectional;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000170 return PyString_FromString(_PyUnicode_BidirectionalNames[index]);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000171}
172
173static PyObject *
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000174unicodedata_combining(PyObject *self, PyObject *args)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000175{
176 PyUnicodeObject *v;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000177
178 if (!PyArg_ParseTuple(args, "O!:combining",
179 &PyUnicode_Type, &v))
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000180 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000181 if (PyUnicode_GET_SIZE(v) != 1) {
182 PyErr_SetString(PyExc_TypeError,
183 "need a single Unicode character as parameter");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000184 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000185 }
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000186 return PyInt_FromLong((int) _getrecord(v)->combining);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000187}
188
189static PyObject *
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000190unicodedata_mirrored(PyObject *self, PyObject *args)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000191{
192 PyUnicodeObject *v;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000193
194 if (!PyArg_ParseTuple(args, "O!:mirrored",
195 &PyUnicode_Type, &v))
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000196 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000197 if (PyUnicode_GET_SIZE(v) != 1) {
198 PyErr_SetString(PyExc_TypeError,
199 "need a single Unicode character as parameter");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000200 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000201 }
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000202 return PyInt_FromLong((int) _getrecord(v)->mirrored);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000203}
204
205static PyObject *
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000206unicodedata_decomposition(PyObject *self, PyObject *args)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000207{
208 PyUnicodeObject *v;
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000209 char decomp[256];
210 int code, index, count, i;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000211
212 if (!PyArg_ParseTuple(args, "O!:decomposition",
213 &PyUnicode_Type, &v))
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000214 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000215 if (PyUnicode_GET_SIZE(v) != 1) {
216 PyErr_SetString(PyExc_TypeError,
217 "need a single Unicode character as parameter");
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000218 return NULL;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000219 }
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000220
221 code = (int) *PyUnicode_AS_UNICODE(v);
222
Martin v. Löwis9def6a32002-10-18 16:11:54 +0000223 if (code < 0 || code >= 0x110000)
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000224 index = 0;
225 else {
226 index = decomp_index1[(code>>DECOMP_SHIFT)];
227 index = decomp_index2[(index<<DECOMP_SHIFT)+
228 (code&((1<<DECOMP_SHIFT)-1))];
229 }
230
Tim Peters69b83b12001-11-30 07:23:05 +0000231 /* high byte is number of hex bytes (usually one or two), low byte
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000232 is prefix code (from*/
233 count = decomp_data[index] >> 8;
234
235 /* XXX: could allocate the PyString up front instead
236 (strlen(prefix) + 5 * count + 1 bytes) */
237
238 /* copy prefix */
239 i = strlen(decomp_prefix[decomp_data[index] & 255]);
240 memcpy(decomp, decomp_prefix[decomp_data[index] & 255], i);
241
242 while (count-- > 0) {
243 if (i)
244 decomp[i++] = ' ';
Tim Peters69b83b12001-11-30 07:23:05 +0000245 assert((size_t)i < sizeof(decomp));
246 PyOS_snprintf(decomp + i, sizeof(decomp) - i, "%04X",
247 decomp_data[++index]);
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000248 i += strlen(decomp + i);
249 }
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000250
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000251 decomp[i] = '\0';
252
253 return PyString_FromString(decomp);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000254}
255
Fredrik Lundh06d12682001-01-24 07:59:11 +0000256/* -------------------------------------------------------------------- */
257/* unicode character name tables */
258
259/* data file generated by Tools/unicode/makeunicodedata.py */
260#include "unicodename_db.h"
261
262/* -------------------------------------------------------------------- */
263/* database code (cut and pasted from the unidb package) */
264
/* Case-insensitive hash of the first len bytes of s.

   Each byte is upper-cased and mixed in as h = h * scale + byte.
   Whenever bits 24..31 of the running value become non-zero they are
   folded back into the low byte and the value is reduced to 24 bits.

   s     -- bytes to hash (need not be NUL-terminated)
   len   -- number of bytes to hash
   scale -- multiplier applied at every step

   Returns the hash value. */
static unsigned long
_gethash(const char *s, int len, int scale)
{
    int i;
    unsigned long h = 0;
    unsigned long ix;
    for (i = 0; i < len; i++) {
        /* toupper() is only defined for EOF and unsigned char values,
           so convert first: plain char may be signed */
        h = (h * scale) + (unsigned char) toupper((unsigned char) s[i]);
        ix = h & 0xff000000;
        if (ix)
            h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff;
    }
    return h;
}
279
/* Constants for the arithmetic mapping between Hangul syllable code
   points and their (L, V, T) parts. */
#define SBase   0xAC00
#define LBase   0x1100
#define VBase   0x1161
#define TBase   0x11A7
#define LCount  19
#define VCount  21
#define TCount  28
#define NCount  (VCount*TCount)
#define SCount  (LCount*NCount)

/* Name fragments for the Hangul syllable parts.  Column 0 has LCount
   entries, column 1 has VCount entries and column 2 has TCount
   entries; the unused tail of the shorter columns is filled with 0. */
static char *hangul_syllables[][3] = {
    { "G",  "A",   ""   },
    { "GG", "AE",  "G"  },
    { "N",  "YA",  "GG" },
    { "D",  "YAE", "GS" },
    { "DD", "EO",  "N"  },
    { "R",  "E",   "NJ" },
    { "M",  "YEO", "NH" },
    { "B",  "YE",  "D"  },
    { "BB", "O",   "L"  },
    { "S",  "WA",  "LG" },
    { "SS", "WAE", "LM" },
    { "",   "OE",  "LB" },
    { "J",  "YO",  "LS" },
    { "JJ", "U",   "LT" },
    { "C",  "WEO", "LP" },
    { "K",  "WE",  "LH" },
    { "T",  "WI",  "M"  },
    { "P",  "YU",  "B"  },
    { "H",  "EU",  "BS" },
    { 0,    "YI",  "S"  },
    { 0,    "I",   "SS" },
    { 0,    0,     "NG" },
    { 0,    0,     "J"  },
    { 0,    0,     "C"  },
    { 0,    0,     "K"  },
    { 0,    0,     "T"  },
    { 0,    0,     "P"  },
    { 0,    0,     "H"  }
};
320
Fredrik Lundh06d12682001-01-24 07:59:11 +0000321static int
Andrew MacIntyre74a3bec2002-06-13 11:55:14 +0000322_getucname(Py_UCS4 code, char* buffer, int buflen)
Fredrik Lundh06d12682001-01-24 07:59:11 +0000323{
324 int offset;
325 int i;
326 int word;
327 unsigned char* w;
328
Martin v. Löwis2f4be4e2002-11-23 17:11:06 +0000329 if (SBase <= code && code < SBase+SCount) {
Martin v. Löwis7d41e292002-11-23 12:22:32 +0000330 /* Hangul syllable. */
331 int SIndex = code - SBase;
332 int L = SIndex / NCount;
333 int V = (SIndex % NCount) / TCount;
334 int T = SIndex % TCount;
335
336 if (buflen < 27)
337 /* Worst case: HANGUL SYLLABLE <10chars>. */
338 return 0;
339 strcpy(buffer, "HANGUL SYLLABLE ");
340 buffer += 16;
341 strcpy(buffer, hangul_syllables[L][0]);
342 buffer += strlen(hangul_syllables[L][0]);
343 strcpy(buffer, hangul_syllables[V][1]);
344 buffer += strlen(hangul_syllables[V][1]);
345 strcpy(buffer, hangul_syllables[T][2]);
346 buffer += strlen(hangul_syllables[T][2]);
347 *buffer = '\0';
348 return 1;
349 }
350
Martin v. Löwis9def6a32002-10-18 16:11:54 +0000351 if (code >= 0x110000)
Fredrik Lundh06d12682001-01-24 07:59:11 +0000352 return 0;
353
354 /* get offset into phrasebook */
355 offset = phrasebook_offset1[(code>>phrasebook_shift)];
356 offset = phrasebook_offset2[(offset<<phrasebook_shift) +
357 (code&((1<<phrasebook_shift)-1))];
358 if (!offset)
359 return 0;
360
361 i = 0;
362
363 for (;;) {
364 /* get word index */
365 word = phrasebook[offset] - phrasebook_short;
366 if (word >= 0) {
367 word = (word << 8) + phrasebook[offset+1];
368 offset += 2;
369 } else
370 word = phrasebook[offset++];
371 if (i) {
372 if (i > buflen)
373 return 0; /* buffer overflow */
374 buffer[i++] = ' ';
375 }
376 /* copy word string from lexicon. the last character in the
377 word has bit 7 set. the last word in a string ends with
378 0x80 */
379 w = lexicon + lexicon_offset[word];
380 while (*w < 128) {
381 if (i >= buflen)
382 return 0; /* buffer overflow */
383 buffer[i++] = *w++;
384 }
385 if (i >= buflen)
386 return 0; /* buffer overflow */
387 buffer[i++] = *w & 127;
388 if (*w == 128)
389 break; /* end of word */
390 }
391
392 return 1;
393}
394
395static int
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000396_cmpname(int code, const char* name, int namelen)
Fredrik Lundh06d12682001-01-24 07:59:11 +0000397{
398 /* check if code corresponds to the given name */
399 int i;
400 char buffer[NAME_MAXLEN];
Andrew MacIntyre74a3bec2002-06-13 11:55:14 +0000401 if (!_getucname(code, buffer, sizeof(buffer)))
Fredrik Lundh06d12682001-01-24 07:59:11 +0000402 return 0;
403 for (i = 0; i < namelen; i++) {
404 if (toupper(name[i]) != buffer[i])
405 return 0;
406 }
407 return buffer[namelen] == '\0';
408}
409
Martin v. Löwis7d41e292002-11-23 12:22:32 +0000410static void
411find_syllable(const char *str, int *len, int *pos, int count, int column)
412{
413 int i, len1;
414 *len = -1;
415 for (i = 0; i < count; i++) {
416 char *s = hangul_syllables[i][column];
417 len1 = strlen(s);
418 if (len1 <= *len)
419 continue;
420 if (strncmp(str, s, len1) == 0) {
421 *len = len1;
422 *pos = i;
423 }
424 }
425 if (*len == -1) {
426 *len = 0;
427 *pos = -1;
428 }
429}
430
Fredrik Lundh06d12682001-01-24 07:59:11 +0000431static int
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000432_getcode(const char* name, int namelen, Py_UCS4* code)
Fredrik Lundh06d12682001-01-24 07:59:11 +0000433{
434 unsigned int h, v;
435 unsigned int mask = code_size-1;
436 unsigned int i, incr;
437
Martin v. Löwis7d41e292002-11-23 12:22:32 +0000438 /* Check for hangul syllables. */
439 if (strncmp(name, "HANGUL SYLLABLE ", 16) == 0) {
440 int L, V, T, len;
441 const char *pos = name + 16;
442 find_syllable(pos, &len, &L, LCount, 0);
443 pos += len;
444 find_syllable(pos, &len, &V, VCount, 1);
445 pos += len;
446 find_syllable(pos, &len, &T, TCount, 2);
447 pos += len;
448 if (V != -1 && V != -1 && T != -1 && pos-name == namelen) {
449 *code = SBase + (L*VCount+V)*TCount + T;
450 return 1;
451 }
452 }
453
Fredrik Lundh06d12682001-01-24 07:59:11 +0000454 /* the following is the same as python's dictionary lookup, with
455 only minor changes. see the makeunicodedata script for more
456 details */
457
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000458 h = (unsigned int) _gethash(name, namelen, code_magic);
Fredrik Lundh06d12682001-01-24 07:59:11 +0000459 i = (~h) & mask;
460 v = code_hash[i];
461 if (!v)
462 return 0;
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000463 if (_cmpname(v, name, namelen)) {
Fredrik Lundh06d12682001-01-24 07:59:11 +0000464 *code = v;
465 return 1;
466 }
467 incr = (h ^ (h >> 3)) & mask;
468 if (!incr)
469 incr = mask;
470 for (;;) {
471 i = (i + incr) & mask;
472 v = code_hash[i];
473 if (!v)
Fredrik Lundhae763672001-02-18 11:41:49 +0000474 return 0;
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000475 if (_cmpname(v, name, namelen)) {
Fredrik Lundh06d12682001-01-24 07:59:11 +0000476 *code = v;
477 return 1;
478 }
479 incr = incr << 1;
480 if (incr > mask)
481 incr = incr ^ code_poly;
482 }
483}
484
485static const _PyUnicode_Name_CAPI hashAPI =
486{
487 sizeof(_PyUnicode_Name_CAPI),
Andrew MacIntyre74a3bec2002-06-13 11:55:14 +0000488 _getucname,
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000489 _getcode
Fredrik Lundh06d12682001-01-24 07:59:11 +0000490};
491
492/* -------------------------------------------------------------------- */
493/* Python bindings */
494
495static PyObject *
496unicodedata_name(PyObject* self, PyObject* args)
497{
498 char name[NAME_MAXLEN];
499
500 PyUnicodeObject* v;
501 PyObject* defobj = NULL;
502 if (!PyArg_ParseTuple(args, "O!|O:name", &PyUnicode_Type, &v, &defobj))
503 return NULL;
504
505 if (PyUnicode_GET_SIZE(v) != 1) {
506 PyErr_SetString(PyExc_TypeError,
507 "need a single Unicode character as parameter");
508 return NULL;
509 }
510
Andrew MacIntyre74a3bec2002-06-13 11:55:14 +0000511 if (!_getucname((Py_UCS4) *PyUnicode_AS_UNICODE(v),
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000512 name, sizeof(name))) {
Fredrik Lundh06d12682001-01-24 07:59:11 +0000513 if (defobj == NULL) {
514 PyErr_SetString(PyExc_ValueError, "no such name");
515 return NULL;
516 }
517 else {
518 Py_INCREF(defobj);
519 return defobj;
520 }
521 }
522
523 return Py_BuildValue("s", name);
524}
525
526static PyObject *
527unicodedata_lookup(PyObject* self, PyObject* args)
528{
529 Py_UCS4 code;
530 Py_UNICODE str[1];
531
532 char* name;
533 int namelen;
534 if (!PyArg_ParseTuple(args, "s#:lookup", &name, &namelen))
535 return NULL;
536
Fredrik Lundhb95896b2001-02-18 22:06:17 +0000537 if (!_getcode(name, namelen, &code)) {
Fredrik Lundh06d12682001-01-24 07:59:11 +0000538 PyErr_SetString(PyExc_KeyError, "undefined character name");
539 return NULL;
540 }
541
542 str[0] = (Py_UNICODE) code;
543 return PyUnicode_FromUnicode(str, 1);
544}
545
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000546/* XXX Add doc strings. */
547
548static PyMethodDef unicodedata_functions[] = {
Fredrik Lundh06d12682001-01-24 07:59:11 +0000549 {"decimal", unicodedata_decimal, METH_VARARGS},
550 {"digit", unicodedata_digit, METH_VARARGS},
551 {"numeric", unicodedata_numeric, METH_VARARGS},
552 {"category", unicodedata_category, METH_VARARGS},
553 {"bidirectional", unicodedata_bidirectional, METH_VARARGS},
554 {"combining", unicodedata_combining, METH_VARARGS},
555 {"mirrored", unicodedata_mirrored, METH_VARARGS},
556 {"decomposition",unicodedata_decomposition, METH_VARARGS},
557 {"name", unicodedata_name, METH_VARARGS},
558 {"lookup", unicodedata_lookup, METH_VARARGS},
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000559 {NULL, NULL} /* sentinel */
560};
561
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000562PyDoc_STRVAR(unicodedata_docstring, "unicode character database");
Fredrik Lundh06d12682001-01-24 07:59:11 +0000563
Mark Hammond62b1ab12002-07-23 06:31:15 +0000564PyMODINIT_FUNC
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000565initunicodedata(void)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000566{
Fred Drakea2bd8d32002-04-03 21:39:26 +0000567 PyObject *m, *v;
Fredrik Lundh06d12682001-01-24 07:59:11 +0000568
Fred Drakef585bef2001-03-03 19:41:55 +0000569 m = Py_InitModule3(
570 "unicodedata", unicodedata_functions, unicodedata_docstring);
Fredrik Lundh06d12682001-01-24 07:59:11 +0000571 if (!m)
572 return;
573
Fredrik Lundh06d12682001-01-24 07:59:11 +0000574 /* Export C API */
575 v = PyCObject_FromVoidPtr((void *) &hashAPI, NULL);
Fred Drakea2bd8d32002-04-03 21:39:26 +0000576 if (v != NULL)
577 PyModule_AddObject(m, "ucnhash_CAPI", v);
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000578}
Martin v. Löwis7d41e292002-11-23 12:22:32 +0000579
580/*
581Local variables:
582c-basic-offset: 4
583End:
584*/