blob: 36c6566ad426bd6403ee26c3bfa41f520770ad65 [file] [log] [blame]
Guido van Rossum2a70a3a2000-03-10 23:10:21 +00001/* ------------------------------------------------------------------------
2
3 unicodedata -- Provides access to the Unicode 3.0 data base.
4
5 Data was extracted from the Unicode 3.0 UnicodeData.txt file.
6
7Written by Marc-Andre Lemburg (mal@lemburg.com).
8
Guido van Rossum16b1ad92000-08-03 16:24:25 +00009Copyright (c) Corporation for National Research Initiatives.
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000010
11 ------------------------------------------------------------------------ */
12
13#include "Python.h"
14#include "unicodedatabase.h"
15
Guido van Rossum8a160542000-03-31 17:26:12 +000016/* --- Helpers ------------------------------------------------------------ */
17
18static
19const _PyUnicode_DatabaseRecord *unicode_db(register int i)
20{
21 register int page = i >> 12;
22
23 if (page < sizeof(_PyUnicode_Database))
24 return &_PyUnicode_Database[page][i & 0x0fff];
25 return &_PyUnicode_Database[0][0];
26}
27
Guido van Rossum2a70a3a2000-03-10 23:10:21 +000028/* --- Module API --------------------------------------------------------- */
29
30static PyObject *
31unicodedata_decimal(PyObject *self,
32 PyObject *args)
33{
34 PyUnicodeObject *v;
35 PyObject *defobj = NULL;
36 long rc;
37
38 if (!PyArg_ParseTuple(args, "O!|O:decimal",
39 &PyUnicode_Type, &v, &defobj))
40 goto onError;
41 if (PyUnicode_GET_SIZE(v) != 1) {
42 PyErr_SetString(PyExc_TypeError,
43 "need a single Unicode character as parameter");
44 goto onError;
45 }
46 rc = Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v));
47 if (rc < 0) {
48 if (defobj == NULL) {
49 PyErr_SetString(PyExc_ValueError,
50 "not a decimal");
51 goto onError;
52 }
53 else {
54 Py_INCREF(defobj);
55 return defobj;
56 }
57 }
58 return PyInt_FromLong(rc);
59
60 onError:
61 return NULL;
62}
63
64static PyObject *
65unicodedata_digit(PyObject *self,
66 PyObject *args)
67{
68 PyUnicodeObject *v;
69 PyObject *defobj = NULL;
70 long rc;
71
72 if (!PyArg_ParseTuple(args, "O!|O:digit",
73 &PyUnicode_Type, &v, &defobj))
74 goto onError;
75 if (PyUnicode_GET_SIZE(v) != 1) {
76 PyErr_SetString(PyExc_TypeError,
77 "need a single Unicode character as parameter");
78 goto onError;
79 }
80 rc = Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v));
81 if (rc < 0) {
82 if (defobj == NULL) {
83 PyErr_SetString(PyExc_ValueError,
84 "not a digit");
85 goto onError;
86 }
87 else {
88 Py_INCREF(defobj);
89 return defobj;
90 }
91 }
92 return PyInt_FromLong(rc);
93
94 onError:
95 return NULL;
96}
97
98static PyObject *
99unicodedata_numeric(PyObject *self,
100 PyObject *args)
101{
102 PyUnicodeObject *v;
103 PyObject *defobj = NULL;
104 double rc;
105
106 if (!PyArg_ParseTuple(args, "O!|O:numeric",
107 &PyUnicode_Type, &v, &defobj))
108 goto onError;
109 if (PyUnicode_GET_SIZE(v) != 1) {
110 PyErr_SetString(PyExc_TypeError,
111 "need a single Unicode character as parameter");
112 goto onError;
113 }
114 rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v));
115 if (rc < 0) {
116 if (defobj == NULL) {
117 PyErr_SetString(PyExc_ValueError,
118 "not a numeric character");
119 goto onError;
120 }
121 else {
122 Py_INCREF(defobj);
123 return defobj;
124 }
125 }
126 return PyFloat_FromDouble(rc);
127
128 onError:
129 return NULL;
130}
131
132static PyObject *
133unicodedata_category(PyObject *self,
134 PyObject *args)
135{
136 PyUnicodeObject *v;
137 int index;
138
139 if (!PyArg_ParseTuple(args, "O!:category",
140 &PyUnicode_Type, &v))
141 goto onError;
142 if (PyUnicode_GET_SIZE(v) != 1) {
143 PyErr_SetString(PyExc_TypeError,
144 "need a single Unicode character as parameter");
145 goto onError;
146 }
Guido van Rossum8a160542000-03-31 17:26:12 +0000147 index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->category;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000148 if (index < 0 ||
149 index > sizeof(_PyUnicode_CategoryNames) /
150 sizeof(_PyUnicode_CategoryNames[0])) {
151 PyErr_Format(PyExc_SystemError,
152 "category index out of range: %i",
153 index);
154 goto onError;
155 }
156 return PyString_FromString(_PyUnicode_CategoryNames[index]);
157
158 onError:
159 return NULL;
160}
161
162static PyObject *
163unicodedata_bidirectional(PyObject *self,
164 PyObject *args)
165{
166 PyUnicodeObject *v;
167 int index;
168
169 if (!PyArg_ParseTuple(args, "O!:bidirectional",
170 &PyUnicode_Type, &v))
171 goto onError;
172 if (PyUnicode_GET_SIZE(v) != 1) {
173 PyErr_SetString(PyExc_TypeError,
174 "need a single Unicode character as parameter");
175 goto onError;
176 }
Guido van Rossum8a160542000-03-31 17:26:12 +0000177 index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->bidirectional;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000178 if (index < 0 ||
179 index > sizeof(_PyUnicode_CategoryNames) /
180 sizeof(_PyUnicode_CategoryNames[0])) {
181 PyErr_Format(PyExc_SystemError,
182 "bidirectional index out of range: %i",
183 index);
184 goto onError;
185 }
186 return PyString_FromString(_PyUnicode_BidirectionalNames[index]);
187
188 onError:
189 return NULL;
190}
191
192static PyObject *
193unicodedata_combining(PyObject *self,
194 PyObject *args)
195{
196 PyUnicodeObject *v;
197 int value;
198
199 if (!PyArg_ParseTuple(args, "O!:combining",
200 &PyUnicode_Type, &v))
201 goto onError;
202 if (PyUnicode_GET_SIZE(v) != 1) {
203 PyErr_SetString(PyExc_TypeError,
204 "need a single Unicode character as parameter");
205 goto onError;
206 }
Guido van Rossum8a160542000-03-31 17:26:12 +0000207 value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->combining;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000208 return PyInt_FromLong(value);
209
210 onError:
211 return NULL;
212}
213
214static PyObject *
215unicodedata_mirrored(PyObject *self,
216 PyObject *args)
217{
218 PyUnicodeObject *v;
219 int value;
220
221 if (!PyArg_ParseTuple(args, "O!:mirrored",
222 &PyUnicode_Type, &v))
223 goto onError;
224 if (PyUnicode_GET_SIZE(v) != 1) {
225 PyErr_SetString(PyExc_TypeError,
226 "need a single Unicode character as parameter");
227 goto onError;
228 }
Guido van Rossum8a160542000-03-31 17:26:12 +0000229 value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->mirrored;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000230 return PyInt_FromLong(value);
231
232 onError:
233 return NULL;
234}
235
236static PyObject *
237unicodedata_decomposition(PyObject *self,
238 PyObject *args)
239{
240 PyUnicodeObject *v;
241 const char *value;
242
243 if (!PyArg_ParseTuple(args, "O!:decomposition",
244 &PyUnicode_Type, &v))
245 goto onError;
246 if (PyUnicode_GET_SIZE(v) != 1) {
247 PyErr_SetString(PyExc_TypeError,
248 "need a single Unicode character as parameter");
249 goto onError;
250 }
Guido van Rossum8a160542000-03-31 17:26:12 +0000251 value = unicode_db((int)*PyUnicode_AS_UNICODE(v))->decomposition;
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000252 if (value == NULL)
253 return PyString_FromString("");
254 else
255 return PyString_FromString(value);
256
257 onError:
258 return NULL;
259}
260
261/* XXX Add doc strings. */
262
263static PyMethodDef unicodedata_functions[] = {
264 {"decimal", unicodedata_decimal, 1},
265 {"digit", unicodedata_digit, 1},
266 {"numeric", unicodedata_numeric, 1},
267 {"category", unicodedata_category, 1},
268 {"bidirectional", unicodedata_bidirectional, 1},
269 {"combining", unicodedata_combining, 1},
270 {"mirrored", unicodedata_mirrored, 1},
271 {"decomposition", unicodedata_decomposition, 1},
272 {NULL, NULL} /* sentinel */
273};
274
275DL_EXPORT(void)
Thomas Woutersf3f33dc2000-07-21 06:00:07 +0000276initunicodedata(void)
Guido van Rossum2a70a3a2000-03-10 23:10:21 +0000277{
278 Py_InitModule("unicodedata", unicodedata_functions);
279}