| Guido van Rossum | 2a70a3a | 2000-03-10 23:10:21 +0000 | [diff] [blame] | 1 | /* ------------------------------------------------------------------------ | 
 | 2 |  | 
 | 3 |    unicodedata -- Provides access to the Unicode 3.0 data base. | 
 | 4 |  | 
 | 5 |    Data was extracted from the Unicode 3.0 UnicodeData.txt file. | 
 | 6 |  | 
 | 7 | Written by Marc-Andre Lemburg (mal@lemburg.com). | 
 | 8 |  | 
 | 9 | (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | 
 | 10 |  | 
 | 11 |    ------------------------------------------------------------------------ */ | 
 | 12 |  | 
 | 13 | #include "Python.h" | 
 | 14 | #include "unicodedatabase.h" | 
 | 15 |  | 
| Guido van Rossum | 8a16054 | 2000-03-31 17:26:12 +0000 | [diff] [blame^] | 16 | /* --- Helpers ------------------------------------------------------------ */ | 
 | 17 |  | 
 | 18 | static  | 
 | 19 | const _PyUnicode_DatabaseRecord *unicode_db(register int i) | 
 | 20 | { | 
 | 21 |     register int page = i >> 12; | 
 | 22 |      | 
 | 23 |     if (page < sizeof(_PyUnicode_Database)) | 
 | 24 | 	return &_PyUnicode_Database[page][i & 0x0fff]; | 
 | 25 |     return &_PyUnicode_Database[0][0]; | 
 | 26 | } | 
 | 27 |  | 
| Guido van Rossum | 2a70a3a | 2000-03-10 23:10:21 +0000 | [diff] [blame] | 28 | /* --- Module API --------------------------------------------------------- */ | 
 | 29 |  | 
 | 30 | static PyObject * | 
 | 31 | unicodedata_decimal(PyObject *self, | 
 | 32 | 		    PyObject *args) | 
 | 33 | { | 
 | 34 |     PyUnicodeObject *v; | 
 | 35 |     PyObject *defobj = NULL; | 
 | 36 |     long rc; | 
 | 37 |  | 
 | 38 |     if (!PyArg_ParseTuple(args, "O!|O:decimal", | 
 | 39 | 			  &PyUnicode_Type, &v, &defobj)) | 
 | 40 | 	goto onError; | 
 | 41 |     if (PyUnicode_GET_SIZE(v) != 1) { | 
 | 42 | 	PyErr_SetString(PyExc_TypeError, | 
 | 43 | 			"need a single Unicode character as parameter"); | 
 | 44 | 	goto onError; | 
 | 45 |     } | 
 | 46 |     rc = Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v)); | 
 | 47 |     if (rc < 0) { | 
 | 48 | 	if (defobj == NULL) { | 
 | 49 | 	    PyErr_SetString(PyExc_ValueError, | 
 | 50 | 			    "not a decimal"); | 
 | 51 | 	    goto onError; | 
 | 52 | 	} | 
 | 53 | 	else { | 
 | 54 | 	    Py_INCREF(defobj); | 
 | 55 | 	    return defobj; | 
 | 56 | 	} | 
 | 57 |     } | 
 | 58 |     return PyInt_FromLong(rc); | 
 | 59 |      | 
 | 60 |  onError: | 
 | 61 |     return NULL; | 
 | 62 | } | 
 | 63 |  | 
 | 64 | static PyObject * | 
 | 65 | unicodedata_digit(PyObject *self, | 
 | 66 | 		  PyObject *args) | 
 | 67 | { | 
 | 68 |     PyUnicodeObject *v; | 
 | 69 |     PyObject *defobj = NULL; | 
 | 70 |     long rc; | 
 | 71 |  | 
 | 72 |     if (!PyArg_ParseTuple(args, "O!|O:digit", | 
 | 73 | 			  &PyUnicode_Type, &v, &defobj)) | 
 | 74 | 	goto onError; | 
 | 75 |     if (PyUnicode_GET_SIZE(v) != 1) { | 
 | 76 | 	PyErr_SetString(PyExc_TypeError, | 
 | 77 | 			"need a single Unicode character as parameter"); | 
 | 78 | 	goto onError; | 
 | 79 |     } | 
 | 80 |     rc = Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v)); | 
 | 81 |     if (rc < 0) { | 
 | 82 | 	if (defobj == NULL) { | 
 | 83 | 	    PyErr_SetString(PyExc_ValueError, | 
 | 84 | 			    "not a digit"); | 
 | 85 | 	    goto onError; | 
 | 86 | 	} | 
 | 87 | 	else { | 
 | 88 | 	    Py_INCREF(defobj); | 
 | 89 | 	    return defobj; | 
 | 90 | 	} | 
 | 91 |     } | 
 | 92 |     return PyInt_FromLong(rc); | 
 | 93 |      | 
 | 94 |  onError: | 
 | 95 |     return NULL; | 
 | 96 | } | 
 | 97 |  | 
 | 98 | static PyObject * | 
 | 99 | unicodedata_numeric(PyObject *self, | 
 | 100 | 		    PyObject *args) | 
 | 101 | { | 
 | 102 |     PyUnicodeObject *v; | 
 | 103 |     PyObject *defobj = NULL; | 
 | 104 |     double rc; | 
 | 105 |  | 
 | 106 |     if (!PyArg_ParseTuple(args, "O!|O:numeric", | 
 | 107 | 			  &PyUnicode_Type, &v, &defobj)) | 
 | 108 | 	goto onError; | 
 | 109 |     if (PyUnicode_GET_SIZE(v) != 1) { | 
 | 110 | 	PyErr_SetString(PyExc_TypeError, | 
 | 111 | 			"need a single Unicode character as parameter"); | 
 | 112 | 	goto onError; | 
 | 113 |     } | 
 | 114 |     rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v)); | 
 | 115 |     if (rc < 0) { | 
 | 116 | 	if (defobj == NULL) { | 
 | 117 | 	    PyErr_SetString(PyExc_ValueError, | 
 | 118 | 			    "not a numeric character"); | 
 | 119 | 	    goto onError; | 
 | 120 | 	} | 
 | 121 | 	else { | 
 | 122 | 	    Py_INCREF(defobj); | 
 | 123 | 	    return defobj; | 
 | 124 | 	} | 
 | 125 |     } | 
 | 126 |     return PyFloat_FromDouble(rc); | 
 | 127 |      | 
 | 128 |  onError: | 
 | 129 |     return NULL; | 
 | 130 | } | 
 | 131 |  | 
 | 132 | static PyObject * | 
 | 133 | unicodedata_category(PyObject *self, | 
 | 134 | 		     PyObject *args) | 
 | 135 | { | 
 | 136 |     PyUnicodeObject *v; | 
 | 137 |     int index; | 
 | 138 |  | 
 | 139 |     if (!PyArg_ParseTuple(args, "O!:category", | 
 | 140 | 			  &PyUnicode_Type, &v)) | 
 | 141 | 	goto onError; | 
 | 142 |     if (PyUnicode_GET_SIZE(v) != 1) { | 
 | 143 | 	PyErr_SetString(PyExc_TypeError, | 
 | 144 | 			"need a single Unicode character as parameter"); | 
 | 145 | 	goto onError; | 
 | 146 |     } | 
| Guido van Rossum | 8a16054 | 2000-03-31 17:26:12 +0000 | [diff] [blame^] | 147 |     index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->category; | 
| Guido van Rossum | 2a70a3a | 2000-03-10 23:10:21 +0000 | [diff] [blame] | 148 |     if (index < 0 ||  | 
 | 149 | 	index > sizeof(_PyUnicode_CategoryNames) /  | 
 | 150 | 	        sizeof(_PyUnicode_CategoryNames[0])) { | 
 | 151 | 	PyErr_Format(PyExc_SystemError, | 
 | 152 | 		     "category index out of range: %i", | 
 | 153 | 		     index); | 
 | 154 | 	goto onError; | 
 | 155 |     } | 
 | 156 |     return PyString_FromString(_PyUnicode_CategoryNames[index]); | 
 | 157 |      | 
 | 158 |  onError: | 
 | 159 |     return NULL; | 
 | 160 | } | 
 | 161 |  | 
 | 162 | static PyObject * | 
 | 163 | unicodedata_bidirectional(PyObject *self, | 
 | 164 | 			  PyObject *args) | 
 | 165 | { | 
 | 166 |     PyUnicodeObject *v; | 
 | 167 |     int index; | 
 | 168 |  | 
 | 169 |     if (!PyArg_ParseTuple(args, "O!:bidirectional", | 
 | 170 | 			  &PyUnicode_Type, &v)) | 
 | 171 | 	goto onError; | 
 | 172 |     if (PyUnicode_GET_SIZE(v) != 1) { | 
 | 173 | 	PyErr_SetString(PyExc_TypeError, | 
 | 174 | 			"need a single Unicode character as parameter"); | 
 | 175 | 	goto onError; | 
 | 176 |     } | 
| Guido van Rossum | 8a16054 | 2000-03-31 17:26:12 +0000 | [diff] [blame^] | 177 |     index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->bidirectional; | 
| Guido van Rossum | 2a70a3a | 2000-03-10 23:10:21 +0000 | [diff] [blame] | 178 |     if (index < 0 ||  | 
 | 179 | 	index > sizeof(_PyUnicode_CategoryNames) /  | 
 | 180 | 	        sizeof(_PyUnicode_CategoryNames[0])) { | 
 | 181 | 	PyErr_Format(PyExc_SystemError, | 
 | 182 | 		     "bidirectional index out of range: %i", | 
 | 183 | 		     index); | 
 | 184 | 	goto onError; | 
 | 185 |     } | 
 | 186 |     return PyString_FromString(_PyUnicode_BidirectionalNames[index]); | 
 | 187 |      | 
 | 188 |  onError: | 
 | 189 |     return NULL; | 
 | 190 | } | 
 | 191 |  | 
 | 192 | static PyObject * | 
 | 193 | unicodedata_combining(PyObject *self, | 
 | 194 | 		      PyObject *args) | 
 | 195 | { | 
 | 196 |     PyUnicodeObject *v; | 
 | 197 |     int value; | 
 | 198 |  | 
 | 199 |     if (!PyArg_ParseTuple(args, "O!:combining", | 
 | 200 | 			  &PyUnicode_Type, &v)) | 
 | 201 | 	goto onError; | 
 | 202 |     if (PyUnicode_GET_SIZE(v) != 1) { | 
 | 203 | 	PyErr_SetString(PyExc_TypeError, | 
 | 204 | 			"need a single Unicode character as parameter"); | 
 | 205 | 	goto onError; | 
 | 206 |     } | 
| Guido van Rossum | 8a16054 | 2000-03-31 17:26:12 +0000 | [diff] [blame^] | 207 |     value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->combining; | 
| Guido van Rossum | 2a70a3a | 2000-03-10 23:10:21 +0000 | [diff] [blame] | 208 |     return PyInt_FromLong(value); | 
 | 209 |      | 
 | 210 |  onError: | 
 | 211 |     return NULL; | 
 | 212 | } | 
 | 213 |  | 
 | 214 | static PyObject * | 
 | 215 | unicodedata_mirrored(PyObject *self, | 
 | 216 | 		     PyObject *args) | 
 | 217 | { | 
 | 218 |     PyUnicodeObject *v; | 
 | 219 |     int value; | 
 | 220 |  | 
 | 221 |     if (!PyArg_ParseTuple(args, "O!:mirrored", | 
 | 222 | 			  &PyUnicode_Type, &v)) | 
 | 223 | 	goto onError; | 
 | 224 |     if (PyUnicode_GET_SIZE(v) != 1) { | 
 | 225 | 	PyErr_SetString(PyExc_TypeError, | 
 | 226 | 			"need a single Unicode character as parameter"); | 
 | 227 | 	goto onError; | 
 | 228 |     } | 
| Guido van Rossum | 8a16054 | 2000-03-31 17:26:12 +0000 | [diff] [blame^] | 229 |     value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->mirrored; | 
| Guido van Rossum | 2a70a3a | 2000-03-10 23:10:21 +0000 | [diff] [blame] | 230 |     return PyInt_FromLong(value); | 
 | 231 |      | 
 | 232 |  onError: | 
 | 233 |     return NULL; | 
 | 234 | } | 
 | 235 |  | 
 | 236 | static PyObject * | 
 | 237 | unicodedata_decomposition(PyObject *self, | 
 | 238 | 		      PyObject *args) | 
 | 239 | { | 
 | 240 |     PyUnicodeObject *v; | 
 | 241 |     const char *value; | 
 | 242 |  | 
 | 243 |     if (!PyArg_ParseTuple(args, "O!:decomposition", | 
 | 244 | 			  &PyUnicode_Type, &v)) | 
 | 245 | 	goto onError; | 
 | 246 |     if (PyUnicode_GET_SIZE(v) != 1) { | 
 | 247 | 	PyErr_SetString(PyExc_TypeError, | 
 | 248 | 			"need a single Unicode character as parameter"); | 
 | 249 | 	goto onError; | 
 | 250 |     } | 
| Guido van Rossum | 8a16054 | 2000-03-31 17:26:12 +0000 | [diff] [blame^] | 251 |     value = unicode_db((int)*PyUnicode_AS_UNICODE(v))->decomposition; | 
| Guido van Rossum | 2a70a3a | 2000-03-10 23:10:21 +0000 | [diff] [blame] | 252 |     if (value == NULL) | 
 | 253 | 	return PyString_FromString(""); | 
 | 254 |     else | 
 | 255 | 	return PyString_FromString(value); | 
 | 256 |      | 
 | 257 |  onError: | 
 | 258 |     return NULL; | 
 | 259 | } | 
 | 260 |  | 
 | 261 | /* XXX Add doc strings. */ | 
 | 262 |  | 
 | 263 | static PyMethodDef unicodedata_functions[] = { | 
 | 264 |     {"decimal",		unicodedata_decimal,			1}, | 
 | 265 |     {"digit",		unicodedata_digit,			1}, | 
 | 266 |     {"numeric",		unicodedata_numeric,			1}, | 
 | 267 |     {"category",	unicodedata_category,			1}, | 
 | 268 |     {"bidirectional",	unicodedata_bidirectional,		1}, | 
 | 269 |     {"combining",	unicodedata_combining,			1}, | 
 | 270 |     {"mirrored",	unicodedata_mirrored,			1}, | 
 | 271 |     {"decomposition",	unicodedata_decomposition,		1}, | 
 | 272 |     {NULL, NULL}		/* sentinel */ | 
 | 273 | }; | 
 | 274 |  | 
 | 275 | DL_EXPORT(void) | 
 | 276 | initunicodedata() | 
 | 277 | { | 
 | 278 |     Py_InitModule("unicodedata", unicodedata_functions); | 
 | 279 | } |