/*
 * _iconv_codec.c
 *
 * libiconv adaptor for Python iconvcodec
 *
 * Author  : Hye-Shik Chang <perky@FreeBSD.org>
 * Created : 17 January 2003
 */
| 9 | |
#include "Python.h"
#include <errno.h>
#include <string.h>
#include <iconv.h>
| 13 | |
/* Version string published by init_iconv_codec() as the module's
 * `__version__' attribute. */
static const char *__version__ =
    "$Revision$";
| 15 | |
| 16 | #if Py_USING_UNICODE |
| 17 | # if Py_UNICODE_SIZE == 2 |
| 18 | # ifdef __GNU_LIBRARY__ |
| 19 | # define UNICODE_ENCODING "ucs-2" |
| 20 | # else |
| 21 | # define UNICODE_ENCODING "ucs-2-internal" |
| 22 | # endif |
| 23 | # define MBENCODED_LENGTH_MAX 4 |
| 24 | # elif Py_UNICODE_SIZE == 4 |
| 25 | # ifdef __GNU_LIBRARY__ |
| 26 | # define UNICODE_ENCODING "ucs-4" |
| 27 | # else |
| 28 | # define UNICODE_ENCODING "ucs-4-internal" |
| 29 | # endif |
| 30 | # define MBENCODED_LENGTH_MAX 6 |
| 31 | # endif |
| 32 | #else |
| 33 | # error "Unicode is not available" |
| 34 | #endif |
| 35 | |
| 36 | typedef struct { |
| 37 | PyObject_HEAD |
| 38 | iconv_t enchdl, dechdl; |
| 39 | char *encoding; |
| 40 | } iconvcodecObject; |
| 41 | PyDoc_STRVAR(iconvcodec_doc, "iconvcodec object"); |
| 42 | |
| 43 | staticforward PyTypeObject iconvcodec_Type; |
| 44 | |
| 45 | |
/*
 * Built-in error policies.  These are small integer sentinels cast to
 * PyObject *, not real objects: they must never be INCREF'ed/DECREF'ed.
 * Anything comparing greater than ERROR_MAX is treated by the codec
 * methods as a real callable obtained from PyCodec_LookupError().
 * Each expansion is fully parenthesized so it stays a single operand
 * wherever it is used.
 */
#define ERROR_STRICT            ((PyObject *)(1))
#define ERROR_IGNORE            ((PyObject *)(2))
#define ERROR_REPLACE           ((PyObject *)(3))
#define ERROR_MAX               ERROR_REPLACE

/* Substitutes emitted by the built-in 'replace' policy. */
#define REPLACEMENT_CHAR_DECODE 0xFFFD
#define REPLACEMENT_CHAR_ENCODE '?'

/* Encoding used when the instance has no `encoding' attribute. */
#define DEFAULT_ENCODING        "utf-8"
| 55 | |
| 56 | |
| 57 | static PyObject * |
| 58 | get_errorcallback(const char *errors) |
| 59 | { |
| 60 | if (errors == NULL || strcmp(errors, "strict") == 0) |
| 61 | return ERROR_STRICT; |
| 62 | else if (strcmp(errors, "ignore") == 0) |
| 63 | return ERROR_IGNORE; |
| 64 | else if (strcmp(errors, "replace") == 0) |
| 65 | return ERROR_REPLACE; |
| 66 | else |
| 67 | return PyCodec_LookupError(errors); |
| 68 | } |
| 69 | |
| 70 | |
| 71 | PyDoc_STRVAR(iconvcodec_encode__doc__, |
| 72 | "I.encode(unicode, [,errors]) -> (string, length consumed)\n\ |
| 73 | \n\ |
| 74 | Return an encoded string version of `unicode'. errors may be given to\n\ |
| 75 | set a different error handling scheme. Default is 'strict' meaning that\n\ |
| 76 | encoding errors raise a UnicodeEncodeError. Other possible values are\n\ |
| 77 | 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\ |
| 78 | registered with codecs.register_error that can handle UnicodeEncodeErrors."); |
| 79 | |
| 80 | static PyObject * |
| 81 | iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) |
| 82 | { |
| 83 | static char *kwlist[] = { "input", "errors", NULL }; |
| 84 | Py_UNICODE *input; |
| 85 | int inputlen; |
| 86 | char *errors = NULL/*strict*/, *out, *out_top; |
| 87 | const char *inp, *inp_top; |
| 88 | size_t inplen, inplen_total, outlen, outlen_total, estep; |
| 89 | PyObject *outputobj = NULL, *errorcb = NULL, |
| 90 | *exceptionobj = NULL; |
| 91 | |
| 92 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode", |
| 93 | kwlist, &input, &inputlen, &errors)) |
| 94 | return NULL; /* TypeError */ |
| 95 | |
| 96 | errorcb = get_errorcallback(errors); |
| 97 | if (errorcb == NULL) |
| 98 | return NULL; /* LookupError or something else from error handler */ |
| 99 | |
| 100 | inp = inp_top = (char *)input; |
| 101 | inplen = inplen_total = (size_t)(inputlen * Py_UNICODE_SIZE); |
| 102 | |
| 103 | outlen = inputlen * MBENCODED_LENGTH_MAX; |
| 104 | if (outlen < 16) |
| 105 | outlen = 16; /* for iso-2022 codecs */ |
| 106 | |
| 107 | outputobj = PyString_FromStringAndSize(NULL, outlen); |
| 108 | if (outputobj == NULL) |
| 109 | return NULL; |
| 110 | out = out_top = PyString_AS_STRING(outputobj); |
| 111 | outlen_total = outlen; |
| 112 | |
| 113 | estep = inputlen * Py_UNICODE_SIZE / 2; |
| 114 | |
| 115 | #define RESIZE_OUTBUFFER(size) { \ |
| 116 | size_t toadd = (size); \ |
| 117 | outlen_total += toadd; \ |
| 118 | outlen += toadd; \ |
| 119 | if (_PyString_Resize(&outputobj, outlen_total) == -1) \ |
| 120 | goto errorexit; \ |
| 121 | out = PyString_AS_STRING(outputobj) + (out - out_top); \ |
| 122 | out_top = PyString_AS_STRING(outputobj); \ |
| 123 | } |
| 124 | while (inplen > 0) { |
Neal Norwitz | 57c115c | 2003-01-26 16:26:20 +0000 | [diff] [blame] | 125 | if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen) == -1) { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 126 | char reason[128]; |
| 127 | int errpos; |
| 128 | |
| 129 | if (errno == E2BIG) { |
| 130 | RESIZE_OUTBUFFER(estep); |
| 131 | continue; |
| 132 | } |
| 133 | |
| 134 | if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) { |
| 135 | inplen -= Py_UNICODE_SIZE; |
| 136 | inp += Py_UNICODE_SIZE; |
| 137 | if (errorcb == ERROR_REPLACE) { |
| 138 | if (outlen < 1) |
| 139 | RESIZE_OUTBUFFER(errno == EINVAL ? 1 : estep); |
| 140 | outlen--; |
| 141 | *out++ = REPLACEMENT_CHAR_ENCODE; |
| 142 | } |
| 143 | if (errno == EINVAL) break; |
| 144 | else continue; |
| 145 | } |
| 146 | |
| 147 | errpos = (int)(inp - inp_top) / Py_UNICODE_SIZE; |
| 148 | sprintf(reason, "Undefined character map from " |
| 149 | #if Py_UNICODE_SIZE == 2 |
| 150 | "\\u%04x" |
| 151 | #elif Py_UNICODE_SIZE == 4 |
| 152 | "\\u%08x" |
| 153 | #endif |
| 154 | , *(Py_UNICODE *)inp); |
| 155 | |
| 156 | if (exceptionobj == NULL) { |
| 157 | if ((exceptionobj = PyUnicodeEncodeError_Create( |
| 158 | self->encoding, input, inputlen, |
| 159 | errpos, errpos + 1, reason)) == NULL) |
| 160 | goto errorexit; |
| 161 | } else { |
| 162 | if (PyUnicodeEncodeError_SetStart(exceptionobj, errpos) != 0) |
| 163 | goto errorexit; |
| 164 | if (PyUnicodeEncodeError_SetEnd(exceptionobj, errpos + 1) != 0) |
| 165 | goto errorexit; |
| 166 | if (PyUnicodeEncodeError_SetReason(exceptionobj, reason) != 0) |
| 167 | goto errorexit; |
| 168 | } |
| 169 | |
| 170 | if (errorcb == ERROR_STRICT) { |
| 171 | PyCodec_StrictErrors(exceptionobj); |
| 172 | goto errorexit; |
| 173 | } else { |
| 174 | PyObject *argsobj, *retobj, *retuni; |
| 175 | long newpos; |
| 176 | |
| 177 | argsobj = PyTuple_New(1); |
| 178 | if (argsobj == NULL) |
| 179 | goto errorexit; |
| 180 | PyTuple_SET_ITEM(argsobj, 0, exceptionobj); |
| 181 | Py_INCREF(exceptionobj); |
| 182 | retobj = PyObject_CallObject(errorcb, argsobj); |
| 183 | Py_DECREF(argsobj); |
| 184 | if (retobj == NULL) |
| 185 | goto errorexit; |
| 186 | |
| 187 | if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || |
| 188 | !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || |
| 189 | !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { |
| 190 | Py_DECREF(retobj); |
| 191 | PyErr_SetString(PyExc_ValueError, "encoding error handler " |
| 192 | "must return (unicode, int) tuple"); |
| 193 | goto errorexit; |
| 194 | } |
| 195 | if (PyUnicode_GET_SIZE(retuni) > 0) { |
| 196 | #define errorexit errorexit_cbpad |
| 197 | PyObject *retstr = NULL; |
| 198 | int retstrsize; |
| 199 | |
| 200 | retstr = PyUnicode_AsEncodedString( |
| 201 | retuni, self->encoding, NULL); |
| 202 | if (retstr == NULL || !PyString_Check(retstr)) |
| 203 | goto errorexit; |
| 204 | |
| 205 | retstrsize = PyString_GET_SIZE(retstr); |
| 206 | if (outlen < retstrsize) |
| 207 | RESIZE_OUTBUFFER(errno == EINVAL || retstrsize > estep |
| 208 | ? retstrsize - outlen : estep); |
| 209 | |
| 210 | memcpy(out, PyString_AS_STRING(retstr), retstrsize); |
| 211 | out += retstrsize; |
| 212 | outlen -= retstrsize; |
| 213 | #undef errorexit |
| 214 | if (0) { |
| 215 | errorexit_cbpad: Py_XDECREF(retobj); |
| 216 | Py_XDECREF(retstr); |
| 217 | goto errorexit; |
| 218 | } |
| 219 | Py_DECREF(retstr); |
| 220 | } |
| 221 | |
| 222 | newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); |
| 223 | Py_DECREF(retobj); |
| 224 | |
| 225 | if (newpos < 0) |
| 226 | newpos = inputlen - newpos; |
| 227 | if (newpos < 0 || newpos >= inputlen) |
| 228 | break; |
| 229 | inp = inp_top + Py_UNICODE_SIZE * newpos; |
| 230 | inplen = inplen_total - Py_UNICODE_SIZE * newpos; |
| 231 | } |
| 232 | } else |
| 233 | break; |
| 234 | } |
| 235 | #undef RESIZE_OUTBUFFER |
| 236 | |
| 237 | { |
| 238 | PyObject *rettup; |
| 239 | int finalsize; |
| 240 | |
| 241 | finalsize = (int)(out - out_top); |
| 242 | |
| 243 | if (finalsize != outlen_total) { |
| 244 | if (_PyString_Resize(&outputobj, finalsize) == -1) |
| 245 | goto errorexit; |
| 246 | } |
| 247 | |
| 248 | if (errorcb > ERROR_MAX) { |
| 249 | Py_DECREF(errorcb); |
| 250 | } |
| 251 | Py_XDECREF(exceptionobj); |
| 252 | |
| 253 | rettup = PyTuple_New(2); |
| 254 | if (rettup == NULL) { |
| 255 | Py_DECREF(outputobj); |
| 256 | return NULL; |
| 257 | } |
| 258 | PyTuple_SET_ITEM(rettup, 0, outputobj); |
| 259 | PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inputlen)); |
| 260 | return rettup; |
| 261 | } |
| 262 | |
| 263 | errorexit: |
| 264 | Py_XDECREF(outputobj); |
| 265 | if (errorcb > ERROR_MAX) { |
| 266 | Py_DECREF(errorcb); |
| 267 | } |
| 268 | Py_XDECREF(exceptionobj); |
| 269 | |
| 270 | return NULL; |
| 271 | } |
| 272 | |
| 273 | PyDoc_STRVAR(iconvcodec_decode__doc__, |
| 274 | "I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\ |
| 275 | \n\ |
| 276 | Decodes `string' using I, an iconvcodec instance. errors may be given\n\ |
| 277 | to set a different error handling scheme. Default is 'strict' meaning\n\ |
| 278 | that encoding errors raise a UnicodeDecodeError. Other possible values\n\ |
| 279 | are 'ignore' and 'replace' as well as any other name registerd with\n\ |
| 280 | codecs.register_error that is able to handle UnicodeDecodeErrors."); |
| 281 | |
| 282 | static PyObject * |
| 283 | iconvcodec_decode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) |
| 284 | { |
| 285 | static char *kwlist[] = { "input", "errors", NULL }; |
| 286 | char *errors = NULL/*strict*/, *out, *out_top; |
| 287 | const char *inp, *inp_top; |
| 288 | int inplen_int; |
| 289 | size_t inplen, inplen_total, outlen, outlen_total, estep; |
| 290 | PyObject *outputobj = NULL, *errorcb = NULL, |
| 291 | *exceptionobj = NULL; |
| 292 | |
| 293 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|s:decode", |
| 294 | kwlist, &inp, &inplen_int, &errors)) |
| 295 | return NULL; /* TypeError */ |
| 296 | |
| 297 | errorcb = get_errorcallback(errors); |
| 298 | if (errorcb == NULL) |
| 299 | return NULL; /* LookupError or something else from error handler */ |
| 300 | |
| 301 | inp_top = inp; |
| 302 | inplen_total = inplen = (size_t)inplen_int; |
| 303 | |
| 304 | outputobj = PyUnicode_FromUnicode(NULL, inplen); |
| 305 | if (outputobj == NULL) |
| 306 | return NULL; |
| 307 | outlen_total = outlen = PyUnicode_GET_DATA_SIZE(outputobj); |
| 308 | out = out_top = (char *)PyUnicode_AS_UNICODE(outputobj); |
| 309 | |
| 310 | estep = outlen / 2; |
| 311 | |
| 312 | #define RESIZE_OUTBUFFER(size) { \ |
| 313 | size_t toadd = (size); \ |
| 314 | outlen_total += toadd; \ |
| 315 | outlen += toadd; \ |
| 316 | if (PyUnicode_Resize(&outputobj, outlen_total/Py_UNICODE_SIZE) == -1) \ |
| 317 | goto errorexit; \ |
| 318 | out = (char *)PyUnicode_AS_UNICODE(outputobj) + (out - out_top); \ |
| 319 | out_top = (char *)PyUnicode_AS_UNICODE(outputobj); \ |
| 320 | } |
| 321 | while (inplen > 0) { |
Neal Norwitz | 57c115c | 2003-01-26 16:26:20 +0000 | [diff] [blame] | 322 | if (iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen) == -1) { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 323 | char reason[128], *reasonpos = (char *)reason; |
| 324 | int errpos; |
| 325 | |
| 326 | if (errno == E2BIG) { |
| 327 | RESIZE_OUTBUFFER(estep); |
| 328 | continue; |
| 329 | } |
| 330 | |
| 331 | if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) { |
| 332 | inplen--; inp++; |
| 333 | if (errorcb == ERROR_REPLACE) { |
| 334 | Py_UNICODE *replp; |
| 335 | |
| 336 | if (outlen < Py_UNICODE_SIZE) |
| 337 | RESIZE_OUTBUFFER( |
| 338 | errno == EINVAL || Py_UNICODE_SIZE > estep |
| 339 | ? Py_UNICODE_SIZE : estep); |
| 340 | |
| 341 | /* some compilers hate casted lvalue */ |
| 342 | replp = (Py_UNICODE *)out; |
| 343 | assert((long)replp % Py_UNICODE_SIZE == 0);/* aligned? */ |
| 344 | *replp = REPLACEMENT_CHAR_DECODE; |
| 345 | |
| 346 | out += Py_UNICODE_SIZE; |
| 347 | outlen -= Py_UNICODE_SIZE; |
| 348 | } |
| 349 | if (errno == EINVAL) break; |
| 350 | else continue; |
| 351 | } |
| 352 | |
| 353 | errpos = (int)(inp - inp_top); |
| 354 | reasonpos += sprintf(reason, "Invalid multibyte sequence \\x%02x", |
| 355 | (unsigned char)*inp); |
| 356 | if (inplen > 1) { |
| 357 | reasonpos += sprintf(reasonpos, |
| 358 | "\\x%02x", (unsigned char)*(inp+1)); |
| 359 | if (inplen > 2) |
| 360 | sprintf(reasonpos, "\\x%02x", (unsigned char)*(inp+2)); |
| 361 | } |
| 362 | |
| 363 | if (exceptionobj == NULL) { |
| 364 | exceptionobj = PyUnicodeDecodeError_Create( |
| 365 | self->encoding, inp_top, inplen_total, |
| 366 | errpos, errpos + 1, reason); |
| 367 | if (exceptionobj == NULL) |
| 368 | goto errorexit; |
| 369 | } else { |
| 370 | if (PyUnicodeDecodeError_SetStart(exceptionobj, errpos) != 0) |
| 371 | goto errorexit; |
| 372 | if (PyUnicodeDecodeError_SetEnd(exceptionobj, errpos + 1) != 0) |
| 373 | goto errorexit; |
| 374 | if (PyUnicodeDecodeError_SetReason(exceptionobj, reason) != 0) |
| 375 | goto errorexit; |
| 376 | } |
| 377 | |
| 378 | if (errorcb == ERROR_STRICT) { |
| 379 | PyCodec_StrictErrors(exceptionobj); |
| 380 | goto errorexit; |
| 381 | } else { |
| 382 | PyObject *argsobj, *retobj, *retuni; |
| 383 | long newpos; |
| 384 | |
| 385 | argsobj = PyTuple_New(1); |
| 386 | if (argsobj == NULL) |
| 387 | goto errorexit; |
| 388 | PyTuple_SET_ITEM(argsobj, 0, exceptionobj); |
| 389 | Py_INCREF(exceptionobj); |
| 390 | retobj = PyObject_CallObject(errorcb, argsobj); |
| 391 | Py_DECREF(argsobj); |
| 392 | if (retobj == NULL) |
| 393 | goto errorexit; |
| 394 | |
| 395 | if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || |
| 396 | !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || |
| 397 | !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { |
| 398 | Py_DECREF(retobj); |
| 399 | PyErr_SetString(PyExc_ValueError, "decoding error handler " |
| 400 | "must return (unicode, int) tuple"); |
| 401 | goto errorexit; |
| 402 | } |
| 403 | if (PyUnicode_GET_SIZE(retuni) > 0) { |
| 404 | #define errorexit errorexit_cbpad |
| 405 | size_t retunisize; |
| 406 | |
| 407 | retunisize = PyUnicode_GET_DATA_SIZE(retuni); |
| 408 | if (outlen < retunisize) |
| 409 | RESIZE_OUTBUFFER(errno == EINVAL || retunisize > estep |
| 410 | ? retunisize - outlen : estep); |
| 411 | |
| 412 | memcpy(out, PyUnicode_AS_DATA(retuni), retunisize); |
| 413 | out += retunisize; |
| 414 | outlen -= retunisize; |
| 415 | #undef errorexit |
| 416 | if (0) { |
| 417 | errorexit_cbpad: Py_DECREF(retobj); |
| 418 | goto errorexit; |
| 419 | } |
| 420 | } |
| 421 | |
| 422 | newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); |
| 423 | Py_DECREF(retobj); |
| 424 | |
| 425 | if (newpos < 0) |
| 426 | newpos = inplen_total - newpos; |
| 427 | if (newpos < 0 || newpos >= inplen_total) |
| 428 | break; |
| 429 | inp = inp_top + newpos; |
| 430 | inplen = inplen_total - newpos; |
| 431 | } |
| 432 | } else |
| 433 | break; |
| 434 | } |
| 435 | #undef RESIZE_OUTBUFFER |
| 436 | |
| 437 | { |
| 438 | PyObject *rettup; |
| 439 | int finalsize; |
| 440 | |
| 441 | finalsize = (int)(out - out_top); |
| 442 | if (finalsize != outlen_total) { |
| 443 | if (PyUnicode_Resize(&outputobj, finalsize / Py_UNICODE_SIZE) == -1) |
| 444 | goto errorexit; |
| 445 | } |
| 446 | |
| 447 | if (errorcb > ERROR_MAX) { |
| 448 | Py_DECREF(errorcb); |
| 449 | } |
| 450 | Py_XDECREF(exceptionobj); |
| 451 | |
| 452 | rettup = PyTuple_New(2); |
| 453 | if (rettup == NULL) { |
| 454 | Py_DECREF(outputobj); |
| 455 | return NULL; |
| 456 | } |
| 457 | PyTuple_SET_ITEM(rettup, 0, outputobj); |
| 458 | PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inplen_total)); |
| 459 | return rettup; |
| 460 | } |
| 461 | |
| 462 | errorexit: |
| 463 | Py_XDECREF(outputobj); |
| 464 | if (errorcb > ERROR_MAX) { |
| 465 | Py_DECREF(errorcb); |
| 466 | } |
| 467 | Py_XDECREF(exceptionobj); |
| 468 | |
| 469 | return NULL; |
| 470 | } |
| 471 | |
| 472 | static struct PyMethodDef iconvcodec_methods[] = { |
| 473 | {"encode", (PyCFunction)iconvcodec_encode, |
| 474 | METH_VARARGS | METH_KEYWORDS, |
| 475 | iconvcodec_encode__doc__}, |
| 476 | {"decode", (PyCFunction)iconvcodec_decode, |
| 477 | METH_VARARGS | METH_KEYWORDS, |
| 478 | iconvcodec_decode__doc__}, |
| 479 | {NULL, NULL}, |
| 480 | }; |
| 481 | |
| 482 | static PyObject * |
| 483 | iconvcodec_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) |
| 484 | { |
| 485 | PyObject *encobj = NULL; |
| 486 | iconvcodecObject *new = NULL; |
| 487 | |
| 488 | new = (iconvcodecObject *)type->tp_alloc(type, 0); |
| 489 | if (new == NULL) |
| 490 | return NULL; |
| 491 | |
| 492 | new->encoding = NULL; |
| 493 | new->enchdl = new->dechdl = (iconv_t)(-1); |
| 494 | |
| 495 | encobj = PyObject_GetAttrString((PyObject *)new, "encoding"); |
| 496 | if (encobj == NULL) { |
| 497 | PyErr_Clear(); |
| 498 | new->encoding = PyMem_Malloc(sizeof(DEFAULT_ENCODING)); |
| 499 | strcpy(new->encoding, DEFAULT_ENCODING); |
| 500 | } else if (!PyString_Check(encobj)) { |
| 501 | Py_DECREF(encobj); |
| 502 | PyErr_SetString(PyExc_TypeError, |
| 503 | "`encoding' attribute must be a string."); |
| 504 | goto errorexit; |
| 505 | } else { |
| 506 | new->encoding = PyMem_Malloc(PyString_GET_SIZE(encobj) + 1); |
| 507 | strcpy(new->encoding, PyString_AS_STRING(encobj)); |
| 508 | Py_DECREF(encobj); |
| 509 | } |
| 510 | |
| 511 | new->dechdl = iconv_open(UNICODE_ENCODING, new->encoding); |
| 512 | if (new->dechdl == (iconv_t)(-1)) { |
| 513 | PyErr_SetString(PyExc_ValueError, "unsupported decoding"); |
| 514 | goto errorexit; |
| 515 | } |
| 516 | |
| 517 | new->enchdl = iconv_open(new->encoding, UNICODE_ENCODING); |
| 518 | if (new->enchdl == (iconv_t)(-1)) { |
| 519 | PyErr_SetString(PyExc_ValueError, "unsupported encoding"); |
| 520 | iconv_close(new->dechdl); |
| 521 | new->dechdl = (iconv_t)(-1); |
| 522 | goto errorexit; |
| 523 | } |
| 524 | |
| 525 | return (PyObject *)new; |
| 526 | |
| 527 | errorexit: |
| 528 | Py_XDECREF(new); |
| 529 | |
| 530 | return NULL; |
| 531 | } |
| 532 | |
| 533 | static void |
| 534 | iconvcodec_dealloc(iconvcodecObject *self) |
| 535 | { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 536 | if (self->enchdl != (iconv_t)-1) |
| 537 | iconv_close(self->enchdl); |
| 538 | if (self->dechdl != (iconv_t)-1) |
| 539 | iconv_close(self->dechdl); |
| 540 | if (self->encoding != NULL) |
| 541 | PyMem_Free(self->encoding); |
| 542 | |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 543 | self->ob_type->tp_free((PyObject *)self); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 544 | } |
| 545 | |
| 546 | static PyObject * |
| 547 | iconvcodec_repr(PyObject *self) |
| 548 | { |
| 549 | return PyString_FromFormat("<iconvcodec encoding='%s'>", |
| 550 | ((iconvcodecObject *)self)->encoding); |
| 551 | } |
| 552 | |
| 553 | statichere PyTypeObject iconvcodec_Type = { |
| 554 | PyObject_HEAD_INIT(&PyType_Type) |
| 555 | 0, /* Number of items for varobject */ |
| 556 | "iconvcodec", /* Name of this type */ |
| 557 | sizeof(iconvcodecObject), /* Basic object size */ |
| 558 | 0, /* Item size for varobject */ |
| 559 | (destructor)iconvcodec_dealloc, /* tp_dealloc */ |
| 560 | 0, /* tp_print */ |
| 561 | 0, /* tp_getattr */ |
| 562 | 0, /* tp_setattr */ |
| 563 | 0, /* tp_compare */ |
| 564 | iconvcodec_repr, /* tp_repr */ |
| 565 | 0, /* tp_as_number */ |
| 566 | 0, /* tp_as_sequence */ |
| 567 | 0, /* tp_as_mapping */ |
| 568 | 0, /* tp_hash */ |
| 569 | 0, /* tp_call */ |
| 570 | 0, /* tp_str */ |
| 571 | PyObject_GenericGetAttr, /* tp_getattro */ |
| 572 | 0, /* tp_setattro */ |
| 573 | 0, /* tp_as_buffer */ |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 574 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 575 | iconvcodec_doc, /* tp_doc */ |
| 576 | 0, /* tp_traverse */ |
| 577 | 0, /* tp_clear */ |
| 578 | 0, /* tp_richcompare */ |
| 579 | 0, /* tp_weaklistoffset */ |
| 580 | 0, /* tp_iter */ |
| 581 | 0, /* tp_iterext */ |
| 582 | iconvcodec_methods, /* tp_methods */ |
| 583 | 0, /* tp_members */ |
| 584 | 0, /* tp_getset */ |
| 585 | 0, /* tp_base */ |
| 586 | 0, /* tp_dict */ |
| 587 | 0, /* tp_descr_get */ |
| 588 | 0, /* tp_descr_set */ |
| 589 | 0, /* tp_dictoffset */ |
| 590 | 0, /* tp_init */ |
| 591 | PyType_GenericAlloc, /* tp_alloc */ |
| 592 | iconvcodec_new, /* tp_new */ |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 593 | PyObject_Del, /* tp_free */ |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 594 | }; |
| 595 | |
| 596 | static struct PyMethodDef _iconv_codec_methods[] = { |
| 597 | {NULL, NULL}, |
| 598 | }; |
| 599 | |
| 600 | void |
| 601 | init_iconv_codec(void) |
| 602 | { |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 603 | PyObject *m; |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 604 | |
| 605 | m = Py_InitModule("_iconv_codec", _iconv_codec_methods); |
| 606 | |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 607 | PyModule_AddStringConstant(m, "__version__", (char*)__version__); |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 608 | Py_INCREF(&iconvcodec_Type); |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 609 | PyModule_AddObject(m, "iconvcodec", (PyObject *)(&iconvcodec_Type)); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 610 | PyModule_AddStringConstant(m, "internal_encoding", UNICODE_ENCODING); |
| 611 | |
| 612 | if (PyErr_Occurred()) |
| 613 | Py_FatalError("can't initialize the _iconv_codec module"); |
| 614 | } |
| 615 | |
/*
 * ex: ts=8 sts=4 et
 * $Id$
 */