/*
 * _iconv_codec.c
 *
 * libiconv adaptor for Python iconvcodec
 *
 * Author  : Hye-Shik Chang <perky@FreeBSD.org>
 * Created : 17 January 2003
 */
| 9 | |
| 10 | #include "Python.h" |
| 11 | #include <string.h> |
| 12 | #include <iconv.h> |
| 13 | |
/* Revision keyword, exported to Python as the module's __version__. */
static const char *__version__ =
    "$Revision$";
| 15 | |
| 16 | #if Py_USING_UNICODE |
| 17 | # if Py_UNICODE_SIZE == 2 |
| 18 | # ifdef __GNU_LIBRARY__ |
| 19 | # define UNICODE_ENCODING "ucs-2" |
| 20 | # else |
| 21 | # define UNICODE_ENCODING "ucs-2-internal" |
| 22 | # endif |
| 23 | # define MBENCODED_LENGTH_MAX 4 |
| 24 | # elif Py_UNICODE_SIZE == 4 |
| 25 | # ifdef __GNU_LIBRARY__ |
| 26 | # define UNICODE_ENCODING "ucs-4" |
| 27 | # else |
| 28 | # define UNICODE_ENCODING "ucs-4-internal" |
| 29 | # endif |
| 30 | # define MBENCODED_LENGTH_MAX 6 |
| 31 | # endif |
| 32 | #else |
| 33 | # error "Unicode is not available" |
| 34 | #endif |
| 35 | |
| 36 | typedef struct { |
| 37 | PyObject_HEAD |
| 38 | iconv_t enchdl, dechdl; |
| 39 | char *encoding; |
| 40 | } iconvcodecObject; |
| 41 | PyDoc_STRVAR(iconvcodec_doc, "iconvcodec object"); |
| 42 | |
| 43 | staticforward PyTypeObject iconvcodec_Type; |
| 44 | |
/*
 * Whether data in the chosen internal encoding has to be byte-swapped to
 * reach native endianness:  0 = no,  1 = yes,  -1 = not determined yet.
 */
static int byteswap = -1;
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 49 | |
/*
 * The built-in error modes are represented by small integer sentinels
 * stored in a PyObject pointer.  Any value above ERROR_MAX is treated by
 * the codec functions as a real, owned callback object.
 */
#define ERROR_STRICT    ((PyObject *)(1))
#define ERROR_IGNORE    ((PyObject *)(2))
#define ERROR_REPLACE   ((PyObject *)(3))
#define ERROR_MAX       ERROR_REPLACE

/* Substitutes emitted by the "replace" error mode. */
#define REPLACEMENT_CHAR_DECODE 0xFFFD
#define REPLACEMENT_CHAR_ENCODE '?'

/* Encoding used when the object has no `encoding' attribute. */
#define DEFAULT_ENCODING "utf-8"
| 59 | |
| 60 | |
| 61 | static PyObject * |
| 62 | get_errorcallback(const char *errors) |
| 63 | { |
| 64 | if (errors == NULL || strcmp(errors, "strict") == 0) |
| 65 | return ERROR_STRICT; |
| 66 | else if (strcmp(errors, "ignore") == 0) |
| 67 | return ERROR_IGNORE; |
| 68 | else if (strcmp(errors, "replace") == 0) |
| 69 | return ERROR_REPLACE; |
| 70 | else |
| 71 | return PyCodec_LookupError(errors); |
| 72 | } |
| 73 | |
| 74 | |
| 75 | PyDoc_STRVAR(iconvcodec_encode__doc__, |
| 76 | "I.encode(unicode, [,errors]) -> (string, length consumed)\n\ |
| 77 | \n\ |
| 78 | Return an encoded string version of `unicode'. errors may be given to\n\ |
| 79 | set a different error handling scheme. Default is 'strict' meaning that\n\ |
| 80 | encoding errors raise a UnicodeEncodeError. Other possible values are\n\ |
| 81 | 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\ |
| 82 | registered with codecs.register_error that can handle UnicodeEncodeErrors."); |
| 83 | |
| 84 | static PyObject * |
| 85 | iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) |
| 86 | { |
| 87 | static char *kwlist[] = { "input", "errors", NULL }; |
| 88 | Py_UNICODE *input; |
| 89 | int inputlen; |
| 90 | char *errors = NULL/*strict*/, *out, *out_top; |
| 91 | const char *inp, *inp_top; |
| 92 | size_t inplen, inplen_total, outlen, outlen_total, estep; |
| 93 | PyObject *outputobj = NULL, *errorcb = NULL, |
| 94 | *exceptionobj = NULL; |
Walter Dörwald | 757246c | 2003-01-31 16:26:50 +0000 | [diff] [blame] | 95 | Py_UNICODE *swappedinput = NULL; |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 96 | int swapi; |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 97 | |
| 98 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode", |
| 99 | kwlist, &input, &inputlen, &errors)) |
| 100 | return NULL; /* TypeError */ |
| 101 | |
| 102 | errorcb = get_errorcallback(errors); |
| 103 | if (errorcb == NULL) |
| 104 | return NULL; /* LookupError or something else from error handler */ |
| 105 | |
| 106 | inp = inp_top = (char *)input; |
| 107 | inplen = inplen_total = (size_t)(inputlen * Py_UNICODE_SIZE); |
| 108 | |
| 109 | outlen = inputlen * MBENCODED_LENGTH_MAX; |
| 110 | if (outlen < 16) |
| 111 | outlen = 16; /* for iso-2022 codecs */ |
| 112 | |
| 113 | outputobj = PyString_FromStringAndSize(NULL, outlen); |
| 114 | if (outputobj == NULL) |
| 115 | return NULL; |
| 116 | out = out_top = PyString_AS_STRING(outputobj); |
| 117 | outlen_total = outlen; |
| 118 | |
| 119 | estep = inputlen * Py_UNICODE_SIZE / 2; |
| 120 | |
| 121 | #define RESIZE_OUTBUFFER(size) { \ |
| 122 | size_t toadd = (size); \ |
| 123 | outlen_total += toadd; \ |
| 124 | outlen += toadd; \ |
| 125 | if (_PyString_Resize(&outputobj, outlen_total) == -1) \ |
| 126 | goto errorexit; \ |
| 127 | out = PyString_AS_STRING(outputobj) + (out - out_top); \ |
| 128 | out_top = PyString_AS_STRING(outputobj); \ |
| 129 | } |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 130 | if (byteswap) { |
| 131 | swappedinput = PyMem_Malloc(inplen); |
| 132 | if (swappedinput == NULL) |
| 133 | return NULL; |
| 134 | for (swapi = 0; swapi<inputlen; ++swapi) |
| 135 | { |
| 136 | Py_UNICODE c = input[swapi]; |
| 137 | #if Py_UNICODE_SIZE == 2 |
| 138 | c = ((char *)&c)[0]<<8 | ((char *)&c)[1]; |
| 139 | #else |
| 140 | c = ((char *)&c)[0]<<24 | ((char *)&c)[1]<<16 | |
| 141 | ((char *)&c)[2]<<8 | ((char *)&c)[3]; |
| 142 | #endif |
| 143 | swappedinput[swapi] = c; |
| 144 | } |
| 145 | inp = inp_top = (char *)swappedinput; |
| 146 | } |
| 147 | |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 148 | while (inplen > 0) { |
Neal Norwitz | 57c115c | 2003-01-26 16:26:20 +0000 | [diff] [blame] | 149 | if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen) == -1) { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 150 | char reason[128]; |
| 151 | int errpos; |
| 152 | |
| 153 | if (errno == E2BIG) { |
| 154 | RESIZE_OUTBUFFER(estep); |
| 155 | continue; |
| 156 | } |
| 157 | |
| 158 | if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) { |
| 159 | inplen -= Py_UNICODE_SIZE; |
| 160 | inp += Py_UNICODE_SIZE; |
| 161 | if (errorcb == ERROR_REPLACE) { |
| 162 | if (outlen < 1) |
| 163 | RESIZE_OUTBUFFER(errno == EINVAL ? 1 : estep); |
| 164 | outlen--; |
| 165 | *out++ = REPLACEMENT_CHAR_ENCODE; |
| 166 | } |
| 167 | if (errno == EINVAL) break; |
| 168 | else continue; |
| 169 | } |
| 170 | |
| 171 | errpos = (int)(inp - inp_top) / Py_UNICODE_SIZE; |
| 172 | sprintf(reason, "Undefined character map from " |
| 173 | #if Py_UNICODE_SIZE == 2 |
| 174 | "\\u%04x" |
| 175 | #elif Py_UNICODE_SIZE == 4 |
| 176 | "\\u%08x" |
| 177 | #endif |
| 178 | , *(Py_UNICODE *)inp); |
| 179 | |
| 180 | if (exceptionobj == NULL) { |
| 181 | if ((exceptionobj = PyUnicodeEncodeError_Create( |
| 182 | self->encoding, input, inputlen, |
| 183 | errpos, errpos + 1, reason)) == NULL) |
| 184 | goto errorexit; |
| 185 | } else { |
| 186 | if (PyUnicodeEncodeError_SetStart(exceptionobj, errpos) != 0) |
| 187 | goto errorexit; |
| 188 | if (PyUnicodeEncodeError_SetEnd(exceptionobj, errpos + 1) != 0) |
| 189 | goto errorexit; |
| 190 | if (PyUnicodeEncodeError_SetReason(exceptionobj, reason) != 0) |
| 191 | goto errorexit; |
| 192 | } |
| 193 | |
| 194 | if (errorcb == ERROR_STRICT) { |
| 195 | PyCodec_StrictErrors(exceptionobj); |
| 196 | goto errorexit; |
| 197 | } else { |
| 198 | PyObject *argsobj, *retobj, *retuni; |
| 199 | long newpos; |
| 200 | |
| 201 | argsobj = PyTuple_New(1); |
| 202 | if (argsobj == NULL) |
| 203 | goto errorexit; |
| 204 | PyTuple_SET_ITEM(argsobj, 0, exceptionobj); |
| 205 | Py_INCREF(exceptionobj); |
| 206 | retobj = PyObject_CallObject(errorcb, argsobj); |
| 207 | Py_DECREF(argsobj); |
| 208 | if (retobj == NULL) |
| 209 | goto errorexit; |
| 210 | |
| 211 | if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || |
| 212 | !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || |
| 213 | !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { |
| 214 | Py_DECREF(retobj); |
| 215 | PyErr_SetString(PyExc_ValueError, "encoding error handler " |
| 216 | "must return (unicode, int) tuple"); |
| 217 | goto errorexit; |
| 218 | } |
| 219 | if (PyUnicode_GET_SIZE(retuni) > 0) { |
| 220 | #define errorexit errorexit_cbpad |
| 221 | PyObject *retstr = NULL; |
| 222 | int retstrsize; |
| 223 | |
| 224 | retstr = PyUnicode_AsEncodedString( |
| 225 | retuni, self->encoding, NULL); |
| 226 | if (retstr == NULL || !PyString_Check(retstr)) |
| 227 | goto errorexit; |
| 228 | |
| 229 | retstrsize = PyString_GET_SIZE(retstr); |
| 230 | if (outlen < retstrsize) |
| 231 | RESIZE_OUTBUFFER(errno == EINVAL || retstrsize > estep |
| 232 | ? retstrsize - outlen : estep); |
| 233 | |
| 234 | memcpy(out, PyString_AS_STRING(retstr), retstrsize); |
| 235 | out += retstrsize; |
| 236 | outlen -= retstrsize; |
| 237 | #undef errorexit |
| 238 | if (0) { |
| 239 | errorexit_cbpad: Py_XDECREF(retobj); |
| 240 | Py_XDECREF(retstr); |
| 241 | goto errorexit; |
| 242 | } |
| 243 | Py_DECREF(retstr); |
| 244 | } |
| 245 | |
| 246 | newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); |
| 247 | Py_DECREF(retobj); |
| 248 | |
| 249 | if (newpos < 0) |
Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 250 | newpos = inputlen + newpos; |
| 251 | if (newpos < 0 || newpos > inputlen) { |
| 252 | PyErr_Format(PyExc_IndexError, "position %ld from error handler" |
| 253 | " out of bounds", newpos); |
| 254 | goto errorexit; |
| 255 | } |
| 256 | if (newpos == inputlen) |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 257 | break; |
| 258 | inp = inp_top + Py_UNICODE_SIZE * newpos; |
| 259 | inplen = inplen_total - Py_UNICODE_SIZE * newpos; |
| 260 | } |
| 261 | } else |
| 262 | break; |
| 263 | } |
| 264 | #undef RESIZE_OUTBUFFER |
| 265 | |
| 266 | { |
| 267 | PyObject *rettup; |
| 268 | int finalsize; |
| 269 | |
| 270 | finalsize = (int)(out - out_top); |
| 271 | |
| 272 | if (finalsize != outlen_total) { |
| 273 | if (_PyString_Resize(&outputobj, finalsize) == -1) |
| 274 | goto errorexit; |
| 275 | } |
| 276 | |
| 277 | if (errorcb > ERROR_MAX) { |
| 278 | Py_DECREF(errorcb); |
| 279 | } |
| 280 | Py_XDECREF(exceptionobj); |
| 281 | |
| 282 | rettup = PyTuple_New(2); |
| 283 | if (rettup == NULL) { |
| 284 | Py_DECREF(outputobj); |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 285 | if (byteswap) |
| 286 | PyMem_Free(swappedinput); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 287 | return NULL; |
| 288 | } |
| 289 | PyTuple_SET_ITEM(rettup, 0, outputobj); |
| 290 | PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inputlen)); |
| 291 | return rettup; |
| 292 | } |
| 293 | |
| 294 | errorexit: |
| 295 | Py_XDECREF(outputobj); |
| 296 | if (errorcb > ERROR_MAX) { |
| 297 | Py_DECREF(errorcb); |
| 298 | } |
| 299 | Py_XDECREF(exceptionobj); |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 300 | if (byteswap) |
| 301 | PyMem_Free(swappedinput); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 302 | |
| 303 | return NULL; |
| 304 | } |
| 305 | |
| 306 | PyDoc_STRVAR(iconvcodec_decode__doc__, |
| 307 | "I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\ |
| 308 | \n\ |
| 309 | Decodes `string' using I, an iconvcodec instance. errors may be given\n\ |
| 310 | to set a different error handling scheme. Default is 'strict' meaning\n\ |
| 311 | that encoding errors raise a UnicodeDecodeError. Other possible values\n\ |
| 312 | are 'ignore' and 'replace' as well as any other name registerd with\n\ |
| 313 | codecs.register_error that is able to handle UnicodeDecodeErrors."); |
| 314 | |
| 315 | static PyObject * |
| 316 | iconvcodec_decode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) |
| 317 | { |
| 318 | static char *kwlist[] = { "input", "errors", NULL }; |
| 319 | char *errors = NULL/*strict*/, *out, *out_top; |
| 320 | const char *inp, *inp_top; |
| 321 | int inplen_int; |
| 322 | size_t inplen, inplen_total, outlen, outlen_total, estep; |
| 323 | PyObject *outputobj = NULL, *errorcb = NULL, |
| 324 | *exceptionobj = NULL; |
| 325 | |
| 326 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|s:decode", |
| 327 | kwlist, &inp, &inplen_int, &errors)) |
| 328 | return NULL; /* TypeError */ |
| 329 | |
| 330 | errorcb = get_errorcallback(errors); |
| 331 | if (errorcb == NULL) |
| 332 | return NULL; /* LookupError or something else from error handler */ |
| 333 | |
| 334 | inp_top = inp; |
| 335 | inplen_total = inplen = (size_t)inplen_int; |
| 336 | |
| 337 | outputobj = PyUnicode_FromUnicode(NULL, inplen); |
| 338 | if (outputobj == NULL) |
| 339 | return NULL; |
| 340 | outlen_total = outlen = PyUnicode_GET_DATA_SIZE(outputobj); |
| 341 | out = out_top = (char *)PyUnicode_AS_UNICODE(outputobj); |
| 342 | |
| 343 | estep = outlen / 2; |
| 344 | |
| 345 | #define RESIZE_OUTBUFFER(size) { \ |
| 346 | size_t toadd = (size); \ |
| 347 | outlen_total += toadd; \ |
| 348 | outlen += toadd; \ |
| 349 | if (PyUnicode_Resize(&outputobj, outlen_total/Py_UNICODE_SIZE) == -1) \ |
| 350 | goto errorexit; \ |
| 351 | out = (char *)PyUnicode_AS_UNICODE(outputobj) + (out - out_top); \ |
| 352 | out_top = (char *)PyUnicode_AS_UNICODE(outputobj); \ |
| 353 | } |
| 354 | while (inplen > 0) { |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 355 | char *oldout = out; |
| 356 | char res = iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen); |
| 357 | |
| 358 | if (byteswap) { |
| 359 | while (oldout < out) |
| 360 | { |
| 361 | char c0 = oldout[0]; |
| 362 | #if Py_UNICODE_SIZE == 2 |
| 363 | oldout[0] = oldout[1]; |
| 364 | oldout[1] = c0; |
| 365 | #else |
| 366 | char c1 = oldout[1]; |
| 367 | oldout[0] = oldout[3]; |
| 368 | oldout[1] = oldout[2]; |
| 369 | oldout[2] = c1; |
| 370 | oldout[3] = c0; |
| 371 | #endif |
| 372 | oldout += sizeof(Py_UNICODE); |
| 373 | } |
| 374 | } |
| 375 | if (res == -1) { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 376 | char reason[128], *reasonpos = (char *)reason; |
| 377 | int errpos; |
| 378 | |
| 379 | if (errno == E2BIG) { |
| 380 | RESIZE_OUTBUFFER(estep); |
| 381 | continue; |
| 382 | } |
| 383 | |
| 384 | if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) { |
| 385 | inplen--; inp++; |
| 386 | if (errorcb == ERROR_REPLACE) { |
| 387 | Py_UNICODE *replp; |
| 388 | |
| 389 | if (outlen < Py_UNICODE_SIZE) |
| 390 | RESIZE_OUTBUFFER( |
| 391 | errno == EINVAL || Py_UNICODE_SIZE > estep |
| 392 | ? Py_UNICODE_SIZE : estep); |
| 393 | |
| 394 | /* some compilers hate casted lvalue */ |
| 395 | replp = (Py_UNICODE *)out; |
| 396 | assert((long)replp % Py_UNICODE_SIZE == 0);/* aligned? */ |
| 397 | *replp = REPLACEMENT_CHAR_DECODE; |
| 398 | |
| 399 | out += Py_UNICODE_SIZE; |
| 400 | outlen -= Py_UNICODE_SIZE; |
| 401 | } |
| 402 | if (errno == EINVAL) break; |
| 403 | else continue; |
| 404 | } |
| 405 | |
| 406 | errpos = (int)(inp - inp_top); |
| 407 | reasonpos += sprintf(reason, "Invalid multibyte sequence \\x%02x", |
| 408 | (unsigned char)*inp); |
| 409 | if (inplen > 1) { |
| 410 | reasonpos += sprintf(reasonpos, |
| 411 | "\\x%02x", (unsigned char)*(inp+1)); |
| 412 | if (inplen > 2) |
| 413 | sprintf(reasonpos, "\\x%02x", (unsigned char)*(inp+2)); |
| 414 | } |
| 415 | |
| 416 | if (exceptionobj == NULL) { |
| 417 | exceptionobj = PyUnicodeDecodeError_Create( |
| 418 | self->encoding, inp_top, inplen_total, |
| 419 | errpos, errpos + 1, reason); |
| 420 | if (exceptionobj == NULL) |
| 421 | goto errorexit; |
| 422 | } else { |
| 423 | if (PyUnicodeDecodeError_SetStart(exceptionobj, errpos) != 0) |
| 424 | goto errorexit; |
| 425 | if (PyUnicodeDecodeError_SetEnd(exceptionobj, errpos + 1) != 0) |
| 426 | goto errorexit; |
| 427 | if (PyUnicodeDecodeError_SetReason(exceptionobj, reason) != 0) |
| 428 | goto errorexit; |
| 429 | } |
| 430 | |
| 431 | if (errorcb == ERROR_STRICT) { |
| 432 | PyCodec_StrictErrors(exceptionobj); |
| 433 | goto errorexit; |
| 434 | } else { |
| 435 | PyObject *argsobj, *retobj, *retuni; |
| 436 | long newpos; |
| 437 | |
| 438 | argsobj = PyTuple_New(1); |
| 439 | if (argsobj == NULL) |
| 440 | goto errorexit; |
| 441 | PyTuple_SET_ITEM(argsobj, 0, exceptionobj); |
| 442 | Py_INCREF(exceptionobj); |
| 443 | retobj = PyObject_CallObject(errorcb, argsobj); |
| 444 | Py_DECREF(argsobj); |
| 445 | if (retobj == NULL) |
| 446 | goto errorexit; |
| 447 | |
| 448 | if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || |
| 449 | !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || |
| 450 | !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { |
| 451 | Py_DECREF(retobj); |
| 452 | PyErr_SetString(PyExc_ValueError, "decoding error handler " |
| 453 | "must return (unicode, int) tuple"); |
| 454 | goto errorexit; |
| 455 | } |
| 456 | if (PyUnicode_GET_SIZE(retuni) > 0) { |
| 457 | #define errorexit errorexit_cbpad |
| 458 | size_t retunisize; |
| 459 | |
| 460 | retunisize = PyUnicode_GET_DATA_SIZE(retuni); |
| 461 | if (outlen < retunisize) |
| 462 | RESIZE_OUTBUFFER(errno == EINVAL || retunisize > estep |
| 463 | ? retunisize - outlen : estep); |
| 464 | |
| 465 | memcpy(out, PyUnicode_AS_DATA(retuni), retunisize); |
| 466 | out += retunisize; |
| 467 | outlen -= retunisize; |
| 468 | #undef errorexit |
| 469 | if (0) { |
| 470 | errorexit_cbpad: Py_DECREF(retobj); |
| 471 | goto errorexit; |
| 472 | } |
| 473 | } |
| 474 | |
| 475 | newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); |
| 476 | Py_DECREF(retobj); |
| 477 | |
| 478 | if (newpos < 0) |
Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 479 | newpos = inplen_total + newpos; |
| 480 | if (newpos < 0 || newpos > inplen_total) { |
| 481 | PyErr_Format(PyExc_IndexError, "position %ld from error handler" |
| 482 | " out of bounds", newpos); |
| 483 | goto errorexit; |
| 484 | } |
| 485 | if (newpos == inplen_total) |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 486 | break; |
| 487 | inp = inp_top + newpos; |
| 488 | inplen = inplen_total - newpos; |
| 489 | } |
| 490 | } else |
| 491 | break; |
| 492 | } |
| 493 | #undef RESIZE_OUTBUFFER |
| 494 | |
| 495 | { |
| 496 | PyObject *rettup; |
| 497 | int finalsize; |
| 498 | |
| 499 | finalsize = (int)(out - out_top); |
| 500 | if (finalsize != outlen_total) { |
| 501 | if (PyUnicode_Resize(&outputobj, finalsize / Py_UNICODE_SIZE) == -1) |
| 502 | goto errorexit; |
| 503 | } |
| 504 | |
| 505 | if (errorcb > ERROR_MAX) { |
| 506 | Py_DECREF(errorcb); |
| 507 | } |
| 508 | Py_XDECREF(exceptionobj); |
| 509 | |
| 510 | rettup = PyTuple_New(2); |
| 511 | if (rettup == NULL) { |
| 512 | Py_DECREF(outputobj); |
| 513 | return NULL; |
| 514 | } |
| 515 | PyTuple_SET_ITEM(rettup, 0, outputobj); |
| 516 | PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inplen_total)); |
| 517 | return rettup; |
| 518 | } |
| 519 | |
| 520 | errorexit: |
| 521 | Py_XDECREF(outputobj); |
| 522 | if (errorcb > ERROR_MAX) { |
| 523 | Py_DECREF(errorcb); |
| 524 | } |
| 525 | Py_XDECREF(exceptionobj); |
| 526 | |
| 527 | return NULL; |
| 528 | } |
| 529 | |
| 530 | static struct PyMethodDef iconvcodec_methods[] = { |
| 531 | {"encode", (PyCFunction)iconvcodec_encode, |
| 532 | METH_VARARGS | METH_KEYWORDS, |
| 533 | iconvcodec_encode__doc__}, |
| 534 | {"decode", (PyCFunction)iconvcodec_decode, |
| 535 | METH_VARARGS | METH_KEYWORDS, |
| 536 | iconvcodec_decode__doc__}, |
| 537 | {NULL, NULL}, |
| 538 | }; |
| 539 | |
| 540 | static PyObject * |
| 541 | iconvcodec_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) |
| 542 | { |
| 543 | PyObject *encobj = NULL; |
| 544 | iconvcodecObject *new = NULL; |
| 545 | |
| 546 | new = (iconvcodecObject *)type->tp_alloc(type, 0); |
| 547 | if (new == NULL) |
| 548 | return NULL; |
| 549 | |
| 550 | new->encoding = NULL; |
| 551 | new->enchdl = new->dechdl = (iconv_t)(-1); |
| 552 | |
| 553 | encobj = PyObject_GetAttrString((PyObject *)new, "encoding"); |
| 554 | if (encobj == NULL) { |
| 555 | PyErr_Clear(); |
| 556 | new->encoding = PyMem_Malloc(sizeof(DEFAULT_ENCODING)); |
| 557 | strcpy(new->encoding, DEFAULT_ENCODING); |
| 558 | } else if (!PyString_Check(encobj)) { |
| 559 | Py_DECREF(encobj); |
| 560 | PyErr_SetString(PyExc_TypeError, |
| 561 | "`encoding' attribute must be a string."); |
| 562 | goto errorexit; |
| 563 | } else { |
| 564 | new->encoding = PyMem_Malloc(PyString_GET_SIZE(encobj) + 1); |
| 565 | strcpy(new->encoding, PyString_AS_STRING(encobj)); |
| 566 | Py_DECREF(encobj); |
| 567 | } |
| 568 | |
| 569 | new->dechdl = iconv_open(UNICODE_ENCODING, new->encoding); |
| 570 | if (new->dechdl == (iconv_t)(-1)) { |
| 571 | PyErr_SetString(PyExc_ValueError, "unsupported decoding"); |
| 572 | goto errorexit; |
| 573 | } |
| 574 | |
| 575 | new->enchdl = iconv_open(new->encoding, UNICODE_ENCODING); |
| 576 | if (new->enchdl == (iconv_t)(-1)) { |
| 577 | PyErr_SetString(PyExc_ValueError, "unsupported encoding"); |
| 578 | iconv_close(new->dechdl); |
| 579 | new->dechdl = (iconv_t)(-1); |
| 580 | goto errorexit; |
| 581 | } |
| 582 | |
| 583 | return (PyObject *)new; |
| 584 | |
| 585 | errorexit: |
| 586 | Py_XDECREF(new); |
| 587 | |
| 588 | return NULL; |
| 589 | } |
| 590 | |
| 591 | static void |
| 592 | iconvcodec_dealloc(iconvcodecObject *self) |
| 593 | { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 594 | if (self->enchdl != (iconv_t)-1) |
| 595 | iconv_close(self->enchdl); |
| 596 | if (self->dechdl != (iconv_t)-1) |
| 597 | iconv_close(self->dechdl); |
| 598 | if (self->encoding != NULL) |
| 599 | PyMem_Free(self->encoding); |
| 600 | |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 601 | self->ob_type->tp_free((PyObject *)self); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 602 | } |
| 603 | |
| 604 | static PyObject * |
| 605 | iconvcodec_repr(PyObject *self) |
| 606 | { |
| 607 | return PyString_FromFormat("<iconvcodec encoding='%s'>", |
| 608 | ((iconvcodecObject *)self)->encoding); |
| 609 | } |
| 610 | |
| 611 | statichere PyTypeObject iconvcodec_Type = { |
| 612 | PyObject_HEAD_INIT(&PyType_Type) |
| 613 | 0, /* Number of items for varobject */ |
| 614 | "iconvcodec", /* Name of this type */ |
| 615 | sizeof(iconvcodecObject), /* Basic object size */ |
| 616 | 0, /* Item size for varobject */ |
| 617 | (destructor)iconvcodec_dealloc, /* tp_dealloc */ |
| 618 | 0, /* tp_print */ |
| 619 | 0, /* tp_getattr */ |
| 620 | 0, /* tp_setattr */ |
| 621 | 0, /* tp_compare */ |
| 622 | iconvcodec_repr, /* tp_repr */ |
| 623 | 0, /* tp_as_number */ |
| 624 | 0, /* tp_as_sequence */ |
| 625 | 0, /* tp_as_mapping */ |
| 626 | 0, /* tp_hash */ |
| 627 | 0, /* tp_call */ |
| 628 | 0, /* tp_str */ |
| 629 | PyObject_GenericGetAttr, /* tp_getattro */ |
| 630 | 0, /* tp_setattro */ |
| 631 | 0, /* tp_as_buffer */ |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 632 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 633 | iconvcodec_doc, /* tp_doc */ |
| 634 | 0, /* tp_traverse */ |
| 635 | 0, /* tp_clear */ |
| 636 | 0, /* tp_richcompare */ |
| 637 | 0, /* tp_weaklistoffset */ |
| 638 | 0, /* tp_iter */ |
| 639 | 0, /* tp_iterext */ |
| 640 | iconvcodec_methods, /* tp_methods */ |
| 641 | 0, /* tp_members */ |
| 642 | 0, /* tp_getset */ |
| 643 | 0, /* tp_base */ |
| 644 | 0, /* tp_dict */ |
| 645 | 0, /* tp_descr_get */ |
| 646 | 0, /* tp_descr_set */ |
| 647 | 0, /* tp_dictoffset */ |
| 648 | 0, /* tp_init */ |
| 649 | PyType_GenericAlloc, /* tp_alloc */ |
| 650 | iconvcodec_new, /* tp_new */ |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 651 | PyObject_Del, /* tp_free */ |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 652 | }; |
| 653 | |
| 654 | static struct PyMethodDef _iconv_codec_methods[] = { |
| 655 | {NULL, NULL}, |
| 656 | }; |
| 657 | |
| 658 | void |
| 659 | init_iconv_codec(void) |
| 660 | { |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 661 | PyObject *m; |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 662 | |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 663 | char in = 1; |
| 664 | char *inptr = ∈ |
| 665 | int insize = 1; |
| 666 | Py_UNICODE out = 0; |
| 667 | char *outptr = (char *)&out; |
| 668 | int outsize = sizeof(out); |
| 669 | int res; |
| 670 | |
| 671 | iconv_t hdl = iconv_open(UNICODE_ENCODING, "ASCII"); |
| 672 | |
| 673 | if (hdl == (iconv_t)-1) |
| 674 | Py_FatalError("can't initialize the _iconv_codec module: iconv_open() failed"); |
| 675 | |
| 676 | res = iconv(hdl, &inptr, &insize, &outptr, &outsize); |
| 677 | if (res == -1) |
| 678 | Py_FatalError("can't initialize the _iconv_codec module: iconv() failed"); |
| 679 | |
| 680 | /* Check whether conv() returned native endianess or not for the choosen encoding */ |
| 681 | if (out == 0x1) |
| 682 | byteswap = 0; |
| 683 | #if Py_UNICODE_SIZE == 2 |
| 684 | else if (out == 0x0100) |
| 685 | #else |
| 686 | else if (out == 0x01000000) |
| 687 | #endif |
| 688 | byteswap = 1; |
| 689 | else |
| 690 | Py_FatalError("can't initialize the _iconv_codec module: mixed endianess"); |
| 691 | iconv_close(hdl); |
| 692 | |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 693 | m = Py_InitModule("_iconv_codec", _iconv_codec_methods); |
| 694 | |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 695 | PyModule_AddStringConstant(m, "__version__", (char*)__version__); |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 696 | Py_INCREF(&iconvcodec_Type); |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 697 | PyModule_AddObject(m, "iconvcodec", (PyObject *)(&iconvcodec_Type)); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 698 | PyModule_AddStringConstant(m, "internal_encoding", UNICODE_ENCODING); |
| 699 | |
| 700 | if (PyErr_Occurred()) |
| 701 | Py_FatalError("can't initialize the _iconv_codec module"); |
| 702 | } |
| 703 | |
| 704 | /* |
| 705 | * ex: ts=8 sts=4 et |
| 706 | * $Id$ |
| 707 | */ |