/*
 * _iconv_codec.c
 *
 * libiconv adaptor for Python iconvcodec
 *
 * Author  : Hye-Shik Chang <perky@FreeBSD.org>
 * Created : 17 January 2003
 */
| 9 | |
| 10 | #include "Python.h" |
| 11 | #include <string.h> |
| 12 | #include <iconv.h> |
| 13 | |
| 14 | static const char *__version__ = "$Revision$"; |
| 15 | |
| 16 | #if Py_USING_UNICODE |
| 17 | # if Py_UNICODE_SIZE == 2 |
| 18 | # ifdef __GNU_LIBRARY__ |
| 19 | # define UNICODE_ENCODING "ucs-2" |
| 20 | # else |
| 21 | # define UNICODE_ENCODING "ucs-2-internal" |
| 22 | # endif |
| 23 | # define MBENCODED_LENGTH_MAX 4 |
| 24 | # elif Py_UNICODE_SIZE == 4 |
| 25 | # ifdef __GNU_LIBRARY__ |
| 26 | # define UNICODE_ENCODING "ucs-4" |
| 27 | # else |
| 28 | # define UNICODE_ENCODING "ucs-4-internal" |
| 29 | # endif |
| 30 | # define MBENCODED_LENGTH_MAX 6 |
| 31 | # endif |
| 32 | #else |
| 33 | # error "Unicode is not available" |
| 34 | #endif |
| 35 | |
| 36 | typedef struct { |
| 37 | PyObject_HEAD |
| 38 | iconv_t enchdl, dechdl; |
| 39 | char *encoding; |
| 40 | } iconvcodecObject; |
| 41 | PyDoc_STRVAR(iconvcodec_doc, "iconvcodec object"); |
| 42 | |
/*
 * Does the data iconv exchanges for UNICODE_ENCODING have to be byte-swapped
 * to match the native Py_UNICODE byte order?
 *   0 = no, 1 = yes, -1 = not determined yet (set by init_iconv_codec()).
 */
static int byteswap = -1;

/*
 * Sentinel "callbacks" for the three built-in error policies.  They are
 * small integers cast to PyObject *; code compares a handler against
 * ERROR_MAX to tell these sentinels from a real, reference-counted callable.
 */
#define ERROR_STRICT            ((PyObject *)(1))
#define ERROR_IGNORE            ((PyObject *)(2))
#define ERROR_REPLACE           ((PyObject *)(3))
#define ERROR_MAX               ERROR_REPLACE

/* Substitutes emitted by the "replace" policy when decoding / encoding. */
#define REPLACEMENT_CHAR_DECODE 0xFFFD
#define REPLACEMENT_CHAR_ENCODE '?'

/* Encoding used when an instance has no `encoding' attribute. */
#define DEFAULT_ENCODING        "utf-8"
| 57 | |
| 58 | |
| 59 | static PyObject * |
| 60 | get_errorcallback(const char *errors) |
| 61 | { |
| 62 | if (errors == NULL || strcmp(errors, "strict") == 0) |
| 63 | return ERROR_STRICT; |
| 64 | else if (strcmp(errors, "ignore") == 0) |
| 65 | return ERROR_IGNORE; |
| 66 | else if (strcmp(errors, "replace") == 0) |
| 67 | return ERROR_REPLACE; |
| 68 | else |
| 69 | return PyCodec_LookupError(errors); |
| 70 | } |
| 71 | |
| 72 | |
| 73 | PyDoc_STRVAR(iconvcodec_encode__doc__, |
| 74 | "I.encode(unicode, [,errors]) -> (string, length consumed)\n\ |
| 75 | \n\ |
| 76 | Return an encoded string version of `unicode'. errors may be given to\n\ |
| 77 | set a different error handling scheme. Default is 'strict' meaning that\n\ |
| 78 | encoding errors raise a UnicodeEncodeError. Other possible values are\n\ |
| 79 | 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\ |
| 80 | registered with codecs.register_error that can handle UnicodeEncodeErrors."); |
| 81 | |
| 82 | static PyObject * |
| 83 | iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) |
| 84 | { |
| 85 | static char *kwlist[] = { "input", "errors", NULL }; |
| 86 | Py_UNICODE *input; |
| 87 | int inputlen; |
| 88 | char *errors = NULL/*strict*/, *out, *out_top; |
| 89 | const char *inp, *inp_top; |
| 90 | size_t inplen, inplen_total, outlen, outlen_total, estep; |
| 91 | PyObject *outputobj = NULL, *errorcb = NULL, |
| 92 | *exceptionobj = NULL; |
Walter Dörwald | 757246c | 2003-01-31 16:26:50 +0000 | [diff] [blame] | 93 | Py_UNICODE *swappedinput = NULL; |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 94 | int swapi; |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 95 | |
| 96 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode", |
| 97 | kwlist, &input, &inputlen, &errors)) |
| 98 | return NULL; /* TypeError */ |
| 99 | |
| 100 | errorcb = get_errorcallback(errors); |
| 101 | if (errorcb == NULL) |
| 102 | return NULL; /* LookupError or something else from error handler */ |
| 103 | |
| 104 | inp = inp_top = (char *)input; |
| 105 | inplen = inplen_total = (size_t)(inputlen * Py_UNICODE_SIZE); |
| 106 | |
| 107 | outlen = inputlen * MBENCODED_LENGTH_MAX; |
| 108 | if (outlen < 16) |
| 109 | outlen = 16; /* for iso-2022 codecs */ |
| 110 | |
| 111 | outputobj = PyString_FromStringAndSize(NULL, outlen); |
| 112 | if (outputobj == NULL) |
| 113 | return NULL; |
| 114 | out = out_top = PyString_AS_STRING(outputobj); |
| 115 | outlen_total = outlen; |
| 116 | |
| 117 | estep = inputlen * Py_UNICODE_SIZE / 2; |
| 118 | |
| 119 | #define RESIZE_OUTBUFFER(size) { \ |
| 120 | size_t toadd = (size); \ |
| 121 | outlen_total += toadd; \ |
| 122 | outlen += toadd; \ |
| 123 | if (_PyString_Resize(&outputobj, outlen_total) == -1) \ |
| 124 | goto errorexit; \ |
| 125 | out = PyString_AS_STRING(outputobj) + (out - out_top); \ |
| 126 | out_top = PyString_AS_STRING(outputobj); \ |
| 127 | } |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 128 | if (byteswap) { |
| 129 | swappedinput = PyMem_Malloc(inplen); |
| 130 | if (swappedinput == NULL) |
| 131 | return NULL; |
| 132 | for (swapi = 0; swapi<inputlen; ++swapi) |
| 133 | { |
| 134 | Py_UNICODE c = input[swapi]; |
| 135 | #if Py_UNICODE_SIZE == 2 |
| 136 | c = ((char *)&c)[0]<<8 | ((char *)&c)[1]; |
| 137 | #else |
| 138 | c = ((char *)&c)[0]<<24 | ((char *)&c)[1]<<16 | |
| 139 | ((char *)&c)[2]<<8 | ((char *)&c)[3]; |
| 140 | #endif |
| 141 | swappedinput[swapi] = c; |
| 142 | } |
| 143 | inp = inp_top = (char *)swappedinput; |
| 144 | } |
| 145 | |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 146 | while (inplen > 0) { |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 147 | if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen) |
| 148 | == (size_t)-1) |
| 149 | { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 150 | char reason[128]; |
| 151 | int errpos; |
| 152 | |
| 153 | if (errno == E2BIG) { |
| 154 | RESIZE_OUTBUFFER(estep); |
| 155 | continue; |
| 156 | } |
| 157 | |
| 158 | if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) { |
| 159 | inplen -= Py_UNICODE_SIZE; |
| 160 | inp += Py_UNICODE_SIZE; |
| 161 | if (errorcb == ERROR_REPLACE) { |
| 162 | if (outlen < 1) |
| 163 | RESIZE_OUTBUFFER(errno == EINVAL ? 1 : estep); |
| 164 | outlen--; |
| 165 | *out++ = REPLACEMENT_CHAR_ENCODE; |
| 166 | } |
| 167 | if (errno == EINVAL) break; |
| 168 | else continue; |
| 169 | } |
| 170 | |
| 171 | errpos = (int)(inp - inp_top) / Py_UNICODE_SIZE; |
| 172 | sprintf(reason, "Undefined character map from " |
| 173 | #if Py_UNICODE_SIZE == 2 |
| 174 | "\\u%04x" |
| 175 | #elif Py_UNICODE_SIZE == 4 |
| 176 | "\\u%08x" |
| 177 | #endif |
| 178 | , *(Py_UNICODE *)inp); |
| 179 | |
| 180 | if (exceptionobj == NULL) { |
| 181 | if ((exceptionobj = PyUnicodeEncodeError_Create( |
| 182 | self->encoding, input, inputlen, |
| 183 | errpos, errpos + 1, reason)) == NULL) |
| 184 | goto errorexit; |
| 185 | } else { |
| 186 | if (PyUnicodeEncodeError_SetStart(exceptionobj, errpos) != 0) |
| 187 | goto errorexit; |
| 188 | if (PyUnicodeEncodeError_SetEnd(exceptionobj, errpos + 1) != 0) |
| 189 | goto errorexit; |
| 190 | if (PyUnicodeEncodeError_SetReason(exceptionobj, reason) != 0) |
| 191 | goto errorexit; |
| 192 | } |
| 193 | |
| 194 | if (errorcb == ERROR_STRICT) { |
| 195 | PyCodec_StrictErrors(exceptionobj); |
| 196 | goto errorexit; |
| 197 | } else { |
| 198 | PyObject *argsobj, *retobj, *retuni; |
| 199 | long newpos; |
| 200 | |
| 201 | argsobj = PyTuple_New(1); |
| 202 | if (argsobj == NULL) |
| 203 | goto errorexit; |
| 204 | PyTuple_SET_ITEM(argsobj, 0, exceptionobj); |
| 205 | Py_INCREF(exceptionobj); |
| 206 | retobj = PyObject_CallObject(errorcb, argsobj); |
| 207 | Py_DECREF(argsobj); |
| 208 | if (retobj == NULL) |
| 209 | goto errorexit; |
| 210 | |
| 211 | if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || |
| 212 | !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || |
| 213 | !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { |
| 214 | Py_DECREF(retobj); |
| 215 | PyErr_SetString(PyExc_ValueError, "encoding error handler " |
| 216 | "must return (unicode, int) tuple"); |
| 217 | goto errorexit; |
| 218 | } |
| 219 | if (PyUnicode_GET_SIZE(retuni) > 0) { |
| 220 | #define errorexit errorexit_cbpad |
| 221 | PyObject *retstr = NULL; |
| 222 | int retstrsize; |
| 223 | |
| 224 | retstr = PyUnicode_AsEncodedString( |
| 225 | retuni, self->encoding, NULL); |
| 226 | if (retstr == NULL || !PyString_Check(retstr)) |
| 227 | goto errorexit; |
| 228 | |
| 229 | retstrsize = PyString_GET_SIZE(retstr); |
| 230 | if (outlen < retstrsize) |
| 231 | RESIZE_OUTBUFFER(errno == EINVAL || retstrsize > estep |
| 232 | ? retstrsize - outlen : estep); |
| 233 | |
| 234 | memcpy(out, PyString_AS_STRING(retstr), retstrsize); |
| 235 | out += retstrsize; |
| 236 | outlen -= retstrsize; |
| 237 | #undef errorexit |
| 238 | if (0) { |
| 239 | errorexit_cbpad: Py_XDECREF(retobj); |
| 240 | Py_XDECREF(retstr); |
| 241 | goto errorexit; |
| 242 | } |
| 243 | Py_DECREF(retstr); |
| 244 | } |
| 245 | |
| 246 | newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); |
| 247 | Py_DECREF(retobj); |
| 248 | |
| 249 | if (newpos < 0) |
Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 250 | newpos = inputlen + newpos; |
| 251 | if (newpos < 0 || newpos > inputlen) { |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 252 | PyErr_Format(PyExc_IndexError, |
| 253 | "position %ld from error handler out of bounds", |
| 254 | newpos); |
Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 255 | goto errorexit; |
| 256 | } |
| 257 | if (newpos == inputlen) |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 258 | break; |
| 259 | inp = inp_top + Py_UNICODE_SIZE * newpos; |
| 260 | inplen = inplen_total - Py_UNICODE_SIZE * newpos; |
| 261 | } |
| 262 | } else |
| 263 | break; |
| 264 | } |
| 265 | #undef RESIZE_OUTBUFFER |
| 266 | |
| 267 | { |
| 268 | PyObject *rettup; |
| 269 | int finalsize; |
| 270 | |
| 271 | finalsize = (int)(out - out_top); |
| 272 | |
| 273 | if (finalsize != outlen_total) { |
| 274 | if (_PyString_Resize(&outputobj, finalsize) == -1) |
| 275 | goto errorexit; |
| 276 | } |
| 277 | |
| 278 | if (errorcb > ERROR_MAX) { |
| 279 | Py_DECREF(errorcb); |
| 280 | } |
| 281 | Py_XDECREF(exceptionobj); |
| 282 | |
| 283 | rettup = PyTuple_New(2); |
| 284 | if (rettup == NULL) { |
| 285 | Py_DECREF(outputobj); |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 286 | if (byteswap) |
| 287 | PyMem_Free(swappedinput); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 288 | return NULL; |
| 289 | } |
| 290 | PyTuple_SET_ITEM(rettup, 0, outputobj); |
| 291 | PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inputlen)); |
| 292 | return rettup; |
| 293 | } |
| 294 | |
| 295 | errorexit: |
| 296 | Py_XDECREF(outputobj); |
| 297 | if (errorcb > ERROR_MAX) { |
| 298 | Py_DECREF(errorcb); |
| 299 | } |
| 300 | Py_XDECREF(exceptionobj); |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 301 | if (byteswap) |
| 302 | PyMem_Free(swappedinput); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 303 | |
| 304 | return NULL; |
| 305 | } |
| 306 | |
| 307 | PyDoc_STRVAR(iconvcodec_decode__doc__, |
| 308 | "I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\ |
| 309 | \n\ |
| 310 | Decodes `string' using I, an iconvcodec instance. errors may be given\n\ |
| 311 | to set a different error handling scheme. Default is 'strict' meaning\n\ |
| 312 | that encoding errors raise a UnicodeDecodeError. Other possible values\n\ |
| 313 | are 'ignore' and 'replace' as well as any other name registerd with\n\ |
| 314 | codecs.register_error that is able to handle UnicodeDecodeErrors."); |
| 315 | |
| 316 | static PyObject * |
| 317 | iconvcodec_decode(iconvcodecObject *self, PyObject *args, PyObject *kwargs) |
| 318 | { |
| 319 | static char *kwlist[] = { "input", "errors", NULL }; |
| 320 | char *errors = NULL/*strict*/, *out, *out_top; |
| 321 | const char *inp, *inp_top; |
| 322 | int inplen_int; |
| 323 | size_t inplen, inplen_total, outlen, outlen_total, estep; |
| 324 | PyObject *outputobj = NULL, *errorcb = NULL, |
| 325 | *exceptionobj = NULL; |
| 326 | |
| 327 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|s:decode", |
| 328 | kwlist, &inp, &inplen_int, &errors)) |
| 329 | return NULL; /* TypeError */ |
| 330 | |
| 331 | errorcb = get_errorcallback(errors); |
| 332 | if (errorcb == NULL) |
| 333 | return NULL; /* LookupError or something else from error handler */ |
| 334 | |
| 335 | inp_top = inp; |
| 336 | inplen_total = inplen = (size_t)inplen_int; |
| 337 | |
| 338 | outputobj = PyUnicode_FromUnicode(NULL, inplen); |
| 339 | if (outputobj == NULL) |
| 340 | return NULL; |
| 341 | outlen_total = outlen = PyUnicode_GET_DATA_SIZE(outputobj); |
| 342 | out = out_top = (char *)PyUnicode_AS_UNICODE(outputobj); |
| 343 | |
| 344 | estep = outlen / 2; |
| 345 | |
| 346 | #define RESIZE_OUTBUFFER(size) { \ |
| 347 | size_t toadd = (size); \ |
| 348 | outlen_total += toadd; \ |
| 349 | outlen += toadd; \ |
| 350 | if (PyUnicode_Resize(&outputobj, outlen_total/Py_UNICODE_SIZE) == -1) \ |
| 351 | goto errorexit; \ |
| 352 | out = (char *)PyUnicode_AS_UNICODE(outputobj) + (out - out_top); \ |
| 353 | out_top = (char *)PyUnicode_AS_UNICODE(outputobj); \ |
| 354 | } |
| 355 | while (inplen > 0) { |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 356 | char *oldout = out; |
Walter Dörwald | bda1c86 | 2003-02-04 18:02:28 +0000 | [diff] [blame] | 357 | size_t res = iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen); |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 358 | |
| 359 | if (byteswap) { |
| 360 | while (oldout < out) |
| 361 | { |
| 362 | char c0 = oldout[0]; |
| 363 | #if Py_UNICODE_SIZE == 2 |
| 364 | oldout[0] = oldout[1]; |
| 365 | oldout[1] = c0; |
| 366 | #else |
| 367 | char c1 = oldout[1]; |
| 368 | oldout[0] = oldout[3]; |
| 369 | oldout[1] = oldout[2]; |
| 370 | oldout[2] = c1; |
| 371 | oldout[3] = c0; |
| 372 | #endif |
| 373 | oldout += sizeof(Py_UNICODE); |
| 374 | } |
| 375 | } |
Walter Dörwald | bda1c86 | 2003-02-04 18:02:28 +0000 | [diff] [blame] | 376 | if (res == (size_t)-1) { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 377 | char reason[128], *reasonpos = (char *)reason; |
| 378 | int errpos; |
| 379 | |
| 380 | if (errno == E2BIG) { |
| 381 | RESIZE_OUTBUFFER(estep); |
| 382 | continue; |
| 383 | } |
| 384 | |
| 385 | if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) { |
| 386 | inplen--; inp++; |
| 387 | if (errorcb == ERROR_REPLACE) { |
| 388 | Py_UNICODE *replp; |
| 389 | |
| 390 | if (outlen < Py_UNICODE_SIZE) |
| 391 | RESIZE_OUTBUFFER( |
| 392 | errno == EINVAL || Py_UNICODE_SIZE > estep |
| 393 | ? Py_UNICODE_SIZE : estep); |
| 394 | |
| 395 | /* some compilers hate casted lvalue */ |
| 396 | replp = (Py_UNICODE *)out; |
| 397 | assert((long)replp % Py_UNICODE_SIZE == 0);/* aligned? */ |
| 398 | *replp = REPLACEMENT_CHAR_DECODE; |
| 399 | |
| 400 | out += Py_UNICODE_SIZE; |
| 401 | outlen -= Py_UNICODE_SIZE; |
| 402 | } |
| 403 | if (errno == EINVAL) break; |
| 404 | else continue; |
| 405 | } |
| 406 | |
| 407 | errpos = (int)(inp - inp_top); |
| 408 | reasonpos += sprintf(reason, "Invalid multibyte sequence \\x%02x", |
| 409 | (unsigned char)*inp); |
| 410 | if (inplen > 1) { |
| 411 | reasonpos += sprintf(reasonpos, |
| 412 | "\\x%02x", (unsigned char)*(inp+1)); |
| 413 | if (inplen > 2) |
| 414 | sprintf(reasonpos, "\\x%02x", (unsigned char)*(inp+2)); |
| 415 | } |
| 416 | |
| 417 | if (exceptionobj == NULL) { |
| 418 | exceptionobj = PyUnicodeDecodeError_Create( |
| 419 | self->encoding, inp_top, inplen_total, |
| 420 | errpos, errpos + 1, reason); |
| 421 | if (exceptionobj == NULL) |
| 422 | goto errorexit; |
| 423 | } else { |
| 424 | if (PyUnicodeDecodeError_SetStart(exceptionobj, errpos) != 0) |
| 425 | goto errorexit; |
| 426 | if (PyUnicodeDecodeError_SetEnd(exceptionobj, errpos + 1) != 0) |
| 427 | goto errorexit; |
| 428 | if (PyUnicodeDecodeError_SetReason(exceptionobj, reason) != 0) |
| 429 | goto errorexit; |
| 430 | } |
| 431 | |
| 432 | if (errorcb == ERROR_STRICT) { |
| 433 | PyCodec_StrictErrors(exceptionobj); |
| 434 | goto errorexit; |
| 435 | } else { |
| 436 | PyObject *argsobj, *retobj, *retuni; |
| 437 | long newpos; |
| 438 | |
| 439 | argsobj = PyTuple_New(1); |
| 440 | if (argsobj == NULL) |
| 441 | goto errorexit; |
| 442 | PyTuple_SET_ITEM(argsobj, 0, exceptionobj); |
| 443 | Py_INCREF(exceptionobj); |
| 444 | retobj = PyObject_CallObject(errorcb, argsobj); |
| 445 | Py_DECREF(argsobj); |
| 446 | if (retobj == NULL) |
| 447 | goto errorexit; |
| 448 | |
| 449 | if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 || |
| 450 | !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) || |
| 451 | !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) { |
| 452 | Py_DECREF(retobj); |
| 453 | PyErr_SetString(PyExc_ValueError, "decoding error handler " |
| 454 | "must return (unicode, int) tuple"); |
| 455 | goto errorexit; |
| 456 | } |
| 457 | if (PyUnicode_GET_SIZE(retuni) > 0) { |
| 458 | #define errorexit errorexit_cbpad |
| 459 | size_t retunisize; |
| 460 | |
| 461 | retunisize = PyUnicode_GET_DATA_SIZE(retuni); |
| 462 | if (outlen < retunisize) |
| 463 | RESIZE_OUTBUFFER(errno == EINVAL || retunisize > estep |
| 464 | ? retunisize - outlen : estep); |
| 465 | |
| 466 | memcpy(out, PyUnicode_AS_DATA(retuni), retunisize); |
| 467 | out += retunisize; |
| 468 | outlen -= retunisize; |
| 469 | #undef errorexit |
| 470 | if (0) { |
| 471 | errorexit_cbpad: Py_DECREF(retobj); |
| 472 | goto errorexit; |
| 473 | } |
| 474 | } |
| 475 | |
| 476 | newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1)); |
| 477 | Py_DECREF(retobj); |
| 478 | |
| 479 | if (newpos < 0) |
Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 480 | newpos = inplen_total + newpos; |
| 481 | if (newpos < 0 || newpos > inplen_total) { |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 482 | PyErr_Format(PyExc_IndexError, |
| 483 | "position %ld from error handler out of bounds", |
| 484 | newpos); |
Walter Dörwald | 2e0b18a | 2003-01-31 17:19:08 +0000 | [diff] [blame] | 485 | goto errorexit; |
| 486 | } |
| 487 | if (newpos == inplen_total) |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 488 | break; |
| 489 | inp = inp_top + newpos; |
| 490 | inplen = inplen_total - newpos; |
| 491 | } |
| 492 | } else |
| 493 | break; |
| 494 | } |
| 495 | #undef RESIZE_OUTBUFFER |
| 496 | |
| 497 | { |
| 498 | PyObject *rettup; |
| 499 | int finalsize; |
| 500 | |
| 501 | finalsize = (int)(out - out_top); |
| 502 | if (finalsize != outlen_total) { |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 503 | if (PyUnicode_Resize(&outputobj, finalsize / Py_UNICODE_SIZE) |
| 504 | == -1) |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 505 | goto errorexit; |
| 506 | } |
| 507 | |
| 508 | if (errorcb > ERROR_MAX) { |
| 509 | Py_DECREF(errorcb); |
| 510 | } |
| 511 | Py_XDECREF(exceptionobj); |
| 512 | |
| 513 | rettup = PyTuple_New(2); |
| 514 | if (rettup == NULL) { |
| 515 | Py_DECREF(outputobj); |
| 516 | return NULL; |
| 517 | } |
| 518 | PyTuple_SET_ITEM(rettup, 0, outputobj); |
| 519 | PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inplen_total)); |
| 520 | return rettup; |
| 521 | } |
| 522 | |
| 523 | errorexit: |
| 524 | Py_XDECREF(outputobj); |
| 525 | if (errorcb > ERROR_MAX) { |
| 526 | Py_DECREF(errorcb); |
| 527 | } |
| 528 | Py_XDECREF(exceptionobj); |
| 529 | |
| 530 | return NULL; |
| 531 | } |
| 532 | |
| 533 | static struct PyMethodDef iconvcodec_methods[] = { |
| 534 | {"encode", (PyCFunction)iconvcodec_encode, |
| 535 | METH_VARARGS | METH_KEYWORDS, |
| 536 | iconvcodec_encode__doc__}, |
| 537 | {"decode", (PyCFunction)iconvcodec_decode, |
| 538 | METH_VARARGS | METH_KEYWORDS, |
| 539 | iconvcodec_decode__doc__}, |
| 540 | {NULL, NULL}, |
| 541 | }; |
| 542 | |
| 543 | static PyObject * |
| 544 | iconvcodec_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) |
| 545 | { |
| 546 | PyObject *encobj = NULL; |
| 547 | iconvcodecObject *new = NULL; |
| 548 | |
| 549 | new = (iconvcodecObject *)type->tp_alloc(type, 0); |
| 550 | if (new == NULL) |
| 551 | return NULL; |
| 552 | |
| 553 | new->encoding = NULL; |
| 554 | new->enchdl = new->dechdl = (iconv_t)(-1); |
| 555 | |
| 556 | encobj = PyObject_GetAttrString((PyObject *)new, "encoding"); |
| 557 | if (encobj == NULL) { |
| 558 | PyErr_Clear(); |
| 559 | new->encoding = PyMem_Malloc(sizeof(DEFAULT_ENCODING)); |
| 560 | strcpy(new->encoding, DEFAULT_ENCODING); |
| 561 | } else if (!PyString_Check(encobj)) { |
| 562 | Py_DECREF(encobj); |
| 563 | PyErr_SetString(PyExc_TypeError, |
| 564 | "`encoding' attribute must be a string."); |
| 565 | goto errorexit; |
| 566 | } else { |
| 567 | new->encoding = PyMem_Malloc(PyString_GET_SIZE(encobj) + 1); |
| 568 | strcpy(new->encoding, PyString_AS_STRING(encobj)); |
| 569 | Py_DECREF(encobj); |
| 570 | } |
| 571 | |
| 572 | new->dechdl = iconv_open(UNICODE_ENCODING, new->encoding); |
| 573 | if (new->dechdl == (iconv_t)(-1)) { |
| 574 | PyErr_SetString(PyExc_ValueError, "unsupported decoding"); |
| 575 | goto errorexit; |
| 576 | } |
| 577 | |
| 578 | new->enchdl = iconv_open(new->encoding, UNICODE_ENCODING); |
| 579 | if (new->enchdl == (iconv_t)(-1)) { |
| 580 | PyErr_SetString(PyExc_ValueError, "unsupported encoding"); |
| 581 | iconv_close(new->dechdl); |
| 582 | new->dechdl = (iconv_t)(-1); |
| 583 | goto errorexit; |
| 584 | } |
| 585 | |
| 586 | return (PyObject *)new; |
| 587 | |
| 588 | errorexit: |
| 589 | Py_XDECREF(new); |
| 590 | |
| 591 | return NULL; |
| 592 | } |
| 593 | |
| 594 | static void |
| 595 | iconvcodec_dealloc(iconvcodecObject *self) |
| 596 | { |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 597 | if (self->enchdl != (iconv_t)-1) |
| 598 | iconv_close(self->enchdl); |
| 599 | if (self->dechdl != (iconv_t)-1) |
| 600 | iconv_close(self->dechdl); |
| 601 | if (self->encoding != NULL) |
| 602 | PyMem_Free(self->encoding); |
| 603 | |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 604 | self->ob_type->tp_free((PyObject *)self); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 605 | } |
| 606 | |
| 607 | static PyObject * |
| 608 | iconvcodec_repr(PyObject *self) |
| 609 | { |
| 610 | return PyString_FromFormat("<iconvcodec encoding='%s'>", |
| 611 | ((iconvcodecObject *)self)->encoding); |
| 612 | } |
| 613 | |
Neal Norwitz | 7fe16e7 | 2003-02-04 20:46:50 +0000 | [diff] [blame] | 614 | static PyTypeObject iconvcodec_Type = { |
Jason Tishler | 0c10015 | 2003-02-10 20:48:35 +0000 | [diff] [blame] | 615 | PyObject_HEAD_INIT(NULL) |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 616 | 0, /* Number of items for varobject */ |
| 617 | "iconvcodec", /* Name of this type */ |
| 618 | sizeof(iconvcodecObject), /* Basic object size */ |
| 619 | 0, /* Item size for varobject */ |
| 620 | (destructor)iconvcodec_dealloc, /* tp_dealloc */ |
| 621 | 0, /* tp_print */ |
| 622 | 0, /* tp_getattr */ |
| 623 | 0, /* tp_setattr */ |
| 624 | 0, /* tp_compare */ |
| 625 | iconvcodec_repr, /* tp_repr */ |
| 626 | 0, /* tp_as_number */ |
| 627 | 0, /* tp_as_sequence */ |
| 628 | 0, /* tp_as_mapping */ |
| 629 | 0, /* tp_hash */ |
| 630 | 0, /* tp_call */ |
| 631 | 0, /* tp_str */ |
| 632 | PyObject_GenericGetAttr, /* tp_getattro */ |
| 633 | 0, /* tp_setattro */ |
| 634 | 0, /* tp_as_buffer */ |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 635 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 636 | iconvcodec_doc, /* tp_doc */ |
| 637 | 0, /* tp_traverse */ |
| 638 | 0, /* tp_clear */ |
| 639 | 0, /* tp_richcompare */ |
| 640 | 0, /* tp_weaklistoffset */ |
| 641 | 0, /* tp_iter */ |
| 642 | 0, /* tp_iterext */ |
| 643 | iconvcodec_methods, /* tp_methods */ |
| 644 | 0, /* tp_members */ |
| 645 | 0, /* tp_getset */ |
| 646 | 0, /* tp_base */ |
| 647 | 0, /* tp_dict */ |
| 648 | 0, /* tp_descr_get */ |
| 649 | 0, /* tp_descr_set */ |
| 650 | 0, /* tp_dictoffset */ |
| 651 | 0, /* tp_init */ |
| 652 | PyType_GenericAlloc, /* tp_alloc */ |
| 653 | iconvcodec_new, /* tp_new */ |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 654 | PyObject_Del, /* tp_free */ |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 655 | }; |
| 656 | |
| 657 | static struct PyMethodDef _iconv_codec_methods[] = { |
| 658 | {NULL, NULL}, |
| 659 | }; |
| 660 | |
| 661 | void |
| 662 | init_iconv_codec(void) |
| 663 | { |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 664 | PyObject *m; |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 665 | |
Walter Dörwald | e9b851a | 2003-02-21 18:18:49 +0000 | [diff] [blame] | 666 | char in = '0'; |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 667 | char *inptr = ∈ |
Walter Dörwald | bda1c86 | 2003-02-04 18:02:28 +0000 | [diff] [blame] | 668 | size_t insize = 1; |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 669 | Py_UNICODE out = 0; |
| 670 | char *outptr = (char *)&out; |
Walter Dörwald | bda1c86 | 2003-02-04 18:02:28 +0000 | [diff] [blame] | 671 | size_t outsize = sizeof(out); |
| 672 | size_t res; |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 673 | |
Walter Dörwald | dd8766a | 2003-02-24 20:17:32 +0000 | [diff] [blame] | 674 | iconv_t hdl = iconv_open(UNICODE_ENCODING, "ISO-8859-1"); |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 675 | |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 676 | if (hdl == (iconv_t)-1) { |
Neal Norwitz | 3f5fcc8 | 2003-02-28 17:21:39 +0000 | [diff] [blame] | 677 | PyErr_SetString(PyExc_RuntimeError, |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 678 | "can't initialize the _iconv_codec module: iconv_open() failed"); |
| 679 | return; |
| 680 | } |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 681 | |
| 682 | res = iconv(hdl, &inptr, &insize, &outptr, &outsize); |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 683 | if (res == (size_t)-1) { |
Neal Norwitz | 3f5fcc8 | 2003-02-28 17:21:39 +0000 | [diff] [blame] | 684 | PyErr_SetString(PyExc_RuntimeError, |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 685 | "can't initialize the _iconv_codec module: iconv() failed"); |
| 686 | return; |
| 687 | } |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 688 | |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 689 | /* Check whether conv() returned native endianess or not for the chosen |
| 690 | encoding */ |
Walter Dörwald | e9b851a | 2003-02-21 18:18:49 +0000 | [diff] [blame] | 691 | if (out == 0x30) |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 692 | byteswap = 0; |
| 693 | #if Py_UNICODE_SIZE == 2 |
Walter Dörwald | e9b851a | 2003-02-21 18:18:49 +0000 | [diff] [blame] | 694 | else if (out == 0x3000) |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 695 | #else |
Walter Dörwald | e9b851a | 2003-02-21 18:18:49 +0000 | [diff] [blame] | 696 | else if (out == 0x30000000) |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 697 | #endif |
| 698 | byteswap = 1; |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 699 | else { |
| 700 | iconv_close(hdl); |
Neal Norwitz | 3f5fcc8 | 2003-02-28 17:21:39 +0000 | [diff] [blame] | 701 | PyErr_SetString(PyExc_RuntimeError, |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 702 | "can't initialize the _iconv_codec module: mixed endianess"); |
| 703 | return; |
| 704 | } |
Walter Dörwald | b4ff111 | 2003-01-30 19:55:28 +0000 | [diff] [blame] | 705 | iconv_close(hdl); |
| 706 | |
Jason Tishler | 0c10015 | 2003-02-10 20:48:35 +0000 | [diff] [blame] | 707 | iconvcodec_Type.ob_type = &PyType_Type; |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 708 | m = Py_InitModule("_iconv_codec", _iconv_codec_methods); |
| 709 | |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 710 | PyModule_AddStringConstant(m, "__version__", (char*)__version__); |
Martin v. Löwis | 7a565f0 | 2003-01-27 11:39:04 +0000 | [diff] [blame] | 711 | Py_INCREF(&iconvcodec_Type); |
Martin v. Löwis | 727fe66 | 2003-01-26 11:48:20 +0000 | [diff] [blame] | 712 | PyModule_AddObject(m, "iconvcodec", (PyObject *)(&iconvcodec_Type)); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 713 | PyModule_AddStringConstant(m, "internal_encoding", UNICODE_ENCODING); |
| 714 | |
| 715 | if (PyErr_Occurred()) |
Guido van Rossum | 55dc26c | 2003-02-18 16:11:11 +0000 | [diff] [blame] | 716 | PyErr_SetString(PyExc_RuntimeError, |
| 717 | "can't initialize the _iconv_codec module"); |
Martin v. Löwis | 9789aef | 2003-01-26 11:30:36 +0000 | [diff] [blame] | 718 | } |
| 719 | |
/*
 * ex: ts=8 sts=4 et
 * $Id$
 */