blob: 3570f461e5968169eecad6e08dbbe45b501faa87 [file] [log] [blame]
/*
 * _iconv_codec.c
 *
 * libiconv adaptor for Python iconvcodec
 *
 * Author  : Hye-Shik Chang <perky@FreeBSD.org>
 * Created : 17 January 2003
 */
9
10#include "Python.h"
11#include <string.h>
12#include <iconv.h>
13
/* Revision keyword string; published to Python as the module's
 * `__version__' attribute by the module initialiser. */
static const char *__version__ = "$Revision$";
15
16#if Py_USING_UNICODE
17# if Py_UNICODE_SIZE == 2
18# ifdef __GNU_LIBRARY__
19# define UNICODE_ENCODING "ucs-2"
20# else
21# define UNICODE_ENCODING "ucs-2-internal"
22# endif
23# define MBENCODED_LENGTH_MAX 4
24# elif Py_UNICODE_SIZE == 4
25# ifdef __GNU_LIBRARY__
26# define UNICODE_ENCODING "ucs-4"
27# else
28# define UNICODE_ENCODING "ucs-4-internal"
29# endif
30# define MBENCODED_LENGTH_MAX 6
31# endif
32#else
33# error "Unicode is not available"
34#endif
35
36typedef struct {
37 PyObject_HEAD
38 iconv_t enchdl, dechdl;
39 char *encoding;
40} iconvcodecObject;
41PyDoc_STRVAR(iconvcodec_doc, "iconvcodec object");
42
/*
 * Whether data exchanged with iconv in UNICODE_ENCODING has to be
 * byte-swapped to match the native byte order:
 *    0 = no, 1 = yes, -1 = not determined yet.
 * The value is established once by the module initialiser.
 */
static int byteswap = -1;
Martin v. Löwis9789aef2003-01-26 11:30:36 +000047
/*
 * Sentinel "handlers" for the built-in error schemes.  They are small
 * integers disguised as pointers, not real objects; anything that compares
 * greater than ERROR_MAX is treated as a genuine callable by the codec.
 */
#define ERROR_STRICT    ((PyObject *)(1))
#define ERROR_IGNORE    ((PyObject *)(2))
#define ERROR_REPLACE   ((PyObject *)(3))
#define ERROR_MAX       ERROR_REPLACE

/* Substitutes emitted by the 'replace' scheme. */
#define REPLACEMENT_CHAR_DECODE 0xFFFD
#define REPLACEMENT_CHAR_ENCODE '?'

/* Encoding used when an instance has no `encoding' attribute. */
#define DEFAULT_ENCODING        "utf-8"
57
58
59static PyObject *
60get_errorcallback(const char *errors)
61{
62 if (errors == NULL || strcmp(errors, "strict") == 0)
63 return ERROR_STRICT;
64 else if (strcmp(errors, "ignore") == 0)
65 return ERROR_IGNORE;
66 else if (strcmp(errors, "replace") == 0)
67 return ERROR_REPLACE;
68 else
69 return PyCodec_LookupError(errors);
70}
71
72
73PyDoc_STRVAR(iconvcodec_encode__doc__,
74"I.encode(unicode, [,errors]) -> (string, length consumed)\n\
75\n\
76Return an encoded string version of `unicode'. errors may be given to\n\
77set a different error handling scheme. Default is 'strict' meaning that\n\
78encoding errors raise a UnicodeEncodeError. Other possible values are\n\
79'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
80registered with codecs.register_error that can handle UnicodeEncodeErrors.");
81
82static PyObject *
83iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
84{
85 static char *kwlist[] = { "input", "errors", NULL };
86 Py_UNICODE *input;
87 int inputlen;
88 char *errors = NULL/*strict*/, *out, *out_top;
89 const char *inp, *inp_top;
90 size_t inplen, inplen_total, outlen, outlen_total, estep;
91 PyObject *outputobj = NULL, *errorcb = NULL,
92 *exceptionobj = NULL;
Walter Dörwald757246c2003-01-31 16:26:50 +000093 Py_UNICODE *swappedinput = NULL;
Walter Dörwaldb4ff1112003-01-30 19:55:28 +000094 int swapi;
Martin v. Löwis9789aef2003-01-26 11:30:36 +000095
96 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode",
97 kwlist, &input, &inputlen, &errors))
98 return NULL; /* TypeError */
99
100 errorcb = get_errorcallback(errors);
101 if (errorcb == NULL)
102 return NULL; /* LookupError or something else from error handler */
103
104 inp = inp_top = (char *)input;
105 inplen = inplen_total = (size_t)(inputlen * Py_UNICODE_SIZE);
106
107 outlen = inputlen * MBENCODED_LENGTH_MAX;
108 if (outlen < 16)
109 outlen = 16; /* for iso-2022 codecs */
110
111 outputobj = PyString_FromStringAndSize(NULL, outlen);
112 if (outputobj == NULL)
113 return NULL;
114 out = out_top = PyString_AS_STRING(outputobj);
115 outlen_total = outlen;
116
117 estep = inputlen * Py_UNICODE_SIZE / 2;
118
119#define RESIZE_OUTBUFFER(size) { \
120 size_t toadd = (size); \
121 outlen_total += toadd; \
122 outlen += toadd; \
123 if (_PyString_Resize(&outputobj, outlen_total) == -1) \
124 goto errorexit; \
125 out = PyString_AS_STRING(outputobj) + (out - out_top); \
126 out_top = PyString_AS_STRING(outputobj); \
127}
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000128 if (byteswap) {
129 swappedinput = PyMem_Malloc(inplen);
130 if (swappedinput == NULL)
131 return NULL;
132 for (swapi = 0; swapi<inputlen; ++swapi)
133 {
134 Py_UNICODE c = input[swapi];
135#if Py_UNICODE_SIZE == 2
136 c = ((char *)&c)[0]<<8 | ((char *)&c)[1];
137#else
138 c = ((char *)&c)[0]<<24 | ((char *)&c)[1]<<16 |
139 ((char *)&c)[2]<<8 | ((char *)&c)[3];
140#endif
141 swappedinput[swapi] = c;
142 }
143 inp = inp_top = (char *)swappedinput;
144 }
145
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000146 while (inplen > 0) {
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000147 if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen)
148 == (size_t)-1)
149 {
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000150 char reason[128];
151 int errpos;
152
153 if (errno == E2BIG) {
154 RESIZE_OUTBUFFER(estep);
155 continue;
156 }
157
158 if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) {
159 inplen -= Py_UNICODE_SIZE;
160 inp += Py_UNICODE_SIZE;
161 if (errorcb == ERROR_REPLACE) {
162 if (outlen < 1)
163 RESIZE_OUTBUFFER(errno == EINVAL ? 1 : estep);
164 outlen--;
165 *out++ = REPLACEMENT_CHAR_ENCODE;
166 }
167 if (errno == EINVAL) break;
168 else continue;
169 }
170
171 errpos = (int)(inp - inp_top) / Py_UNICODE_SIZE;
172 sprintf(reason, "Undefined character map from "
173#if Py_UNICODE_SIZE == 2
174 "\\u%04x"
175#elif Py_UNICODE_SIZE == 4
176 "\\u%08x"
177#endif
178 , *(Py_UNICODE *)inp);
179
180 if (exceptionobj == NULL) {
181 if ((exceptionobj = PyUnicodeEncodeError_Create(
182 self->encoding, input, inputlen,
183 errpos, errpos + 1, reason)) == NULL)
184 goto errorexit;
185 } else {
186 if (PyUnicodeEncodeError_SetStart(exceptionobj, errpos) != 0)
187 goto errorexit;
188 if (PyUnicodeEncodeError_SetEnd(exceptionobj, errpos + 1) != 0)
189 goto errorexit;
190 if (PyUnicodeEncodeError_SetReason(exceptionobj, reason) != 0)
191 goto errorexit;
192 }
193
194 if (errorcb == ERROR_STRICT) {
195 PyCodec_StrictErrors(exceptionobj);
196 goto errorexit;
197 } else {
198 PyObject *argsobj, *retobj, *retuni;
199 long newpos;
200
201 argsobj = PyTuple_New(1);
202 if (argsobj == NULL)
203 goto errorexit;
204 PyTuple_SET_ITEM(argsobj, 0, exceptionobj);
205 Py_INCREF(exceptionobj);
206 retobj = PyObject_CallObject(errorcb, argsobj);
207 Py_DECREF(argsobj);
208 if (retobj == NULL)
209 goto errorexit;
210
211 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
212 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
213 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
214 Py_DECREF(retobj);
215 PyErr_SetString(PyExc_ValueError, "encoding error handler "
216 "must return (unicode, int) tuple");
217 goto errorexit;
218 }
219 if (PyUnicode_GET_SIZE(retuni) > 0) {
220#define errorexit errorexit_cbpad
221 PyObject *retstr = NULL;
222 int retstrsize;
223
224 retstr = PyUnicode_AsEncodedString(
225 retuni, self->encoding, NULL);
226 if (retstr == NULL || !PyString_Check(retstr))
227 goto errorexit;
228
229 retstrsize = PyString_GET_SIZE(retstr);
230 if (outlen < retstrsize)
231 RESIZE_OUTBUFFER(errno == EINVAL || retstrsize > estep
232 ? retstrsize - outlen : estep);
233
234 memcpy(out, PyString_AS_STRING(retstr), retstrsize);
235 out += retstrsize;
236 outlen -= retstrsize;
237#undef errorexit
238 if (0) {
239errorexit_cbpad: Py_XDECREF(retobj);
240 Py_XDECREF(retstr);
241 goto errorexit;
242 }
243 Py_DECREF(retstr);
244 }
245
246 newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
247 Py_DECREF(retobj);
248
249 if (newpos < 0)
Walter Dörwald2e0b18a2003-01-31 17:19:08 +0000250 newpos = inputlen + newpos;
251 if (newpos < 0 || newpos > inputlen) {
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000252 PyErr_Format(PyExc_IndexError,
253 "position %ld from error handler out of bounds",
254 newpos);
Walter Dörwald2e0b18a2003-01-31 17:19:08 +0000255 goto errorexit;
256 }
257 if (newpos == inputlen)
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000258 break;
259 inp = inp_top + Py_UNICODE_SIZE * newpos;
260 inplen = inplen_total - Py_UNICODE_SIZE * newpos;
261 }
262 } else
263 break;
264 }
265#undef RESIZE_OUTBUFFER
266
267 {
268 PyObject *rettup;
269 int finalsize;
270
271 finalsize = (int)(out - out_top);
272
273 if (finalsize != outlen_total) {
274 if (_PyString_Resize(&outputobj, finalsize) == -1)
275 goto errorexit;
276 }
277
278 if (errorcb > ERROR_MAX) {
279 Py_DECREF(errorcb);
280 }
281 Py_XDECREF(exceptionobj);
282
283 rettup = PyTuple_New(2);
284 if (rettup == NULL) {
285 Py_DECREF(outputobj);
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000286 if (byteswap)
287 PyMem_Free(swappedinput);
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000288 return NULL;
289 }
290 PyTuple_SET_ITEM(rettup, 0, outputobj);
291 PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inputlen));
292 return rettup;
293 }
294
295errorexit:
296 Py_XDECREF(outputobj);
297 if (errorcb > ERROR_MAX) {
298 Py_DECREF(errorcb);
299 }
300 Py_XDECREF(exceptionobj);
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000301 if (byteswap)
302 PyMem_Free(swappedinput);
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000303
304 return NULL;
305}
306
307PyDoc_STRVAR(iconvcodec_decode__doc__,
308"I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\
309\n\
310Decodes `string' using I, an iconvcodec instance. errors may be given\n\
311to set a different error handling scheme. Default is 'strict' meaning\n\
312that encoding errors raise a UnicodeDecodeError. Other possible values\n\
313are 'ignore' and 'replace' as well as any other name registerd with\n\
314codecs.register_error that is able to handle UnicodeDecodeErrors.");
315
316static PyObject *
317iconvcodec_decode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
318{
319 static char *kwlist[] = { "input", "errors", NULL };
320 char *errors = NULL/*strict*/, *out, *out_top;
321 const char *inp, *inp_top;
322 int inplen_int;
323 size_t inplen, inplen_total, outlen, outlen_total, estep;
324 PyObject *outputobj = NULL, *errorcb = NULL,
325 *exceptionobj = NULL;
326
327 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|s:decode",
328 kwlist, &inp, &inplen_int, &errors))
329 return NULL; /* TypeError */
330
331 errorcb = get_errorcallback(errors);
332 if (errorcb == NULL)
333 return NULL; /* LookupError or something else from error handler */
334
335 inp_top = inp;
336 inplen_total = inplen = (size_t)inplen_int;
337
338 outputobj = PyUnicode_FromUnicode(NULL, inplen);
339 if (outputobj == NULL)
340 return NULL;
341 outlen_total = outlen = PyUnicode_GET_DATA_SIZE(outputobj);
342 out = out_top = (char *)PyUnicode_AS_UNICODE(outputobj);
343
344 estep = outlen / 2;
345
346#define RESIZE_OUTBUFFER(size) { \
347 size_t toadd = (size); \
348 outlen_total += toadd; \
349 outlen += toadd; \
350 if (PyUnicode_Resize(&outputobj, outlen_total/Py_UNICODE_SIZE) == -1) \
351 goto errorexit; \
352 out = (char *)PyUnicode_AS_UNICODE(outputobj) + (out - out_top); \
353 out_top = (char *)PyUnicode_AS_UNICODE(outputobj); \
354}
355 while (inplen > 0) {
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000356 char *oldout = out;
Walter Dörwaldbda1c862003-02-04 18:02:28 +0000357 size_t res = iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen);
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000358
359 if (byteswap) {
360 while (oldout < out)
361 {
362 char c0 = oldout[0];
363#if Py_UNICODE_SIZE == 2
364 oldout[0] = oldout[1];
365 oldout[1] = c0;
366#else
367 char c1 = oldout[1];
368 oldout[0] = oldout[3];
369 oldout[1] = oldout[2];
370 oldout[2] = c1;
371 oldout[3] = c0;
372#endif
373 oldout += sizeof(Py_UNICODE);
374 }
375 }
Walter Dörwaldbda1c862003-02-04 18:02:28 +0000376 if (res == (size_t)-1) {
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000377 char reason[128], *reasonpos = (char *)reason;
378 int errpos;
379
380 if (errno == E2BIG) {
381 RESIZE_OUTBUFFER(estep);
382 continue;
383 }
384
385 if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) {
386 inplen--; inp++;
387 if (errorcb == ERROR_REPLACE) {
388 Py_UNICODE *replp;
389
390 if (outlen < Py_UNICODE_SIZE)
391 RESIZE_OUTBUFFER(
392 errno == EINVAL || Py_UNICODE_SIZE > estep
393 ? Py_UNICODE_SIZE : estep);
394
395 /* some compilers hate casted lvalue */
396 replp = (Py_UNICODE *)out;
397 assert((long)replp % Py_UNICODE_SIZE == 0);/* aligned? */
398 *replp = REPLACEMENT_CHAR_DECODE;
399
400 out += Py_UNICODE_SIZE;
401 outlen -= Py_UNICODE_SIZE;
402 }
403 if (errno == EINVAL) break;
404 else continue;
405 }
406
407 errpos = (int)(inp - inp_top);
408 reasonpos += sprintf(reason, "Invalid multibyte sequence \\x%02x",
409 (unsigned char)*inp);
410 if (inplen > 1) {
411 reasonpos += sprintf(reasonpos,
412 "\\x%02x", (unsigned char)*(inp+1));
413 if (inplen > 2)
414 sprintf(reasonpos, "\\x%02x", (unsigned char)*(inp+2));
415 }
416
417 if (exceptionobj == NULL) {
418 exceptionobj = PyUnicodeDecodeError_Create(
419 self->encoding, inp_top, inplen_total,
420 errpos, errpos + 1, reason);
421 if (exceptionobj == NULL)
422 goto errorexit;
423 } else {
424 if (PyUnicodeDecodeError_SetStart(exceptionobj, errpos) != 0)
425 goto errorexit;
426 if (PyUnicodeDecodeError_SetEnd(exceptionobj, errpos + 1) != 0)
427 goto errorexit;
428 if (PyUnicodeDecodeError_SetReason(exceptionobj, reason) != 0)
429 goto errorexit;
430 }
431
432 if (errorcb == ERROR_STRICT) {
433 PyCodec_StrictErrors(exceptionobj);
434 goto errorexit;
435 } else {
436 PyObject *argsobj, *retobj, *retuni;
437 long newpos;
438
439 argsobj = PyTuple_New(1);
440 if (argsobj == NULL)
441 goto errorexit;
442 PyTuple_SET_ITEM(argsobj, 0, exceptionobj);
443 Py_INCREF(exceptionobj);
444 retobj = PyObject_CallObject(errorcb, argsobj);
445 Py_DECREF(argsobj);
446 if (retobj == NULL)
447 goto errorexit;
448
449 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
450 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
451 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
452 Py_DECREF(retobj);
453 PyErr_SetString(PyExc_ValueError, "decoding error handler "
454 "must return (unicode, int) tuple");
455 goto errorexit;
456 }
457 if (PyUnicode_GET_SIZE(retuni) > 0) {
458#define errorexit errorexit_cbpad
459 size_t retunisize;
460
461 retunisize = PyUnicode_GET_DATA_SIZE(retuni);
462 if (outlen < retunisize)
463 RESIZE_OUTBUFFER(errno == EINVAL || retunisize > estep
464 ? retunisize - outlen : estep);
465
466 memcpy(out, PyUnicode_AS_DATA(retuni), retunisize);
467 out += retunisize;
468 outlen -= retunisize;
469#undef errorexit
470 if (0) {
471errorexit_cbpad: Py_DECREF(retobj);
472 goto errorexit;
473 }
474 }
475
476 newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
477 Py_DECREF(retobj);
478
479 if (newpos < 0)
Walter Dörwald2e0b18a2003-01-31 17:19:08 +0000480 newpos = inplen_total + newpos;
481 if (newpos < 0 || newpos > inplen_total) {
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000482 PyErr_Format(PyExc_IndexError,
483 "position %ld from error handler out of bounds",
484 newpos);
Walter Dörwald2e0b18a2003-01-31 17:19:08 +0000485 goto errorexit;
486 }
487 if (newpos == inplen_total)
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000488 break;
489 inp = inp_top + newpos;
490 inplen = inplen_total - newpos;
491 }
492 } else
493 break;
494 }
495#undef RESIZE_OUTBUFFER
496
497 {
498 PyObject *rettup;
499 int finalsize;
500
501 finalsize = (int)(out - out_top);
502 if (finalsize != outlen_total) {
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000503 if (PyUnicode_Resize(&outputobj, finalsize / Py_UNICODE_SIZE)
504 == -1)
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000505 goto errorexit;
506 }
507
508 if (errorcb > ERROR_MAX) {
509 Py_DECREF(errorcb);
510 }
511 Py_XDECREF(exceptionobj);
512
513 rettup = PyTuple_New(2);
514 if (rettup == NULL) {
515 Py_DECREF(outputobj);
516 return NULL;
517 }
518 PyTuple_SET_ITEM(rettup, 0, outputobj);
519 PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inplen_total));
520 return rettup;
521 }
522
523errorexit:
524 Py_XDECREF(outputobj);
525 if (errorcb > ERROR_MAX) {
526 Py_DECREF(errorcb);
527 }
528 Py_XDECREF(exceptionobj);
529
530 return NULL;
531}
532
533static struct PyMethodDef iconvcodec_methods[] = {
534 {"encode", (PyCFunction)iconvcodec_encode,
535 METH_VARARGS | METH_KEYWORDS,
536 iconvcodec_encode__doc__},
537 {"decode", (PyCFunction)iconvcodec_decode,
538 METH_VARARGS | METH_KEYWORDS,
539 iconvcodec_decode__doc__},
540 {NULL, NULL},
541};
542
543static PyObject *
544iconvcodec_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
545{
546 PyObject *encobj = NULL;
547 iconvcodecObject *new = NULL;
548
549 new = (iconvcodecObject *)type->tp_alloc(type, 0);
550 if (new == NULL)
551 return NULL;
552
553 new->encoding = NULL;
554 new->enchdl = new->dechdl = (iconv_t)(-1);
555
556 encobj = PyObject_GetAttrString((PyObject *)new, "encoding");
557 if (encobj == NULL) {
558 PyErr_Clear();
559 new->encoding = PyMem_Malloc(sizeof(DEFAULT_ENCODING));
560 strcpy(new->encoding, DEFAULT_ENCODING);
561 } else if (!PyString_Check(encobj)) {
562 Py_DECREF(encobj);
563 PyErr_SetString(PyExc_TypeError,
564 "`encoding' attribute must be a string.");
565 goto errorexit;
566 } else {
567 new->encoding = PyMem_Malloc(PyString_GET_SIZE(encobj) + 1);
568 strcpy(new->encoding, PyString_AS_STRING(encobj));
569 Py_DECREF(encobj);
570 }
571
572 new->dechdl = iconv_open(UNICODE_ENCODING, new->encoding);
573 if (new->dechdl == (iconv_t)(-1)) {
574 PyErr_SetString(PyExc_ValueError, "unsupported decoding");
575 goto errorexit;
576 }
577
578 new->enchdl = iconv_open(new->encoding, UNICODE_ENCODING);
579 if (new->enchdl == (iconv_t)(-1)) {
580 PyErr_SetString(PyExc_ValueError, "unsupported encoding");
581 iconv_close(new->dechdl);
582 new->dechdl = (iconv_t)(-1);
583 goto errorexit;
584 }
585
586 return (PyObject *)new;
587
588errorexit:
589 Py_XDECREF(new);
590
591 return NULL;
592}
593
594static void
595iconvcodec_dealloc(iconvcodecObject *self)
596{
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000597 if (self->enchdl != (iconv_t)-1)
598 iconv_close(self->enchdl);
599 if (self->dechdl != (iconv_t)-1)
600 iconv_close(self->dechdl);
601 if (self->encoding != NULL)
602 PyMem_Free(self->encoding);
603
Martin v. Löwis7a565f02003-01-27 11:39:04 +0000604 self->ob_type->tp_free((PyObject *)self);
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000605}
606
607static PyObject *
608iconvcodec_repr(PyObject *self)
609{
610 return PyString_FromFormat("<iconvcodec encoding='%s'>",
611 ((iconvcodecObject *)self)->encoding);
612}
613
Neal Norwitz7fe16e72003-02-04 20:46:50 +0000614static PyTypeObject iconvcodec_Type = {
Jason Tishler0c100152003-02-10 20:48:35 +0000615 PyObject_HEAD_INIT(NULL)
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000616 0, /* Number of items for varobject */
617 "iconvcodec", /* Name of this type */
618 sizeof(iconvcodecObject), /* Basic object size */
619 0, /* Item size for varobject */
620 (destructor)iconvcodec_dealloc, /* tp_dealloc */
621 0, /* tp_print */
622 0, /* tp_getattr */
623 0, /* tp_setattr */
624 0, /* tp_compare */
625 iconvcodec_repr, /* tp_repr */
626 0, /* tp_as_number */
627 0, /* tp_as_sequence */
628 0, /* tp_as_mapping */
629 0, /* tp_hash */
630 0, /* tp_call */
631 0, /* tp_str */
632 PyObject_GenericGetAttr, /* tp_getattro */
633 0, /* tp_setattro */
634 0, /* tp_as_buffer */
Martin v. Löwis7a565f02003-01-27 11:39:04 +0000635 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000636 iconvcodec_doc, /* tp_doc */
637 0, /* tp_traverse */
638 0, /* tp_clear */
639 0, /* tp_richcompare */
640 0, /* tp_weaklistoffset */
641 0, /* tp_iter */
642 0, /* tp_iterext */
643 iconvcodec_methods, /* tp_methods */
644 0, /* tp_members */
645 0, /* tp_getset */
646 0, /* tp_base */
647 0, /* tp_dict */
648 0, /* tp_descr_get */
649 0, /* tp_descr_set */
650 0, /* tp_dictoffset */
651 0, /* tp_init */
652 PyType_GenericAlloc, /* tp_alloc */
653 iconvcodec_new, /* tp_new */
Martin v. Löwis7a565f02003-01-27 11:39:04 +0000654 PyObject_Del, /* tp_free */
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000655};
656
657static struct PyMethodDef _iconv_codec_methods[] = {
658 {NULL, NULL},
659};
660
661void
662init_iconv_codec(void)
663{
Martin v. Löwis727fe662003-01-26 11:48:20 +0000664 PyObject *m;
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000665
Walter Dörwalde9b851a2003-02-21 18:18:49 +0000666 char in = '0';
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000667 char *inptr = &in;
Walter Dörwaldbda1c862003-02-04 18:02:28 +0000668 size_t insize = 1;
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000669 Py_UNICODE out = 0;
670 char *outptr = (char *)&out;
Walter Dörwaldbda1c862003-02-04 18:02:28 +0000671 size_t outsize = sizeof(out);
672 size_t res;
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000673
Walter Dörwalddd8766a2003-02-24 20:17:32 +0000674 iconv_t hdl = iconv_open(UNICODE_ENCODING, "ISO-8859-1");
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000675
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000676 if (hdl == (iconv_t)-1) {
Neal Norwitz3f5fcc82003-02-28 17:21:39 +0000677 PyErr_SetString(PyExc_RuntimeError,
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000678 "can't initialize the _iconv_codec module: iconv_open() failed");
679 return;
680 }
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000681
682 res = iconv(hdl, &inptr, &insize, &outptr, &outsize);
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000683 if (res == (size_t)-1) {
Neal Norwitz3f5fcc82003-02-28 17:21:39 +0000684 PyErr_SetString(PyExc_RuntimeError,
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000685 "can't initialize the _iconv_codec module: iconv() failed");
686 return;
687 }
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000688
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000689 /* Check whether conv() returned native endianess or not for the chosen
690 encoding */
Walter Dörwalde9b851a2003-02-21 18:18:49 +0000691 if (out == 0x30)
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000692 byteswap = 0;
693#if Py_UNICODE_SIZE == 2
Walter Dörwalde9b851a2003-02-21 18:18:49 +0000694 else if (out == 0x3000)
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000695#else
Walter Dörwalde9b851a2003-02-21 18:18:49 +0000696 else if (out == 0x30000000)
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000697#endif
698 byteswap = 1;
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000699 else {
700 iconv_close(hdl);
Neal Norwitz3f5fcc82003-02-28 17:21:39 +0000701 PyErr_SetString(PyExc_RuntimeError,
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000702 "can't initialize the _iconv_codec module: mixed endianess");
703 return;
704 }
Walter Dörwaldb4ff1112003-01-30 19:55:28 +0000705 iconv_close(hdl);
706
Jason Tishler0c100152003-02-10 20:48:35 +0000707 iconvcodec_Type.ob_type = &PyType_Type;
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000708 m = Py_InitModule("_iconv_codec", _iconv_codec_methods);
709
Martin v. Löwis727fe662003-01-26 11:48:20 +0000710 PyModule_AddStringConstant(m, "__version__", (char*)__version__);
Martin v. Löwis7a565f02003-01-27 11:39:04 +0000711 Py_INCREF(&iconvcodec_Type);
Martin v. Löwis727fe662003-01-26 11:48:20 +0000712 PyModule_AddObject(m, "iconvcodec", (PyObject *)(&iconvcodec_Type));
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000713 PyModule_AddStringConstant(m, "internal_encoding", UNICODE_ENCODING);
714
715 if (PyErr_Occurred())
Guido van Rossum55dc26c2003-02-18 16:11:11 +0000716 PyErr_SetString(PyExc_RuntimeError,
717 "can't initialize the _iconv_codec module");
Martin v. Löwis9789aef2003-01-26 11:30:36 +0000718}
719
/*
 * ex: ts=8 sts=4 et
 * $Id$
 */