blob: f51b6f29cccf8789c7a710d10b08f85d8755d422 [file] [log] [blame]
/*
 * multibytecodec.c: Common Multibyte Codec Implementation
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00007#define PY_SSIZE_T_CLEAN
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00008#include "Python.h"
9#include "multibytecodec.h"
10
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000011typedef struct {
12 const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end;
13 unsigned char *outbuf, *outbuf_end;
14 PyObject *excobj, *outobj;
15} MultibyteEncodeBuffer;
16
17typedef struct {
18 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
19 Py_UNICODE *outbuf, *outbuf_end;
20 PyObject *excobj, *outobj;
21} MultibyteDecodeBuffer;
22
23PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
24"I.encode(unicode[, errors]) -> (string, length consumed)\n\
25\n\
26Return an encoded string version of `unicode'. errors may be given to\n\
27set a different error handling scheme. Default is 'strict' meaning that\n\
28encoding errors raise a UnicodeEncodeError. Other possible values are\n\
29'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
30registered with codecs.register_error that can handle UnicodeEncodeErrors.");
31
32PyDoc_STRVAR(MultibyteCodec_Decode__doc__,
33"I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\
34\n\
35Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\
36to set a different error handling scheme. Default is 'strict' meaning\n\
37that encoding errors raise a UnicodeDecodeError. Other possible values\n\
38are 'ignore' and 'replace' as well as any other name registerd with\n\
39codecs.register_error that is able to handle UnicodeDecodeErrors.");
40
41PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__,
42"I.StreamReader(stream[, errors]) -> StreamReader instance");
43
44PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__,
45"I.StreamWriter(stream[, errors]) -> StreamWriter instance");
46
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +000047static char *codeckwarglist[] = {"input", "errors", NULL};
48static char *streamkwarglist[] = {"stream", "errors", NULL};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000049
50static PyObject *multibytecodec_encode(MultibyteCodec *,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000051 MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000052 PyObject *, int);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000053static PyObject *mbstreamreader_create(MultibyteCodec *,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000054 PyObject *, const char *);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000055static PyObject *mbstreamwriter_create(MultibyteCodec *,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000056 PyObject *, const char *);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000057
/* Encoder flag: reset after an encoding session.  The expansion is
 * parenthesized so the shift cannot re-associate with neighbouring
 * operators at the point of use. */
#define MBENC_RESET     (MBENC_MAX << 1)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000059
60static PyObject *
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000061make_tuple(PyObject *object, Py_ssize_t len)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000062{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000063 PyObject *v, *w;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000064
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000065 if (object == NULL)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000066 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000067
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000068 v = PyTuple_New(2);
69 if (v == NULL) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000070 Py_DECREF(object);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000071 return NULL;
72 }
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000073 PyTuple_SET_ITEM(v, 0, object);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000074
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000075 w = PyInt_FromSsize_t(len);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000076 if (w == NULL) {
77 Py_DECREF(v);
78 return NULL;
79 }
80 PyTuple_SET_ITEM(v, 1, w);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000081
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000082 return v;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000083}
84
85static PyObject *
86get_errorcallback(const char *errors)
87{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000088 if (errors == NULL || strcmp(errors, "strict") == 0)
89 return ERROR_STRICT;
90 else if (strcmp(errors, "ignore") == 0)
91 return ERROR_IGNORE;
92 else if (strcmp(errors, "replace") == 0)
93 return ERROR_REPLACE;
94 else {
95 return PyCodec_LookupError(errors);
96 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000097}
98
99static int
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000100expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000101{
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000102 Py_ssize_t orgpos, orgsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000103
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000104 orgpos = (Py_ssize_t)((char*)buf->outbuf -
105 PyString_AS_STRING(buf->outobj));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000106 orgsize = PyString_GET_SIZE(buf->outobj);
107 if (_PyString_Resize(&buf->outobj, orgsize + (
108 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
109 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000110
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000111 buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;
112 buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj)
113 + PyString_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000114
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000115 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000116}
/*
 * Make sure at least `s' bytes are free in the encode buffer, growing it
 * with expand_encodebuffer() when they are not.  Any `s' below 1 forces
 * a growth.  Jumps to the enclosing function's `errorexit' label when the
 * growth fails.
 *
 * The free space is computed as a pointer difference so that no pointer
 * past the end of the buffer is ever formed.
 */
#define REQUIRE_ENCODEBUFFER(buf, s) {                                  \
    if ((s) < 1 || (s) > (buf)->outbuf_end - (buf)->outbuf)             \
        if (expand_encodebuffer((buf), (s)) == -1)                      \
            goto errorexit;                                             \
}
122
123static int
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000124expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000125{
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000126 Py_ssize_t orgpos, orgsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000127
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000128 orgpos = (Py_ssize_t)(buf->outbuf -
129 PyUnicode_AS_UNICODE(buf->outobj));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000130 orgsize = PyUnicode_GET_SIZE(buf->outobj);
131 if (PyUnicode_Resize(&buf->outobj, orgsize + (
132 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
133 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000134
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000135 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
136 buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
137 + PyUnicode_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000138
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000139 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000140}
/*
 * Make sure at least `s' characters are free in the decode buffer,
 * growing it with expand_decodebuffer() when they are not.  Any `s'
 * below 1 forces a growth.  Jumps to the enclosing function's
 * `errorexit' label when the growth fails.
 *
 * The free space is computed as a pointer difference so that no pointer
 * past the end of the buffer is ever formed.
 */
#define REQUIRE_DECODEBUFFER(buf, s) {                                  \
    if ((s) < 1 || (s) > (buf)->outbuf_end - (buf)->outbuf)             \
        if (expand_decodebuffer((buf), (s)) == -1)                      \
            goto errorexit;                                             \
}
146
147static int
148multibytecodec_encerror(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000149 MultibyteCodec_State *state,
150 MultibyteEncodeBuffer *buf,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000151 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000152{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000153 PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000154 Py_ssize_t retstrsize, newpos;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000155 const char *reason;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000156 Py_ssize_t esize, start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000157
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000158 if (e > 0) {
159 reason = "illegal multibyte sequence";
160 esize = e;
161 }
162 else {
163 switch (e) {
164 case MBERR_TOOSMALL:
165 REQUIRE_ENCODEBUFFER(buf, -1);
166 return 0; /* retry it */
167 case MBERR_TOOFEW:
168 reason = "incomplete multibyte sequence";
169 esize = (size_t)(buf->inbuf_end - buf->inbuf);
170 break;
171 case MBERR_INTERNAL:
172 PyErr_SetString(PyExc_RuntimeError,
173 "internal codec error");
174 return -1;
175 default:
176 PyErr_SetString(PyExc_RuntimeError,
177 "unknown runtime error");
178 return -1;
179 }
180 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000181
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000182 if (errors == ERROR_REPLACE) {
183 const Py_UNICODE replchar = '?', *inbuf = &replchar;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000184 Py_ssize_t r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000185
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000186 for (;;) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000187 Py_ssize_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000188
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000189 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000190 r = codec->encode(state, codec->config, &inbuf, 1,
191 &buf->outbuf, outleft, 0);
192 if (r == MBERR_TOOSMALL) {
193 REQUIRE_ENCODEBUFFER(buf, -1);
194 continue;
195 }
196 else
197 break;
198 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000199
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000200 if (r != 0) {
201 REQUIRE_ENCODEBUFFER(buf, 1);
202 *buf->outbuf++ = '?';
203 }
204 }
205 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
206 buf->inbuf += esize;
207 return 0;
208 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000209
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000210 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000211 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000212
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000213 /* use cached exception object if available */
214 if (buf->excobj == NULL) {
215 buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
216 buf->inbuf_top,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000217 buf->inbuf_end - buf->inbuf_top,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000218 start, end, reason);
219 if (buf->excobj == NULL)
220 goto errorexit;
221 }
222 else
223 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
224 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
225 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
226 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000227
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000228 if (errors == ERROR_STRICT) {
229 PyCodec_StrictErrors(buf->excobj);
230 goto errorexit;
231 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000232
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000233 argsobj = PyTuple_New(1);
234 if (argsobj == NULL)
235 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000236
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000237 PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
238 Py_INCREF(buf->excobj);
239 retobj = PyObject_CallObject(errors, argsobj);
240 Py_DECREF(argsobj);
241 if (retobj == NULL)
242 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000243
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000244 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
245 !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
246 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
247 PyErr_SetString(PyExc_ValueError,
248 "encoding error handler must return "
249 "(unicode, int) tuple");
250 goto errorexit;
251 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000252
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000253 {
254 const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000255
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000256 retstr = multibytecodec_encode(codec, state, &uraw,
257 PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
258 MBENC_FLUSH);
259 if (retstr == NULL)
260 goto errorexit;
261 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000262
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000263 retstrsize = PyString_GET_SIZE(retstr);
264 REQUIRE_ENCODEBUFFER(buf, retstrsize);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000265
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000266 memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
267 buf->outbuf += retstrsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000268
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000269 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000270 if (newpos < 0)
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000271 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000272 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
273 PyErr_Format(PyExc_IndexError,
274 "position %d from error handler out of bounds",
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000275 (int)newpos);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000276 goto errorexit;
277 }
278 buf->inbuf = buf->inbuf_top + newpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000279
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000280 Py_DECREF(retobj);
281 Py_DECREF(retstr);
282 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000283
284errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000285 Py_XDECREF(retobj);
286 Py_XDECREF(retstr);
287 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000288}
289
290static int
291multibytecodec_decerror(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000292 MultibyteCodec_State *state,
293 MultibyteDecodeBuffer *buf,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000294 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000295{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000296 PyObject *argsobj, *retobj = NULL, *retuni = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000297 Py_ssize_t retunisize, newpos;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000298 const char *reason;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000299 Py_ssize_t esize, start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000300
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000301 if (e > 0) {
302 reason = "illegal multibyte sequence";
303 esize = e;
304 }
305 else {
306 switch (e) {
307 case MBERR_TOOSMALL:
308 REQUIRE_DECODEBUFFER(buf, -1);
309 return 0; /* retry it */
310 case MBERR_TOOFEW:
311 reason = "incomplete multibyte sequence";
312 esize = (size_t)(buf->inbuf_end - buf->inbuf);
313 break;
314 case MBERR_INTERNAL:
315 PyErr_SetString(PyExc_RuntimeError,
316 "internal codec error");
317 return -1;
318 default:
319 PyErr_SetString(PyExc_RuntimeError,
320 "unknown runtime error");
321 return -1;
322 }
323 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000324
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000325 if (errors == ERROR_REPLACE) {
326 REQUIRE_DECODEBUFFER(buf, 1);
327 *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
328 }
329 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
330 buf->inbuf += esize;
331 return 0;
332 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000333
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000334 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000335 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000336
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000337 /* use cached exception object if available */
338 if (buf->excobj == NULL) {
339 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
Hye-Shik Changf5a149a2004-08-19 17:49:56 +0000340 (const char *)buf->inbuf_top,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000341 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000342 start, end, reason);
343 if (buf->excobj == NULL)
344 goto errorexit;
345 }
346 else
347 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
348 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
349 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
350 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000351
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000352 if (errors == ERROR_STRICT) {
353 PyCodec_StrictErrors(buf->excobj);
354 goto errorexit;
355 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000356
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000357 argsobj = PyTuple_New(1);
358 if (argsobj == NULL)
359 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000360
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000361 PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
362 Py_INCREF(buf->excobj);
363 retobj = PyObject_CallObject(errors, argsobj);
364 Py_DECREF(argsobj);
365 if (retobj == NULL)
366 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000367
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000368 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
369 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
370 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
371 PyErr_SetString(PyExc_ValueError,
372 "decoding error handler must return "
373 "(unicode, int) tuple");
374 goto errorexit;
375 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000376
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000377 retunisize = PyUnicode_GET_SIZE(retuni);
378 if (retunisize > 0) {
379 REQUIRE_DECODEBUFFER(buf, retunisize);
380 memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
381 retunisize * Py_UNICODE_SIZE);
382 buf->outbuf += retunisize;
383 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000384
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000385 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000386 if (newpos < 0)
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000387 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000388 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
389 PyErr_Format(PyExc_IndexError,
390 "position %d from error handler out of bounds",
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000391 (int)newpos);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000392 goto errorexit;
393 }
394 buf->inbuf = buf->inbuf_top + newpos;
395 Py_DECREF(retobj);
396 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000397
398errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000399 Py_XDECREF(retobj);
400 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000401}
402
403static PyObject *
404multibytecodec_encode(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000405 MultibyteCodec_State *state,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000406 const Py_UNICODE **data, Py_ssize_t datalen,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000407 PyObject *errors, int flags)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000408{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000409 MultibyteEncodeBuffer buf;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000410 Py_ssize_t finalsize, r = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000411
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000412 if (datalen == 0)
413 return PyString_FromString("");
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000414
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000415 buf.excobj = NULL;
416 buf.inbuf = buf.inbuf_top = *data;
417 buf.inbuf_end = buf.inbuf_top + datalen;
418 buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);
419 if (buf.outobj == NULL)
420 goto errorexit;
421 buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj);
422 buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000423
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000424 while (buf.inbuf < buf.inbuf_end) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000425 Py_ssize_t inleft, outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000426
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000427 /* we don't reuse inleft and outleft here.
428 * error callbacks can relocate the cursor anywhere on buffer*/
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000429 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
430 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000431 r = codec->encode(state, codec->config, &buf.inbuf, inleft,
432 &buf.outbuf, outleft, flags);
433 *data = buf.inbuf;
434 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
435 break;
436 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
437 goto errorexit;
438 else if (r == MBERR_TOOFEW)
439 break;
440 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000441
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000442 if (codec->encreset != NULL)
443 for (;;) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000444 Py_ssize_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000445
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000446 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000447 r = codec->encreset(state, codec->config, &buf.outbuf,
448 outleft);
449 if (r == 0)
450 break;
451 else if (multibytecodec_encerror(codec, state,
452 &buf, errors, r))
453 goto errorexit;
454 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000455
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000456 finalsize = (Py_ssize_t)((char*)buf.outbuf -
457 PyString_AS_STRING(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000458
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000459 if (finalsize != PyString_GET_SIZE(buf.outobj))
460 if (_PyString_Resize(&buf.outobj, finalsize) == -1)
461 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000462
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000463 Py_XDECREF(buf.excobj);
464 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000465
466errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000467 Py_XDECREF(buf.excobj);
468 Py_XDECREF(buf.outobj);
469 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000470}
471
472static PyObject *
473MultibyteCodec_Encode(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000474 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000475{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000476 MultibyteCodec_State state;
477 Py_UNICODE *data;
478 PyObject *errorcb, *r, *arg, *ucvt;
479 const char *errors = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000480 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000481
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000482 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
483 codeckwarglist, &arg, &errors))
484 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000485
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000486 if (PyUnicode_Check(arg))
487 ucvt = NULL;
488 else {
489 arg = ucvt = PyObject_Unicode(arg);
490 if (arg == NULL)
491 return NULL;
492 else if (!PyUnicode_Check(arg)) {
493 PyErr_SetString(PyExc_TypeError,
494 "couldn't convert the object to unicode.");
495 Py_DECREF(ucvt);
496 return NULL;
497 }
498 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000499
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000500 data = PyUnicode_AS_UNICODE(arg);
501 datalen = PyUnicode_GET_SIZE(arg);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000502
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000503 errorcb = get_errorcallback(errors);
504 if (errorcb == NULL) {
505 Py_XDECREF(ucvt);
506 return NULL;
507 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000508
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000509 if (self->codec->encinit != NULL &&
510 self->codec->encinit(&state, self->codec->config) != 0)
511 goto errorexit;
512 r = multibytecodec_encode(self->codec, &state,
513 (const Py_UNICODE **)&data, datalen, errorcb,
514 MBENC_FLUSH | MBENC_RESET);
515 if (r == NULL)
516 goto errorexit;
517
518 if (errorcb > ERROR_MAX) {
519 Py_DECREF(errorcb);
520 }
521 Py_XDECREF(ucvt);
522 return make_tuple(r, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000523
524errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000525 if (errorcb > ERROR_MAX) {
526 Py_DECREF(errorcb);
527 }
528 Py_XDECREF(ucvt);
529 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000530}
531
532static PyObject *
533MultibyteCodec_Decode(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000534 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000535{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000536 MultibyteCodec_State state;
537 MultibyteDecodeBuffer buf;
538 PyObject *errorcb;
539 const char *data, *errors = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000540 Py_ssize_t datalen, finalsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000541
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000542 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode",
543 codeckwarglist, &data, &datalen, &errors))
544 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000545
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000546 errorcb = get_errorcallback(errors);
547 if (errorcb == NULL)
548 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000549
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000550 if (datalen == 0) {
551 if (errorcb > ERROR_MAX) {
552 Py_DECREF(errorcb);
553 }
554 return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
555 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000556
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000557 buf.outobj = buf.excobj = NULL;
558 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
559 buf.inbuf_end = buf.inbuf_top + datalen;
560 buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
561 if (buf.outobj == NULL)
562 goto errorexit;
563 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
564 buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000565
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000566 if (self->codec->decinit != NULL &&
567 self->codec->decinit(&state, self->codec->config) != 0)
568 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000569
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000570 while (buf.inbuf < buf.inbuf_end) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000571 Py_ssize_t inleft, outleft, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000572
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000573 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
574 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000575
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000576 r = self->codec->decode(&state, self->codec->config,
577 &buf.inbuf, inleft, &buf.outbuf, outleft);
578 if (r == 0)
579 break;
580 else if (multibytecodec_decerror(self->codec, &state,
581 &buf, errorcb, r))
582 goto errorexit;
583 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000584
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000585 finalsize = (Py_ssize_t)(buf.outbuf -
586 PyUnicode_AS_UNICODE(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000587
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000588 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
589 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
590 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000591
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000592 Py_XDECREF(buf.excobj);
593 if (errorcb > ERROR_MAX) {
594 Py_DECREF(errorcb);
595 }
596 return make_tuple(buf.outobj, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000597
598errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000599 if (errorcb > ERROR_MAX) {
600 Py_DECREF(errorcb);
601 }
602 Py_XDECREF(buf.excobj);
603 Py_XDECREF(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000604
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000605 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000606}
607
608static PyObject *
609MultibyteCodec_StreamReader(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000610 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000611{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000612 PyObject *stream;
613 char *errors = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000614
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader",
616 streamkwarglist, &stream, &errors))
617 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000618
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000619 return mbstreamreader_create(self->codec, stream, errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000620}
621
622static PyObject *
623MultibyteCodec_StreamWriter(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000624 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000625{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000626 PyObject *stream;
627 char *errors = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000628
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000629 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter",
630 streamkwarglist, &stream, &errors))
631 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000632
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000633 return mbstreamwriter_create(self->codec, stream, errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000634}
635
636static struct PyMethodDef multibytecodec_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000637 {"encode", (PyCFunction)MultibyteCodec_Encode,
638 METH_VARARGS | METH_KEYWORDS,
639 MultibyteCodec_Encode__doc__},
640 {"decode", (PyCFunction)MultibyteCodec_Decode,
641 METH_VARARGS | METH_KEYWORDS,
642 MultibyteCodec_Decode__doc__},
643 {"StreamReader",(PyCFunction)MultibyteCodec_StreamReader,
644 METH_VARARGS | METH_KEYWORDS,
645 MultibyteCodec_StreamReader__doc__},
646 {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter,
647 METH_VARARGS | METH_KEYWORDS,
648 MultibyteCodec_StreamWriter__doc__},
649 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000650};
651
652static void
653multibytecodec_dealloc(MultibyteCodecObject *self)
654{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000655 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000656}
657
658
659
660static PyTypeObject MultibyteCodec_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000661 PyObject_HEAD_INIT(NULL)
662 0, /* ob_size */
663 "MultibyteCodec", /* tp_name */
664 sizeof(MultibyteCodecObject), /* tp_basicsize */
665 0, /* tp_itemsize */
666 /* methods */
667 (destructor)multibytecodec_dealloc, /* tp_dealloc */
668 0, /* tp_print */
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000669 0, /* tp_getattr */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000670 0, /* tp_setattr */
671 0, /* tp_compare */
672 0, /* tp_repr */
673 0, /* tp_as_number */
674 0, /* tp_as_sequence */
675 0, /* tp_as_mapping */
676 0, /* tp_hash */
677 0, /* tp_call */
678 0, /* tp_str */
679 PyObject_GenericGetAttr, /* tp_getattro */
680 0, /* tp_setattro */
681 0, /* tp_as_buffer */
682 Py_TPFLAGS_DEFAULT, /* tp_flags */
683 0, /* tp_doc */
684 0, /* tp_traverse */
685 0, /* tp_clear */
686 0, /* tp_richcompare */
687 0, /* tp_weaklistoffset */
688 0, /* tp_iter */
689 0, /* tp_iterext */
690 multibytecodec_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000691};
692
693static PyObject *
694mbstreamreader_iread(MultibyteStreamReaderObject *self,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000695 const char *method, Py_ssize_t sizehint)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000696{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000697 MultibyteDecodeBuffer buf;
698 PyObject *cres;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000699 Py_ssize_t rsize, r, finalsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000700
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000701 if (sizehint == 0)
702 return PyUnicode_FromUnicode(NULL, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000703
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000704 buf.outobj = buf.excobj = NULL;
705 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000706
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000707 for (;;) {
708 if (sizehint < 0)
709 cres = PyObject_CallMethod(self->stream,
710 (char *)method, NULL);
711 else
712 cres = PyObject_CallMethod(self->stream,
713 (char *)method, "i", sizehint);
714 if (cres == NULL)
715 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000716
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000717 if (!PyString_Check(cres)) {
718 PyErr_SetString(PyExc_TypeError,
719 "stream function returned a "
720 "non-string object");
721 goto errorexit;
722 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000723
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000724 if (self->pendingsize > 0) {
725 PyObject *ctr;
726 char *ctrdata;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000727
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000728 rsize = PyString_GET_SIZE(cres) + self->pendingsize;
729 ctr = PyString_FromStringAndSize(NULL, rsize);
730 if (ctr == NULL)
731 goto errorexit;
732 ctrdata = PyString_AS_STRING(ctr);
733 memcpy(ctrdata, self->pending, self->pendingsize);
734 memcpy(ctrdata + self->pendingsize,
735 PyString_AS_STRING(cres),
736 PyString_GET_SIZE(cres));
737 Py_DECREF(cres);
738 cres = ctr;
739 self->pendingsize = 0;
740 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000741
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000742 rsize = PyString_GET_SIZE(cres);
743 buf.inbuf = buf.inbuf_top =
744 (unsigned char *)PyString_AS_STRING(cres);
745 buf.inbuf_end = buf.inbuf_top + rsize;
746 if (buf.outobj == NULL) {
747 buf.outobj = PyUnicode_FromUnicode(NULL, rsize);
748 if (buf.outobj == NULL)
749 goto errorexit;
750 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
751 buf.outbuf_end = buf.outbuf +
752 PyUnicode_GET_SIZE(buf.outobj);
753 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000754
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000755 r = 0;
756 if (rsize > 0)
757 while (buf.inbuf < buf.inbuf_end) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000758 Py_ssize_t inleft, outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000759
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000760 inleft = (Py_ssize_t)(buf.inbuf_end -
761 buf.inbuf);
762 outleft = (Py_ssize_t)(buf.outbuf_end -
763 buf.outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000764
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000765 r = self->codec->decode(&self->state,
766 self->codec->config,
767 &buf.inbuf, inleft,
768 &buf.outbuf, outleft);
769 if (r == 0 || r == MBERR_TOOFEW)
770 break;
771 else if (multibytecodec_decerror(self->codec,
772 &self->state, &buf,
773 self->errors, r))
774 goto errorexit;
775 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000776
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000777 if (rsize == 0 || sizehint < 0) { /* end of file */
778 if (buf.inbuf < buf.inbuf_end &&
779 multibytecodec_decerror(self->codec, &self->state,
780 &buf, self->errors, MBERR_TOOFEW))
781 goto errorexit;
782 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000783
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000784 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000785 Py_ssize_t npendings;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000786
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000787 /* we can't assume that pendingsize is still 0 here.
788 * because this function can be called recursively
789 * from error callback */
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000790 npendings = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000791 if (npendings + self->pendingsize > MAXDECPENDING) {
792 PyErr_SetString(PyExc_RuntimeError,
793 "pending buffer overflow");
794 goto errorexit;
795 }
796 memcpy(self->pending + self->pendingsize, buf.inbuf,
797 npendings);
798 self->pendingsize += npendings;
799 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000800
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000801 finalsize = (Py_ssize_t)(buf.outbuf -
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000802 PyUnicode_AS_UNICODE(buf.outobj));
803 Py_DECREF(cres);
804 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000805
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000806 if (sizehint < 0 || finalsize != 0 || rsize == 0)
807 break;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000808
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000809 sizehint = 1; /* read 1 more byte and retry */
810 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000811
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000812 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
813 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
814 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000815
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000816 Py_XDECREF(cres);
817 Py_XDECREF(buf.excobj);
818 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000819
820errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000821 Py_XDECREF(cres);
822 Py_XDECREF(buf.excobj);
823 Py_XDECREF(buf.outobj);
824 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000825}
826
827static PyObject *
828mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
829{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000830 PyObject *sizeobj = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000831 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000832
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000833 if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
834 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000835
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000836 if (sizeobj == Py_None || sizeobj == NULL)
837 size = -1;
838 else if (PyInt_Check(sizeobj))
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000839 size = PyInt_AsSsize_t(sizeobj);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000840 else {
841 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
842 return NULL;
843 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000844
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000845 return mbstreamreader_iread(self, "read", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000846}
847
848static PyObject *
849mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
850{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000851 PyObject *sizeobj = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000852 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000853
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000854 if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
855 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000856
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000857 if (sizeobj == Py_None || sizeobj == NULL)
858 size = -1;
859 else if (PyInt_Check(sizeobj))
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000860 size = PyInt_AsSsize_t(sizeobj);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000861 else {
862 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
863 return NULL;
864 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000865
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000866 return mbstreamreader_iread(self, "readline", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000867}
868
869static PyObject *
870mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
871{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000872 PyObject *sizehintobj = NULL, *r, *sr;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000873 Py_ssize_t sizehint;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000874
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000875 if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
876 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000877
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000878 if (sizehintobj == Py_None || sizehintobj == NULL)
879 sizehint = -1;
880 else if (PyInt_Check(sizehintobj))
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000881 sizehint = PyInt_AsSsize_t(sizehintobj);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000882 else {
883 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
884 return NULL;
885 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000886
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000887 r = mbstreamreader_iread(self, "read", sizehint);
888 if (r == NULL)
889 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000890
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000891 sr = PyUnicode_Splitlines(r, 1);
892 Py_DECREF(r);
893 return sr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000894}
895
896static PyObject *
897mbstreamreader_reset(MultibyteStreamReaderObject *self)
898{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000899 if (self->codec->decreset != NULL &&
900 self->codec->decreset(&self->state, self->codec->config) != 0)
901 return NULL;
902 self->pendingsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000903
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000904 Py_INCREF(Py_None);
905 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000906}
907
908static struct PyMethodDef mbstreamreader_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000909 {"read", (PyCFunction)mbstreamreader_read,
910 METH_VARARGS, NULL},
911 {"readline", (PyCFunction)mbstreamreader_readline,
912 METH_VARARGS, NULL},
913 {"readlines", (PyCFunction)mbstreamreader_readlines,
914 METH_VARARGS, NULL},
915 {"reset", (PyCFunction)mbstreamreader_reset,
916 METH_NOARGS, NULL},
917 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000918};
919
920static void
921mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
922{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000923 if (self->errors > ERROR_MAX) {
924 Py_DECREF(self->errors);
925 }
926 Py_DECREF(self->stream);
927 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000928}
929
930
931
932static PyTypeObject MultibyteStreamReader_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000933 PyObject_HEAD_INIT(NULL)
934 0, /* ob_size */
935 "MultibyteStreamReader", /* tp_name */
936 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
937 0, /* tp_itemsize */
938 /* methods */
939 (destructor)mbstreamreader_dealloc, /* tp_dealloc */
940 0, /* tp_print */
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000941 0, /* tp_getattr */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000942 0, /* tp_setattr */
943 0, /* tp_compare */
944 0, /* tp_repr */
945 0, /* tp_as_number */
946 0, /* tp_as_sequence */
947 0, /* tp_as_mapping */
948 0, /* tp_hash */
949 0, /* tp_call */
950 0, /* tp_str */
951 PyObject_GenericGetAttr, /* tp_getattro */
952 0, /* tp_setattro */
953 0, /* tp_as_buffer */
954 Py_TPFLAGS_DEFAULT, /* tp_flags */
955 0, /* tp_doc */
956 0, /* tp_traverse */
957 0, /* tp_clear */
958 0, /* tp_richcompare */
959 0, /* tp_weaklistoffset */
960 0, /* tp_iter */
961 0, /* tp_iterext */
962 mbstreamreader_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000963};
964
965static int
966mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000967 PyObject *unistr)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000968{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000969 PyObject *wr, *ucvt, *r = NULL;
Hye-Shik Changf5a149a2004-08-19 17:49:56 +0000970 Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000971 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000972
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000973 if (PyUnicode_Check(unistr))
974 ucvt = NULL;
975 else {
976 unistr = ucvt = PyObject_Unicode(unistr);
977 if (unistr == NULL)
978 return -1;
979 else if (!PyUnicode_Check(unistr)) {
980 PyErr_SetString(PyExc_TypeError,
981 "couldn't convert the object to unicode.");
982 Py_DECREF(ucvt);
983 return -1;
984 }
985 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000986
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000987 datalen = PyUnicode_GET_SIZE(unistr);
988 if (datalen == 0) {
989 Py_XDECREF(ucvt);
990 return 0;
991 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000992
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000993 if (self->pendingsize > 0) {
994 inbuf_tmp = PyMem_New(Py_UNICODE, datalen + self->pendingsize);
995 if (inbuf_tmp == NULL)
996 goto errorexit;
997 memcpy(inbuf_tmp, self->pending,
998 Py_UNICODE_SIZE * self->pendingsize);
999 memcpy(inbuf_tmp + self->pendingsize,
1000 PyUnicode_AS_UNICODE(unistr),
1001 Py_UNICODE_SIZE * datalen);
1002 datalen += self->pendingsize;
1003 self->pendingsize = 0;
1004 inbuf = inbuf_tmp;
1005 }
1006 else
1007 inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001008
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001009 inbuf_end = inbuf + datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001010
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001011 r = multibytecodec_encode(self->codec, &self->state,
1012 (const Py_UNICODE **)&inbuf, datalen, self->errors, 0);
1013 if (r == NULL)
1014 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001015
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001016 if (inbuf < inbuf_end) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001017 self->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001018 if (self->pendingsize > MAXENCPENDING) {
1019 self->pendingsize = 0;
1020 PyErr_SetString(PyExc_RuntimeError,
1021 "pending buffer overflow");
1022 goto errorexit;
1023 }
1024 memcpy(self->pending, inbuf,
1025 self->pendingsize * Py_UNICODE_SIZE);
1026 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001027
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001028 wr = PyObject_CallMethod(self->stream, "write", "O", r);
1029 if (wr == NULL)
1030 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001031
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001032 if (inbuf_tmp != NULL)
1033 PyMem_Del(inbuf_tmp);
1034 Py_DECREF(r);
1035 Py_DECREF(wr);
1036 Py_XDECREF(ucvt);
1037 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001038
1039errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001040 if (inbuf_tmp != NULL)
1041 PyMem_Del(inbuf_tmp);
1042 Py_XDECREF(r);
1043 Py_XDECREF(ucvt);
1044 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001045}
1046
1047static PyObject *
1048mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *args)
1049{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001050 PyObject *strobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001051
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001052 if (!PyArg_ParseTuple(args, "O:write", &strobj))
1053 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001054
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001055 if (mbstreamwriter_iwrite(self, strobj))
1056 return NULL;
1057 else {
1058 Py_INCREF(Py_None);
1059 return Py_None;
1060 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001061}
1062
1063static PyObject *
1064mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *args)
1065{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001066 PyObject *lines, *strobj;
1067 int i, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001068
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001069 if (!PyArg_ParseTuple(args, "O:writelines", &lines))
1070 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001071
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001072 if (!PySequence_Check(lines)) {
1073 PyErr_SetString(PyExc_TypeError,
1074 "arg must be a sequence object");
1075 return NULL;
1076 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001077
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001078 for (i = 0; i < PySequence_Length(lines); i++) {
1079 /* length can be changed even within this loop */
1080 strobj = PySequence_GetItem(lines, i);
1081 if (strobj == NULL)
1082 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001083
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001084 r = mbstreamwriter_iwrite(self, strobj);
1085 Py_DECREF(strobj);
1086 if (r == -1)
1087 return NULL;
1088 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001089
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001090 Py_INCREF(Py_None);
1091 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001092}
1093
1094static PyObject *
1095mbstreamwriter_reset(MultibyteStreamWriterObject *self)
1096{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001097 const Py_UNICODE *pending;
1098 PyObject *pwrt;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001099
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001100 pending = self->pending;
1101 pwrt = multibytecodec_encode(self->codec, &self->state,
1102 &pending, self->pendingsize, self->errors,
1103 MBENC_FLUSH | MBENC_RESET);
1104 /* some pending buffer can be truncated when UnicodeEncodeError is
1105 * raised on 'strict' mode. but, 'reset' method is designed to
1106 * reset the pending buffer or states so failed string sequence
1107 * ought to be missed */
1108 self->pendingsize = 0;
1109 if (pwrt == NULL)
1110 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001111
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001112 if (PyString_Size(pwrt) > 0) {
1113 PyObject *wr;
1114 wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
1115 if (wr == NULL) {
1116 Py_DECREF(pwrt);
1117 return NULL;
1118 }
1119 }
1120 Py_DECREF(pwrt);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001121
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001122 Py_INCREF(Py_None);
1123 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001124}
1125
1126static void
1127mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1128{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001129 if (self->errors > ERROR_MAX) {
1130 Py_DECREF(self->errors);
1131 }
1132 Py_DECREF(self->stream);
1133 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001134}
1135
1136static struct PyMethodDef mbstreamwriter_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001137 {"write", (PyCFunction)mbstreamwriter_write,
1138 METH_VARARGS, NULL},
1139 {"writelines", (PyCFunction)mbstreamwriter_writelines,
1140 METH_VARARGS, NULL},
1141 {"reset", (PyCFunction)mbstreamwriter_reset,
1142 METH_NOARGS, NULL},
1143 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001144};
1145
1146
1147
1148static PyTypeObject MultibyteStreamWriter_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001149 PyObject_HEAD_INIT(NULL)
1150 0, /* ob_size */
1151 "MultibyteStreamWriter", /* tp_name */
1152 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
1153 0, /* tp_itemsize */
1154 /* methods */
1155 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
1156 0, /* tp_print */
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001157 0, /* tp_getattr */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001158 0, /* tp_setattr */
1159 0, /* tp_compare */
1160 0, /* tp_repr */
1161 0, /* tp_as_number */
1162 0, /* tp_as_sequence */
1163 0, /* tp_as_mapping */
1164 0, /* tp_hash */
1165 0, /* tp_call */
1166 0, /* tp_str */
1167 PyObject_GenericGetAttr, /* tp_getattro */
1168 0, /* tp_setattro */
1169 0, /* tp_as_buffer */
1170 Py_TPFLAGS_DEFAULT, /* tp_flags */
1171 0, /* tp_doc */
1172 0, /* tp_traverse */
1173 0, /* tp_clear */
1174 0, /* tp_richcompare */
1175 0, /* tp_weaklistoffset */
1176 0, /* tp_iter */
1177 0, /* tp_iterext */
1178 mbstreamwriter_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001179};
1180
1181static PyObject *
1182__create_codec(PyObject *ignore, PyObject *arg)
1183{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001184 MultibyteCodecObject *self;
1185 MultibyteCodec *codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001186
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001187 if (!PyCObject_Check(arg)) {
1188 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1189 return NULL;
1190 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001191
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001192 codec = PyCObject_AsVoidPtr(arg);
1193 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1194 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001195
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001196 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
1197 if (self == NULL)
1198 return NULL;
1199 self->codec = codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001200
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001201 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001202}
1203
1204static PyObject *
1205mbstreamreader_create(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001206 PyObject *stream, const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001207{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001208 MultibyteStreamReaderObject *self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001209
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001210 self = PyObject_New(MultibyteStreamReaderObject,
1211 &MultibyteStreamReader_Type);
1212 if (self == NULL)
1213 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001214
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001215 self->codec = codec;
1216 self->stream = stream;
1217 Py_INCREF(stream);
1218 self->pendingsize = 0;
1219 self->errors = get_errorcallback(errors);
1220 if (self->errors == NULL)
1221 goto errorexit;
1222 if (self->codec->decinit != NULL &&
1223 self->codec->decinit(&self->state, self->codec->config) != 0)
1224 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001225
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001226 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001227
1228errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001229 Py_XDECREF(self);
1230 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001231}
1232
1233static PyObject *
1234mbstreamwriter_create(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001235 PyObject *stream, const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001236{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001237 MultibyteStreamWriterObject *self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001238
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001239 self = PyObject_New(MultibyteStreamWriterObject,
1240 &MultibyteStreamWriter_Type);
1241 if (self == NULL)
1242 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001243
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001244 self->codec = codec;
1245 self->stream = stream;
1246 Py_INCREF(stream);
1247 self->pendingsize = 0;
1248 self->errors = get_errorcallback(errors);
1249 if (self->errors == NULL)
1250 goto errorexit;
1251 if (self->codec->encinit != NULL &&
1252 self->codec->encinit(&self->state, self->codec->config) != 0)
1253 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001254
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001255 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001256
1257errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001258 Py_XDECREF(self);
1259 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001260}
1261
1262static struct PyMethodDef __methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001263 {"__create_codec", (PyCFunction)__create_codec, METH_O},
1264 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001265};
1266
1267void
1268init_multibytecodec(void)
1269{
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001270 if (PyType_Ready(&MultibyteCodec_Type) < 0)
1271 return;
1272 if (PyType_Ready(&MultibyteStreamReader_Type) < 0)
1273 return;
1274 if (PyType_Ready(&MultibyteStreamWriter_Type) < 0)
1275 return;
Neal Norwitz058bde12005-09-21 06:44:25 +00001276
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001277 Py_InitModule("_multibytecodec", __methods);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001278
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001279 if (PyErr_Occurred())
1280 Py_FatalError("can't initialize the _multibytecodec module");
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001281}