blob: 26d5c944c9d28e10b2cb836888a0e2e1b2c85018 [file] [log] [blame]
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001/*
2 * multibytecodec.c: Common Multibyte Codec Implementation
3 *
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00005 */
6
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00007#define PY_SSIZE_T_CLEAN
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00008#include "Python.h"
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00009#include "structmember.h"
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000010#include "multibytecodec.h"
11
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000012typedef struct {
13 const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end;
14 unsigned char *outbuf, *outbuf_end;
15 PyObject *excobj, *outobj;
16} MultibyteEncodeBuffer;
17
18typedef struct {
19 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
20 Py_UNICODE *outbuf, *outbuf_end;
21 PyObject *excobj, *outobj;
22} MultibyteDecodeBuffer;
23
24PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
25"I.encode(unicode[, errors]) -> (string, length consumed)\n\
26\n\
27Return an encoded string version of `unicode'. errors may be given to\n\
28set a different error handling scheme. Default is 'strict' meaning that\n\
29encoding errors raise a UnicodeEncodeError. Other possible values are\n\
30'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
31registered with codecs.register_error that can handle UnicodeEncodeErrors.");
32
33PyDoc_STRVAR(MultibyteCodec_Decode__doc__,
34"I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\
35\n\
36Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\
37to set a different error handling scheme. Default is 'strict' meaning\n\
38that encoding errors raise a UnicodeDecodeError. Other possible values\n\
39are 'ignore' and 'replace' as well as any other name registerd with\n\
40codecs.register_error that is able to handle UnicodeDecodeErrors.");
41
/* NULL-terminated keyword-name tables for argument parsing. */
static char *codeckwarglist[] = { "input", "errors", NULL };
static char *incnewkwarglist[] = { "errors", NULL };
static char *incrementalkwarglist[] = { "input", "final", NULL };
static char *streamkwarglist[] = { "stream", "errors", NULL };
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000046
47static PyObject *multibytecodec_encode(MultibyteCodec *,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000048 MultibyteCodec_State *, const Py_UNICODE **, Py_ssize_t,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000049 PyObject *, int);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000050
/* Parenthesized so the shift cannot re-associate with neighbouring operators. */
#define MBENC_RESET (MBENC_MAX << 1) /* reset after an encoding session */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000052
53static PyObject *
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000054make_tuple(PyObject *object, Py_ssize_t len)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000055{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000056 PyObject *v, *w;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000057
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000058 if (object == NULL)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000059 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000060
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000061 v = PyTuple_New(2);
62 if (v == NULL) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000063 Py_DECREF(object);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000064 return NULL;
65 }
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000066 PyTuple_SET_ITEM(v, 0, object);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000067
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000068 w = PyInt_FromSsize_t(len);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000069 if (w == NULL) {
70 Py_DECREF(v);
71 return NULL;
72 }
73 PyTuple_SET_ITEM(v, 1, w);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000074
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000075 return v;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000076}
77
78static PyObject *
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +000079internal_error_callback(const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000080{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000081 if (errors == NULL || strcmp(errors, "strict") == 0)
82 return ERROR_STRICT;
83 else if (strcmp(errors, "ignore") == 0)
84 return ERROR_IGNORE;
85 else if (strcmp(errors, "replace") == 0)
86 return ERROR_REPLACE;
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +000087 else
88 return PyString_FromString(errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000089}
90
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +000091static PyObject *
92call_error_callback(PyObject *errors, PyObject *exc)
93{
94 PyObject *args, *cb, *r;
95
96 assert(PyString_Check(errors));
97 cb = PyCodec_LookupError(PyString_AS_STRING(errors));
98 if (cb == NULL)
99 return NULL;
100
101 args = PyTuple_New(1);
102 if (args == NULL) {
103 Py_DECREF(cb);
104 return NULL;
105 }
106
107 PyTuple_SET_ITEM(args, 0, exc);
108 Py_INCREF(exc);
109
110 r = PyObject_CallObject(cb, args);
111 Py_DECREF(args);
112 Py_DECREF(cb);
113 return r;
114}
115
116static PyObject *
117codecctx_errors_get(MultibyteStatefulCodecContext *self)
118{
119 const char *errors;
120
121 if (self->errors == ERROR_STRICT)
122 errors = "strict";
123 else if (self->errors == ERROR_IGNORE)
124 errors = "ignore";
125 else if (self->errors == ERROR_REPLACE)
126 errors = "replace";
127 else {
128 Py_INCREF(self->errors);
129 return self->errors;
130 }
131
132 return PyString_FromString(errors);
133}
134
135static int
136codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
137 void *closure)
138{
139 PyObject *cb;
140
141 if (!PyString_Check(value)) {
142 PyErr_SetString(PyExc_TypeError, "errors must be a string");
143 return -1;
144 }
145
146 cb = internal_error_callback(PyString_AS_STRING(value));
147 if (cb == NULL)
148 return -1;
149
150 ERROR_DECREF(self->errors);
151 self->errors = cb;
152 return 0;
153}
154
155/* This getset handlers list is used by all the stateful codec objects */
156static PyGetSetDef codecctx_getsets[] = {
157 {"errors", (getter)codecctx_errors_get,
158 (setter)codecctx_errors_set,
159 PyDoc_STR("how to treat errors")},
160 {NULL,}
161};
162
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000163static int
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000164expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000165{
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000166 Py_ssize_t orgpos, orgsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000167
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000168 orgpos = (Py_ssize_t)((char *)buf->outbuf -
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000169 PyString_AS_STRING(buf->outobj));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000170 orgsize = PyString_GET_SIZE(buf->outobj);
171 if (_PyString_Resize(&buf->outobj, orgsize + (
172 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
173 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000174
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000175 buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;
176 buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj)
177 + PyString_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000178
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000179 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000180}
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000181#define REQUIRE_ENCODEBUFFER(buf, s) { \
182 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
183 if (expand_encodebuffer(buf, s) == -1) \
184 goto errorexit; \
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000185}
186
187static int
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000188expand_decodebuffer(MultibyteDecodeBuffer *buf, Py_ssize_t esize)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000189{
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000190 Py_ssize_t orgpos, orgsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000191
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000192 orgpos = (Py_ssize_t)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000193 orgsize = PyUnicode_GET_SIZE(buf->outobj);
194 if (PyUnicode_Resize(&buf->outobj, orgsize + (
195 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
196 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000197
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000198 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
199 buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
200 + PyUnicode_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000201
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000202 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000203}
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000204#define REQUIRE_DECODEBUFFER(buf, s) { \
205 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
206 if (expand_decodebuffer(buf, s) == -1) \
207 goto errorexit; \
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000208}
209
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000210
211/**
212 * MultibyteCodec object
213 */
214
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000215static int
216multibytecodec_encerror(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000217 MultibyteCodec_State *state,
218 MultibyteEncodeBuffer *buf,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000219 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000220{
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000221 PyObject *retobj = NULL, *retstr = NULL, *tobj;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000222 Py_ssize_t retstrsize, newpos;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000223 Py_ssize_t esize, start, end;
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000224 const char *reason;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000225
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000226 if (e > 0) {
227 reason = "illegal multibyte sequence";
228 esize = e;
229 }
230 else {
231 switch (e) {
232 case MBERR_TOOSMALL:
233 REQUIRE_ENCODEBUFFER(buf, -1);
234 return 0; /* retry it */
235 case MBERR_TOOFEW:
236 reason = "incomplete multibyte sequence";
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000237 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000238 break;
239 case MBERR_INTERNAL:
240 PyErr_SetString(PyExc_RuntimeError,
241 "internal codec error");
242 return -1;
243 default:
244 PyErr_SetString(PyExc_RuntimeError,
245 "unknown runtime error");
246 return -1;
247 }
248 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000249
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000250 if (errors == ERROR_REPLACE) {
251 const Py_UNICODE replchar = '?', *inbuf = &replchar;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000252 Py_ssize_t r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000253
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000254 for (;;) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000255 Py_ssize_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000256
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000257 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000258 r = codec->encode(state, codec->config, &inbuf, 1,
259 &buf->outbuf, outleft, 0);
260 if (r == MBERR_TOOSMALL) {
261 REQUIRE_ENCODEBUFFER(buf, -1);
262 continue;
263 }
264 else
265 break;
266 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000267
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000268 if (r != 0) {
269 REQUIRE_ENCODEBUFFER(buf, 1);
270 *buf->outbuf++ = '?';
271 }
272 }
273 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
274 buf->inbuf += esize;
275 return 0;
276 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000277
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000278 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000279 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000280
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000281 /* use cached exception object if available */
282 if (buf->excobj == NULL) {
283 buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
284 buf->inbuf_top,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000285 buf->inbuf_end - buf->inbuf_top,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000286 start, end, reason);
287 if (buf->excobj == NULL)
288 goto errorexit;
289 }
290 else
291 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
292 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
293 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
294 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000295
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000296 if (errors == ERROR_STRICT) {
297 PyCodec_StrictErrors(buf->excobj);
298 goto errorexit;
299 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000300
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000301 retobj = call_error_callback(errors, buf->excobj);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000302 if (retobj == NULL)
303 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000304
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000305 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
306 !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
307 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000308 PyErr_SetString(PyExc_TypeError,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000309 "encoding error handler must return "
310 "(unicode, int) tuple");
311 goto errorexit;
312 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000313
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000314 {
315 const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000316
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000317 retstr = multibytecodec_encode(codec, state, &uraw,
318 PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
319 MBENC_FLUSH);
320 if (retstr == NULL)
321 goto errorexit;
322 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000323
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000324 retstrsize = PyString_GET_SIZE(retstr);
325 REQUIRE_ENCODEBUFFER(buf, retstrsize);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000326
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000327 memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
328 buf->outbuf += retstrsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000329
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000330 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000331 if (newpos < 0)
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000332 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000333 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
334 PyErr_Format(PyExc_IndexError,
335 "position %d from error handler out of bounds",
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000336 (int)newpos);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000337 goto errorexit;
338 }
339 buf->inbuf = buf->inbuf_top + newpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000340
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000341 Py_DECREF(retobj);
342 Py_DECREF(retstr);
343 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000344
345errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000346 Py_XDECREF(retobj);
347 Py_XDECREF(retstr);
348 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000349}
350
351static int
352multibytecodec_decerror(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000353 MultibyteCodec_State *state,
354 MultibyteDecodeBuffer *buf,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000355 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000356{
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000357 PyObject *retobj = NULL, *retuni = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000358 Py_ssize_t retunisize, newpos;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000359 const char *reason;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000360 Py_ssize_t esize, start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000361
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000362 if (e > 0) {
363 reason = "illegal multibyte sequence";
364 esize = e;
365 }
366 else {
367 switch (e) {
368 case MBERR_TOOSMALL:
369 REQUIRE_DECODEBUFFER(buf, -1);
370 return 0; /* retry it */
371 case MBERR_TOOFEW:
372 reason = "incomplete multibyte sequence";
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000373 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000374 break;
375 case MBERR_INTERNAL:
376 PyErr_SetString(PyExc_RuntimeError,
377 "internal codec error");
378 return -1;
379 default:
380 PyErr_SetString(PyExc_RuntimeError,
381 "unknown runtime error");
382 return -1;
383 }
384 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000385
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000386 if (errors == ERROR_REPLACE) {
387 REQUIRE_DECODEBUFFER(buf, 1);
388 *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
389 }
390 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
391 buf->inbuf += esize;
392 return 0;
393 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000394
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000395 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000396 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000397
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000398 /* use cached exception object if available */
399 if (buf->excobj == NULL) {
400 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
Hye-Shik Changf5a149a2004-08-19 17:49:56 +0000401 (const char *)buf->inbuf_top,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000402 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000403 start, end, reason);
404 if (buf->excobj == NULL)
405 goto errorexit;
406 }
407 else
408 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
409 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
410 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
411 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000412
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000413 if (errors == ERROR_STRICT) {
414 PyCodec_StrictErrors(buf->excobj);
415 goto errorexit;
416 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000417
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000418 retobj = call_error_callback(errors, buf->excobj);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000419 if (retobj == NULL)
420 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000421
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000422 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
423 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
424 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000425 PyErr_SetString(PyExc_TypeError,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000426 "decoding error handler must return "
427 "(unicode, int) tuple");
428 goto errorexit;
429 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000430
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000431 retunisize = PyUnicode_GET_SIZE(retuni);
432 if (retunisize > 0) {
433 REQUIRE_DECODEBUFFER(buf, retunisize);
434 memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
435 retunisize * Py_UNICODE_SIZE);
436 buf->outbuf += retunisize;
437 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000438
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000439 newpos = PyInt_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000440 if (newpos < 0)
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000441 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000442 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
443 PyErr_Format(PyExc_IndexError,
444 "position %d from error handler out of bounds",
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000445 (int)newpos);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000446 goto errorexit;
447 }
448 buf->inbuf = buf->inbuf_top + newpos;
449 Py_DECREF(retobj);
450 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000451
452errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000453 Py_XDECREF(retobj);
454 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000455}
456
457static PyObject *
458multibytecodec_encode(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000459 MultibyteCodec_State *state,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000460 const Py_UNICODE **data, Py_ssize_t datalen,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000461 PyObject *errors, int flags)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000462{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000463 MultibyteEncodeBuffer buf;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000464 Py_ssize_t finalsize, r = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000465
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000466 if (datalen == 0)
467 return PyString_FromString("");
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000468
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000469 buf.excobj = NULL;
470 buf.inbuf = buf.inbuf_top = *data;
471 buf.inbuf_end = buf.inbuf_top + datalen;
472 buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);
473 if (buf.outobj == NULL)
474 goto errorexit;
475 buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj);
476 buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000477
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000478 while (buf.inbuf < buf.inbuf_end) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000479 Py_ssize_t inleft, outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000480
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000481 /* we don't reuse inleft and outleft here.
482 * error callbacks can relocate the cursor anywhere on buffer*/
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000483 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
484 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000485 r = codec->encode(state, codec->config, &buf.inbuf, inleft,
486 &buf.outbuf, outleft, flags);
487 *data = buf.inbuf;
488 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
489 break;
490 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
491 goto errorexit;
492 else if (r == MBERR_TOOFEW)
493 break;
494 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000495
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000496 if (codec->encreset != NULL)
497 for (;;) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000498 Py_ssize_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000499
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000500 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000501 r = codec->encreset(state, codec->config, &buf.outbuf,
502 outleft);
503 if (r == 0)
504 break;
505 else if (multibytecodec_encerror(codec, state,
506 &buf, errors, r))
507 goto errorexit;
508 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000509
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000510 finalsize = (Py_ssize_t)((char *)buf.outbuf -
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000511 PyString_AS_STRING(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000512
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000513 if (finalsize != PyString_GET_SIZE(buf.outobj))
514 if (_PyString_Resize(&buf.outobj, finalsize) == -1)
515 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000516
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000517 Py_XDECREF(buf.excobj);
518 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000519
520errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000521 Py_XDECREF(buf.excobj);
522 Py_XDECREF(buf.outobj);
523 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000524}
525
526static PyObject *
527MultibyteCodec_Encode(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000528 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000529{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000530 MultibyteCodec_State state;
531 Py_UNICODE *data;
532 PyObject *errorcb, *r, *arg, *ucvt;
533 const char *errors = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000534 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000535
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000536 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
537 codeckwarglist, &arg, &errors))
538 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000539
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000540 if (PyUnicode_Check(arg))
541 ucvt = NULL;
542 else {
543 arg = ucvt = PyObject_Unicode(arg);
544 if (arg == NULL)
545 return NULL;
546 else if (!PyUnicode_Check(arg)) {
547 PyErr_SetString(PyExc_TypeError,
548 "couldn't convert the object to unicode.");
549 Py_DECREF(ucvt);
550 return NULL;
551 }
552 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000553
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000554 data = PyUnicode_AS_UNICODE(arg);
555 datalen = PyUnicode_GET_SIZE(arg);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000556
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000557 errorcb = internal_error_callback(errors);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000558 if (errorcb == NULL) {
559 Py_XDECREF(ucvt);
560 return NULL;
561 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000562
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000563 if (self->codec->encinit != NULL &&
564 self->codec->encinit(&state, self->codec->config) != 0)
565 goto errorexit;
566 r = multibytecodec_encode(self->codec, &state,
567 (const Py_UNICODE **)&data, datalen, errorcb,
568 MBENC_FLUSH | MBENC_RESET);
569 if (r == NULL)
570 goto errorexit;
571
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000572 ERROR_DECREF(errorcb);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000573 Py_XDECREF(ucvt);
574 return make_tuple(r, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000575
576errorexit:
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000577 ERROR_DECREF(errorcb);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000578 Py_XDECREF(ucvt);
579 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000580}
581
582static PyObject *
583MultibyteCodec_Decode(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000584 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000585{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000586 MultibyteCodec_State state;
587 MultibyteDecodeBuffer buf;
588 PyObject *errorcb;
589 const char *data, *errors = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000590 Py_ssize_t datalen, finalsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000591
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000592 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode",
593 codeckwarglist, &data, &datalen, &errors))
594 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000595
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000596 errorcb = internal_error_callback(errors);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000597 if (errorcb == NULL)
598 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000599
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000600 if (datalen == 0) {
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000601 ERROR_DECREF(errorcb);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000602 return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
603 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000604
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000605 buf.excobj = NULL;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000606 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
607 buf.inbuf_end = buf.inbuf_top + datalen;
608 buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
609 if (buf.outobj == NULL)
610 goto errorexit;
611 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
612 buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000613
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000614 if (self->codec->decinit != NULL &&
615 self->codec->decinit(&state, self->codec->config) != 0)
616 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000617
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000618 while (buf.inbuf < buf.inbuf_end) {
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000619 Py_ssize_t inleft, outleft, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000620
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000621 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
622 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000623
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000624 r = self->codec->decode(&state, self->codec->config,
625 &buf.inbuf, inleft, &buf.outbuf, outleft);
626 if (r == 0)
627 break;
628 else if (multibytecodec_decerror(self->codec, &state,
629 &buf, errorcb, r))
630 goto errorexit;
631 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000632
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000633 finalsize = (Py_ssize_t)(buf.outbuf -
634 PyUnicode_AS_UNICODE(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000635
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000636 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
637 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
638 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000639
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000640 Py_XDECREF(buf.excobj);
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000641 ERROR_DECREF(errorcb);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000642 return make_tuple(buf.outobj, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000643
644errorexit:
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000645 ERROR_DECREF(errorcb);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000646 Py_XDECREF(buf.excobj);
647 Py_XDECREF(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000648
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000649 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000650}
651
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000652static struct PyMethodDef multibytecodec_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000653 {"encode", (PyCFunction)MultibyteCodec_Encode,
654 METH_VARARGS | METH_KEYWORDS,
655 MultibyteCodec_Encode__doc__},
656 {"decode", (PyCFunction)MultibyteCodec_Decode,
657 METH_VARARGS | METH_KEYWORDS,
658 MultibyteCodec_Decode__doc__},
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000659 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000660};
661
662static void
663multibytecodec_dealloc(MultibyteCodecObject *self)
664{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000665 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000666}
667
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000668static PyTypeObject MultibyteCodec_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000669 PyObject_HEAD_INIT(NULL)
670 0, /* ob_size */
671 "MultibyteCodec", /* tp_name */
672 sizeof(MultibyteCodecObject), /* tp_basicsize */
673 0, /* tp_itemsize */
674 /* methods */
675 (destructor)multibytecodec_dealloc, /* tp_dealloc */
676 0, /* tp_print */
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000677 0, /* tp_getattr */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000678 0, /* tp_setattr */
679 0, /* tp_compare */
680 0, /* tp_repr */
681 0, /* tp_as_number */
682 0, /* tp_as_sequence */
683 0, /* tp_as_mapping */
684 0, /* tp_hash */
685 0, /* tp_call */
686 0, /* tp_str */
687 PyObject_GenericGetAttr, /* tp_getattro */
688 0, /* tp_setattro */
689 0, /* tp_as_buffer */
690 Py_TPFLAGS_DEFAULT, /* tp_flags */
691 0, /* tp_doc */
692 0, /* tp_traverse */
693 0, /* tp_clear */
694 0, /* tp_richcompare */
695 0, /* tp_weaklistoffset */
696 0, /* tp_iter */
697 0, /* tp_iterext */
698 multibytecodec_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000699};
700
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +0000701
702/**
703 * Utility functions for stateful codec mechanism
704 */
705
/* Cast an object pointer to the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(obj) ((MultibyteStatefulDecoderContext *)(obj))
#define STATEFUL_ECTX(obj) ((MultibyteStatefulEncoderContext *)(obj))
708
709static PyObject *
710encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
711 PyObject *unistr, int final)
712{
713 PyObject *ucvt, *r = NULL;
714 Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
715 Py_ssize_t datalen, origpending;
716
717 if (PyUnicode_Check(unistr))
718 ucvt = NULL;
719 else {
720 unistr = ucvt = PyObject_Unicode(unistr);
721 if (unistr == NULL)
722 return NULL;
723 else if (!PyUnicode_Check(unistr)) {
724 PyErr_SetString(PyExc_TypeError,
725 "couldn't convert the object to unicode.");
726 Py_DECREF(ucvt);
727 return NULL;
728 }
729 }
730
731 datalen = PyUnicode_GET_SIZE(unistr);
732 origpending = ctx->pendingsize;
733
734 if (ctx->pendingsize > 0) {
735 inbuf_tmp = PyMem_New(Py_UNICODE, datalen + ctx->pendingsize);
736 if (inbuf_tmp == NULL)
737 goto errorexit;
738 memcpy(inbuf_tmp, ctx->pending,
739 Py_UNICODE_SIZE * ctx->pendingsize);
740 memcpy(inbuf_tmp + ctx->pendingsize,
741 PyUnicode_AS_UNICODE(unistr),
742 Py_UNICODE_SIZE * datalen);
743 datalen += ctx->pendingsize;
744 ctx->pendingsize = 0;
745 inbuf = inbuf_tmp;
746 }
747 else
748 inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
749
750 inbuf_end = inbuf + datalen;
751
752 r = multibytecodec_encode(ctx->codec, &ctx->state,
753 (const Py_UNICODE **)&inbuf,
754 datalen, ctx->errors, final ? MBENC_FLUSH : 0);
755 if (r == NULL) {
756 /* recover the original pending buffer */
757 memcpy(ctx->pending, inbuf_tmp, Py_UNICODE_SIZE * origpending);
758 ctx->pendingsize = origpending;
759 goto errorexit;
760 }
761
762 if (inbuf < inbuf_end) {
763 ctx->pendingsize = (Py_ssize_t)(inbuf_end - inbuf);
764 if (ctx->pendingsize > MAXENCPENDING) {
765 /* normal codecs can't reach here */
766 ctx->pendingsize = 0;
767 PyErr_SetString(PyExc_UnicodeError,
768 "pending buffer overflow");
769 goto errorexit;
770 }
771 memcpy(ctx->pending, inbuf,
772 ctx->pendingsize * Py_UNICODE_SIZE);
773 }
774
775 if (inbuf_tmp != NULL)
776 PyMem_Del(inbuf_tmp);
777 Py_XDECREF(ucvt);
778 return r;
779
780errorexit:
781 if (inbuf_tmp != NULL)
782 PyMem_Del(inbuf_tmp);
783 Py_XDECREF(r);
784 Py_XDECREF(ucvt);
785 return NULL;
786}
787
788static int
789decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
790 MultibyteDecodeBuffer *buf)
791{
792 Py_ssize_t npendings;
793
794 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
795 if (npendings + ctx->pendingsize > MAXDECPENDING) {
796 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
797 return -1;
798 }
799 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
800 ctx->pendingsize += npendings;
801 return 0;
802}
803
804static int
805decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
806 Py_ssize_t size)
807{
808 buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
809 buf->inbuf_end = buf->inbuf_top + size;
810 if (buf->outobj == NULL) { /* only if outobj is not allocated yet */
811 buf->outobj = PyUnicode_FromUnicode(NULL, size);
812 if (buf->outobj == NULL)
813 return -1;
814 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj);
815 buf->outbuf_end = buf->outbuf +
816 PyUnicode_GET_SIZE(buf->outobj);
817 }
818
819 return 0;
820}
821
822static int
823decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
824 MultibyteDecodeBuffer *buf)
825{
826 while (buf->inbuf < buf->inbuf_end) {
827 Py_ssize_t inleft, outleft;
828 int r;
829
830 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
831 outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
832
833 r = ctx->codec->decode(&ctx->state, ctx->codec->config,
834 &buf->inbuf, inleft, &buf->outbuf, outleft);
835 if (r == 0 || r == MBERR_TOOFEW)
836 break;
837 else if (multibytecodec_decerror(ctx->codec, &ctx->state,
838 buf, ctx->errors, r))
839 return -1;
840 }
841 return 0;
842}
843
844
845/**
846 * MultibyteIncrementalEncoder object
847 */
848
849static PyObject *
850mbiencoder_encode(MultibyteIncrementalEncoderObject *self,
851 PyObject *args, PyObject *kwargs)
852{
853 PyObject *data;
854 int final = 0;
855
856 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:encode",
857 incrementalkwarglist, &data, &final))
858 return NULL;
859
860 return encoder_encode_stateful(STATEFUL_ECTX(self), data, final);
861}
862
863static PyObject *
864mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
865{
866 if (self->codec->decreset != NULL &&
867 self->codec->decreset(&self->state, self->codec->config) != 0)
868 return NULL;
869 self->pendingsize = 0;
870
871 Py_RETURN_NONE;
872}
873
874static struct PyMethodDef mbiencoder_methods[] = {
875 {"encode", (PyCFunction)mbiencoder_encode,
876 METH_VARARGS | METH_KEYWORDS, NULL},
877 {"reset", (PyCFunction)mbiencoder_reset,
878 METH_NOARGS, NULL},
879 {NULL, NULL},
880};
881
882static PyObject *
883mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
884{
885 MultibyteIncrementalEncoderObject *self;
886 PyObject *codec;
887 char *errors = NULL;
888
889 codec = PyObject_GetAttrString((PyObject *)type, "codec");
890 if (codec == NULL)
891 return NULL;
892 if (!MultibyteCodec_Check(codec)) {
893 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
894 return NULL;
895 }
896
897 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
898 incnewkwarglist, &errors))
899 return NULL;
900
901 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
902 if (self == NULL)
903 return NULL;
904
905 self->codec = ((MultibyteCodecObject *)codec)->codec;
906 self->pendingsize = 0;
907 self->errors = internal_error_callback(errors);
908 if (self->errors == NULL)
909 goto errorexit;
910 if (self->codec->encinit != NULL &&
911 self->codec->encinit(&self->state, self->codec->config) != 0)
912 goto errorexit;
913
914 return (PyObject *)self;
915
916errorexit:
917 Py_XDECREF(self);
918 return NULL;
919}
920
921static int
922mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
923 visitproc visit, void *arg)
924{
925 if (ERROR_ISCUSTOM(self->errors))
926 Py_VISIT(self->errors);
927 return 0;
928}
929
930static void
931mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
932{
933 PyObject_GC_UnTrack(self);
934 ERROR_DECREF(self->errors);
935 self->ob_type->tp_free(self);
936}
937
938static PyTypeObject MultibyteIncrementalEncoder_Type = {
939 PyObject_HEAD_INIT(NULL)
940 0, /* ob_size */
941 "MultibyteIncrementalEncoder", /* tp_name */
942 sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
943 0, /* tp_itemsize */
944 /* methods */
945 (destructor)mbiencoder_dealloc, /* tp_dealloc */
946 0, /* tp_print */
947 0, /* tp_getattr */
948 0, /* tp_setattr */
949 0, /* tp_compare */
950 0, /* tp_repr */
951 0, /* tp_as_number */
952 0, /* tp_as_sequence */
953 0, /* tp_as_mapping */
954 0, /* tp_hash */
955 0, /* tp_call */
956 0, /* tp_str */
957 PyObject_GenericGetAttr, /* tp_getattro */
958 0, /* tp_setattro */
959 0, /* tp_as_buffer */
960 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
961 | Py_TPFLAGS_BASETYPE, /* tp_flags */
962 0, /* tp_doc */
963 (traverseproc)mbiencoder_traverse, /* tp_traverse */
964 0, /* tp_clear */
965 0, /* tp_richcompare */
966 0, /* tp_weaklistoffset */
967 0, /* tp_iter */
968 0, /* tp_iterext */
969 mbiencoder_methods, /* tp_methods */
970 0, /* tp_members */
971 codecctx_getsets, /* tp_getset */
972 0, /* tp_base */
973 0, /* tp_dict */
974 0, /* tp_descr_get */
975 0, /* tp_descr_set */
976 0, /* tp_dictoffset */
977 0, /* tp_init */
978 0, /* tp_alloc */
979 mbiencoder_new, /* tp_new */
980};
981
982
983/**
984 * MultibyteIncrementalDecoder object
985 */
986
987static PyObject *
988mbidecoder_decode(MultibyteIncrementalDecoderObject *self,
989 PyObject *args, PyObject *kwargs)
990{
991 MultibyteDecodeBuffer buf;
992 char *data, *wdata;
993 Py_ssize_t wsize, finalsize = 0, size, origpending;
994 int final = 0;
995
996 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "t#|i:decode",
997 incrementalkwarglist, &data, &size, &final))
998 return NULL;
999
1000 buf.outobj = buf.excobj = NULL;
1001 origpending = self->pendingsize;
1002
1003 if (self->pendingsize == 0) {
1004 wsize = size;
1005 wdata = data;
1006 }
1007 else {
1008 wsize = size + self->pendingsize;
1009 wdata = PyMem_Malloc(wsize);
1010 if (wdata == NULL)
1011 goto errorexit;
1012 memcpy(wdata, self->pending, self->pendingsize);
1013 memcpy(wdata + self->pendingsize, data, size);
1014 self->pendingsize = 0;
1015 }
1016
1017 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1018 goto errorexit;
1019
1020 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1021 goto errorexit;
1022
1023 if (final && buf.inbuf < buf.inbuf_end) {
1024 if (multibytecodec_decerror(self->codec, &self->state,
1025 &buf, self->errors, MBERR_TOOFEW)) {
1026 /* recover the original pending buffer */
1027 memcpy(self->pending, wdata, origpending);
1028 self->pendingsize = origpending;
1029 goto errorexit;
1030 }
1031 }
1032
1033 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1034 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1035 goto errorexit;
1036 }
1037
1038 finalsize = (Py_ssize_t)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
1039 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
1040 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
1041 goto errorexit;
1042
1043 if (wdata != data)
1044 PyMem_Del(wdata);
1045 Py_XDECREF(buf.excobj);
1046 return buf.outobj;
1047
1048errorexit:
1049 if (wdata != NULL && wdata != data)
1050 PyMem_Del(wdata);
1051 Py_XDECREF(buf.excobj);
1052 Py_XDECREF(buf.outobj);
1053 return NULL;
1054}
1055
1056static PyObject *
1057mbidecoder_reset(MultibyteIncrementalDecoderObject *self)
1058{
1059 if (self->codec->decreset != NULL &&
1060 self->codec->decreset(&self->state, self->codec->config) != 0)
1061 return NULL;
1062 self->pendingsize = 0;
1063
1064 Py_RETURN_NONE;
1065}
1066
1067static struct PyMethodDef mbidecoder_methods[] = {
1068 {"decode", (PyCFunction)mbidecoder_decode,
1069 METH_VARARGS | METH_KEYWORDS, NULL},
1070 {"reset", (PyCFunction)mbidecoder_reset,
1071 METH_NOARGS, NULL},
1072 {NULL, NULL},
1073};
1074
1075static PyObject *
1076mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1077{
1078 MultibyteIncrementalDecoderObject *self;
1079 PyObject *codec;
1080 char *errors = NULL;
1081
1082 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1083 if (codec == NULL)
1084 return NULL;
1085 if (!MultibyteCodec_Check(codec)) {
1086 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1087 return NULL;
1088 }
1089
1090 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1091 incnewkwarglist, &errors))
1092 return NULL;
1093
1094 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1095 if (self == NULL)
1096 return NULL;
1097
1098 self->codec = ((MultibyteCodecObject *)codec)->codec;
1099 self->pendingsize = 0;
1100 self->errors = internal_error_callback(errors);
1101 if (self->errors == NULL)
1102 goto errorexit;
1103 if (self->codec->decinit != NULL &&
1104 self->codec->decinit(&self->state, self->codec->config) != 0)
1105 goto errorexit;
1106
1107 return (PyObject *)self;
1108
1109errorexit:
1110 Py_XDECREF(self);
1111 return NULL;
1112}
1113
1114static int
1115mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1116 visitproc visit, void *arg)
1117{
1118 if (ERROR_ISCUSTOM(self->errors))
1119 Py_VISIT(self->errors);
1120 return 0;
1121}
1122
1123static void
1124mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1125{
1126 PyObject_GC_UnTrack(self);
1127 ERROR_DECREF(self->errors);
1128 self->ob_type->tp_free(self);
1129}
1130
1131static PyTypeObject MultibyteIncrementalDecoder_Type = {
1132 PyObject_HEAD_INIT(NULL)
1133 0, /* ob_size */
1134 "MultibyteIncrementalDecoder", /* tp_name */
1135 sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
1136 0, /* tp_itemsize */
1137 /* methods */
1138 (destructor)mbidecoder_dealloc, /* tp_dealloc */
1139 0, /* tp_print */
1140 0, /* tp_getattr */
1141 0, /* tp_setattr */
1142 0, /* tp_compare */
1143 0, /* tp_repr */
1144 0, /* tp_as_number */
1145 0, /* tp_as_sequence */
1146 0, /* tp_as_mapping */
1147 0, /* tp_hash */
1148 0, /* tp_call */
1149 0, /* tp_str */
1150 PyObject_GenericGetAttr, /* tp_getattro */
1151 0, /* tp_setattro */
1152 0, /* tp_as_buffer */
1153 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1154 | Py_TPFLAGS_BASETYPE, /* tp_flags */
1155 0, /* tp_doc */
1156 (traverseproc)mbidecoder_traverse, /* tp_traverse */
1157 0, /* tp_clear */
1158 0, /* tp_richcompare */
1159 0, /* tp_weaklistoffset */
1160 0, /* tp_iter */
1161 0, /* tp_iterext */
1162 mbidecoder_methods, /* tp_methods */
1163 0, /* tp_members */
1164 codecctx_getsets, /* tp_getset */
1165 0, /* tp_base */
1166 0, /* tp_dict */
1167 0, /* tp_descr_get */
1168 0, /* tp_descr_set */
1169 0, /* tp_dictoffset */
1170 0, /* tp_init */
1171 0, /* tp_alloc */
1172 mbidecoder_new, /* tp_new */
1173};
1174
1175
1176/**
1177 * MultibyteStreamReader object
1178 */
1179
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001180static PyObject *
1181mbstreamreader_iread(MultibyteStreamReaderObject *self,
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001182 const char *method, Py_ssize_t sizehint)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001183{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001184 MultibyteDecodeBuffer buf;
1185 PyObject *cres;
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001186 Py_ssize_t rsize, finalsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001187
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001188 if (sizehint == 0)
1189 return PyUnicode_FromUnicode(NULL, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001190
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001191 buf.outobj = buf.excobj = NULL;
1192 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001193
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001194 for (;;) {
1195 if (sizehint < 0)
1196 cres = PyObject_CallMethod(self->stream,
1197 (char *)method, NULL);
1198 else
1199 cres = PyObject_CallMethod(self->stream,
1200 (char *)method, "i", sizehint);
1201 if (cres == NULL)
1202 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001203
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001204 if (!PyString_Check(cres)) {
1205 PyErr_SetString(PyExc_TypeError,
1206 "stream function returned a "
1207 "non-string object");
1208 goto errorexit;
1209 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001210
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001211 if (self->pendingsize > 0) {
1212 PyObject *ctr;
1213 char *ctrdata;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001214
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001215 rsize = PyString_GET_SIZE(cres) + self->pendingsize;
1216 ctr = PyString_FromStringAndSize(NULL, rsize);
1217 if (ctr == NULL)
1218 goto errorexit;
1219 ctrdata = PyString_AS_STRING(ctr);
1220 memcpy(ctrdata, self->pending, self->pendingsize);
1221 memcpy(ctrdata + self->pendingsize,
1222 PyString_AS_STRING(cres),
1223 PyString_GET_SIZE(cres));
1224 Py_DECREF(cres);
1225 cres = ctr;
1226 self->pendingsize = 0;
1227 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001228
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001229 rsize = PyString_GET_SIZE(cres);
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001230 if (decoder_prepare_buffer(&buf, PyString_AS_STRING(cres),
1231 rsize) != 0)
1232 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001233
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001234 if (rsize > 0 && decoder_feed_buffer(
1235 (MultibyteStatefulDecoderContext *)self, &buf))
1236 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001237
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001238 if (rsize == 0 || sizehint < 0) { /* end of file */
1239 if (buf.inbuf < buf.inbuf_end &&
1240 multibytecodec_decerror(self->codec, &self->state,
1241 &buf, self->errors, MBERR_TOOFEW))
1242 goto errorexit;
1243 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001244
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001245 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001246 if (decoder_append_pending(STATEFUL_DCTX(self),
1247 &buf) != 0)
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001248 goto errorexit;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001249 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001250
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001251 finalsize = (Py_ssize_t)(buf.outbuf -
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001252 PyUnicode_AS_UNICODE(buf.outobj));
1253 Py_DECREF(cres);
1254 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001255
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001256 if (sizehint < 0 || finalsize != 0 || rsize == 0)
1257 break;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001258
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001259 sizehint = 1; /* read 1 more byte and retry */
1260 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001261
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001262 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
1263 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
1264 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001265
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001266 Py_XDECREF(cres);
1267 Py_XDECREF(buf.excobj);
1268 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001269
1270errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001271 Py_XDECREF(cres);
1272 Py_XDECREF(buf.excobj);
1273 Py_XDECREF(buf.outobj);
1274 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001275}
1276
1277static PyObject *
1278mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
1279{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001280 PyObject *sizeobj = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001281 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001282
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001283 if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
1284 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001285
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001286 if (sizeobj == Py_None || sizeobj == NULL)
1287 size = -1;
1288 else if (PyInt_Check(sizeobj))
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001289 size = PyInt_AsSsize_t(sizeobj);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001290 else {
1291 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1292 return NULL;
1293 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001294
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001295 return mbstreamreader_iread(self, "read", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001296}
1297
1298static PyObject *
1299mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
1300{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001301 PyObject *sizeobj = NULL;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001302 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001303
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001304 if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
1305 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001306
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001307 if (sizeobj == Py_None || sizeobj == NULL)
1308 size = -1;
1309 else if (PyInt_Check(sizeobj))
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001310 size = PyInt_AsSsize_t(sizeobj);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001311 else {
1312 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1313 return NULL;
1314 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001315
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001316 return mbstreamreader_iread(self, "readline", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001317}
1318
1319static PyObject *
1320mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
1321{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001322 PyObject *sizehintobj = NULL, *r, *sr;
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001323 Py_ssize_t sizehint;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001324
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001325 if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
1326 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001327
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001328 if (sizehintobj == Py_None || sizehintobj == NULL)
1329 sizehint = -1;
1330 else if (PyInt_Check(sizehintobj))
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001331 sizehint = PyInt_AsSsize_t(sizehintobj);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001332 else {
1333 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1334 return NULL;
1335 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001336
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001337 r = mbstreamreader_iread(self, "read", sizehint);
1338 if (r == NULL)
1339 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001340
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001341 sr = PyUnicode_Splitlines(r, 1);
1342 Py_DECREF(r);
1343 return sr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001344}
1345
1346static PyObject *
1347mbstreamreader_reset(MultibyteStreamReaderObject *self)
1348{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001349 if (self->codec->decreset != NULL &&
1350 self->codec->decreset(&self->state, self->codec->config) != 0)
1351 return NULL;
1352 self->pendingsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001353
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001354 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001355}
1356
1357static struct PyMethodDef mbstreamreader_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001358 {"read", (PyCFunction)mbstreamreader_read,
1359 METH_VARARGS, NULL},
1360 {"readline", (PyCFunction)mbstreamreader_readline,
1361 METH_VARARGS, NULL},
1362 {"readlines", (PyCFunction)mbstreamreader_readlines,
1363 METH_VARARGS, NULL},
1364 {"reset", (PyCFunction)mbstreamreader_reset,
1365 METH_NOARGS, NULL},
1366 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001367};
1368
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001369static PyMemberDef mbstreamreader_members[] = {
1370 {"stream", T_OBJECT,
1371 offsetof(MultibyteStreamReaderObject, stream),
1372 READONLY, NULL},
1373 {NULL,}
1374};
1375
1376static PyObject *
1377mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1378{
1379 MultibyteStreamReaderObject *self;
1380 PyObject *codec, *stream;
1381 char *errors = NULL;
1382
1383 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1384 if (codec == NULL)
1385 return NULL;
1386 if (!MultibyteCodec_Check(codec)) {
1387 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1388 return NULL;
1389 }
1390
1391 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1392 streamkwarglist, &stream, &errors))
1393 return NULL;
1394
1395 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1396 if (self == NULL)
1397 return NULL;
1398
1399 self->codec = ((MultibyteCodecObject *)codec)->codec;
1400 self->stream = stream;
1401 Py_INCREF(stream);
1402 self->pendingsize = 0;
1403 self->errors = internal_error_callback(errors);
1404 if (self->errors == NULL)
1405 goto errorexit;
1406 if (self->codec->decinit != NULL &&
1407 self->codec->decinit(&self->state, self->codec->config) != 0)
1408 goto errorexit;
1409
1410 return (PyObject *)self;
1411
1412errorexit:
1413 Py_XDECREF(self);
1414 return NULL;
1415}
1416
1417static int
1418mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1419 visitproc visit, void *arg)
1420{
1421 if (ERROR_ISCUSTOM(self->errors))
1422 Py_VISIT(self->errors);
1423 Py_VISIT(self->stream);
1424 return 0;
1425}
1426
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001427static void
1428mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1429{
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001430 PyObject_GC_UnTrack(self);
1431 ERROR_DECREF(self->errors);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001432 Py_DECREF(self->stream);
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001433 self->ob_type->tp_free(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001434}
1435
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001436static PyTypeObject MultibyteStreamReader_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001437 PyObject_HEAD_INIT(NULL)
1438 0, /* ob_size */
1439 "MultibyteStreamReader", /* tp_name */
1440 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
1441 0, /* tp_itemsize */
1442 /* methods */
1443 (destructor)mbstreamreader_dealloc, /* tp_dealloc */
1444 0, /* tp_print */
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001445 0, /* tp_getattr */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001446 0, /* tp_setattr */
1447 0, /* tp_compare */
1448 0, /* tp_repr */
1449 0, /* tp_as_number */
1450 0, /* tp_as_sequence */
1451 0, /* tp_as_mapping */
1452 0, /* tp_hash */
1453 0, /* tp_call */
1454 0, /* tp_str */
1455 PyObject_GenericGetAttr, /* tp_getattro */
1456 0, /* tp_setattro */
1457 0, /* tp_as_buffer */
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001458 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1459 | Py_TPFLAGS_BASETYPE, /* tp_flags */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001460 0, /* tp_doc */
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001461 (traverseproc)mbstreamreader_traverse, /* tp_traverse */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001462 0, /* tp_clear */
1463 0, /* tp_richcompare */
1464 0, /* tp_weaklistoffset */
1465 0, /* tp_iter */
1466 0, /* tp_iterext */
1467 mbstreamreader_methods, /* tp_methods */
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001468 mbstreamreader_members, /* tp_members */
1469 codecctx_getsets, /* tp_getset */
1470 0, /* tp_base */
1471 0, /* tp_dict */
1472 0, /* tp_descr_get */
1473 0, /* tp_descr_set */
1474 0, /* tp_dictoffset */
1475 0, /* tp_init */
1476 0, /* tp_alloc */
1477 mbstreamreader_new, /* tp_new */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001478};
1479
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001480
1481/**
1482 * MultibyteStreamWriter object
1483 */
1484
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001485static int
1486mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001487 PyObject *unistr)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001488{
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001489 PyObject *str, *wr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001490
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001491 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1492 if (str == NULL)
1493 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001494
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001495 wr = PyObject_CallMethod(self->stream, "write", "O", str);
1496 Py_DECREF(str);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001497 if (wr == NULL)
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001498 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001499
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001500 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001501}
1502
1503static PyObject *
1504mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *args)
1505{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001506 PyObject *strobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001507
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001508 if (!PyArg_ParseTuple(args, "O:write", &strobj))
1509 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001510
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001511 if (mbstreamwriter_iwrite(self, strobj))
1512 return NULL;
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001513 else
1514 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001515}
1516
1517static PyObject *
1518mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *args)
1519{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001520 PyObject *lines, *strobj;
1521 int i, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001522
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001523 if (!PyArg_ParseTuple(args, "O:writelines", &lines))
1524 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001525
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001526 if (!PySequence_Check(lines)) {
1527 PyErr_SetString(PyExc_TypeError,
1528 "arg must be a sequence object");
1529 return NULL;
1530 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001531
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001532 for (i = 0; i < PySequence_Length(lines); i++) {
1533 /* length can be changed even within this loop */
1534 strobj = PySequence_GetItem(lines, i);
1535 if (strobj == NULL)
1536 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001537
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001538 r = mbstreamwriter_iwrite(self, strobj);
1539 Py_DECREF(strobj);
1540 if (r == -1)
1541 return NULL;
1542 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001543
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001544 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001545}
1546
1547static PyObject *
1548mbstreamwriter_reset(MultibyteStreamWriterObject *self)
1549{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001550 const Py_UNICODE *pending;
1551 PyObject *pwrt;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001552
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001553 pending = self->pending;
1554 pwrt = multibytecodec_encode(self->codec, &self->state,
1555 &pending, self->pendingsize, self->errors,
1556 MBENC_FLUSH | MBENC_RESET);
1557 /* some pending buffer can be truncated when UnicodeEncodeError is
1558 * raised on 'strict' mode. but, 'reset' method is designed to
1559 * reset the pending buffer or states so failed string sequence
1560 * ought to be missed */
1561 self->pendingsize = 0;
1562 if (pwrt == NULL)
1563 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001564
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001565 if (PyString_Size(pwrt) > 0) {
1566 PyObject *wr;
1567 wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
1568 if (wr == NULL) {
1569 Py_DECREF(pwrt);
1570 return NULL;
1571 }
1572 }
1573 Py_DECREF(pwrt);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001574
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001575 Py_RETURN_NONE;
1576}
1577
1578static PyObject *
1579mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1580{
1581 MultibyteStreamWriterObject *self;
1582 PyObject *codec, *stream;
1583 char *errors = NULL;
1584
1585 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1586 if (codec == NULL)
1587 return NULL;
1588 if (!MultibyteCodec_Check(codec)) {
1589 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1590 return NULL;
1591 }
1592
1593 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1594 streamkwarglist, &stream, &errors))
1595 return NULL;
1596
1597 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1598 if (self == NULL)
1599 return NULL;
1600
1601 self->codec = ((MultibyteCodecObject *)codec)->codec;
1602 self->stream = stream;
1603 Py_INCREF(stream);
1604 self->pendingsize = 0;
1605 self->errors = internal_error_callback(errors);
1606 if (self->errors == NULL)
1607 goto errorexit;
1608 if (self->codec->encinit != NULL &&
1609 self->codec->encinit(&self->state, self->codec->config) != 0)
1610 goto errorexit;
1611
1612 return (PyObject *)self;
1613
1614errorexit:
1615 Py_XDECREF(self);
1616 return NULL;
1617}
1618
1619static int
1620mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1621 visitproc visit, void *arg)
1622{
1623 if (ERROR_ISCUSTOM(self->errors))
1624 Py_VISIT(self->errors);
1625 Py_VISIT(self->stream);
1626 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001627}
1628
1629static void
1630mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1631{
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001632 PyObject_GC_UnTrack(self);
1633 ERROR_DECREF(self->errors);
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001634 Py_DECREF(self->stream);
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001635 self->ob_type->tp_free(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001636}
1637
1638static struct PyMethodDef mbstreamwriter_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001639 {"write", (PyCFunction)mbstreamwriter_write,
1640 METH_VARARGS, NULL},
1641 {"writelines", (PyCFunction)mbstreamwriter_writelines,
1642 METH_VARARGS, NULL},
1643 {"reset", (PyCFunction)mbstreamwriter_reset,
1644 METH_NOARGS, NULL},
1645 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001646};
1647
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001648static PyMemberDef mbstreamwriter_members[] = {
1649 {"stream", T_OBJECT,
1650 offsetof(MultibyteStreamWriterObject, stream),
1651 READONLY, NULL},
1652 {NULL,}
1653};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001654
1655static PyTypeObject MultibyteStreamWriter_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001656 PyObject_HEAD_INIT(NULL)
1657 0, /* ob_size */
1658 "MultibyteStreamWriter", /* tp_name */
1659 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
1660 0, /* tp_itemsize */
1661 /* methods */
1662 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
1663 0, /* tp_print */
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001664 0, /* tp_getattr */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001665 0, /* tp_setattr */
1666 0, /* tp_compare */
1667 0, /* tp_repr */
1668 0, /* tp_as_number */
1669 0, /* tp_as_sequence */
1670 0, /* tp_as_mapping */
1671 0, /* tp_hash */
1672 0, /* tp_call */
1673 0, /* tp_str */
1674 PyObject_GenericGetAttr, /* tp_getattro */
1675 0, /* tp_setattro */
1676 0, /* tp_as_buffer */
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001677 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1678 | Py_TPFLAGS_BASETYPE, /* tp_flags */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001679 0, /* tp_doc */
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001680 (traverseproc)mbstreamwriter_traverse, /* tp_traverse */
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001681 0, /* tp_clear */
1682 0, /* tp_richcompare */
1683 0, /* tp_weaklistoffset */
1684 0, /* tp_iter */
1685 0, /* tp_iterext */
1686 mbstreamwriter_methods, /* tp_methods */
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001687 mbstreamwriter_members, /* tp_members */
1688 codecctx_getsets, /* tp_getset */
1689 0, /* tp_base */
1690 0, /* tp_dict */
1691 0, /* tp_descr_get */
1692 0, /* tp_descr_set */
1693 0, /* tp_dictoffset */
1694 0, /* tp_init */
1695 0, /* tp_alloc */
1696 mbstreamwriter_new, /* tp_new */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001697};
1698
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001699
1700/**
1701 * Exposed factory function
1702 */
1703
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001704static PyObject *
1705__create_codec(PyObject *ignore, PyObject *arg)
1706{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001707 MultibyteCodecObject *self;
1708 MultibyteCodec *codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001709
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001710 if (!PyCObject_Check(arg)) {
1711 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1712 return NULL;
1713 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001714
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001715 codec = PyCObject_AsVoidPtr(arg);
1716 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1717 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001718
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001719 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
1720 if (self == NULL)
1721 return NULL;
1722 self->codec = codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001723
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001724 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001725}
1726
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001727static struct PyMethodDef __methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001728 {"__create_codec", (PyCFunction)__create_codec, METH_O},
1729 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001730};
1731
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001732PyMODINIT_FUNC
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001733init_multibytecodec(void)
1734{
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001735 int i;
1736 PyObject *m;
1737 PyTypeObject *typelist[] = {
1738 &MultibyteIncrementalEncoder_Type,
1739 &MultibyteIncrementalDecoder_Type,
1740 &MultibyteStreamReader_Type,
1741 &MultibyteStreamWriter_Type,
1742 NULL
1743 };
1744
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001745 if (PyType_Ready(&MultibyteCodec_Type) < 0)
1746 return;
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001747
1748 m = Py_InitModule("_multibytecodec", __methods);
1749 if (m == NULL)
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00001750 return;
Neal Norwitz058bde12005-09-21 06:44:25 +00001751
Hye-Shik Change2ac4ab2006-03-26 02:34:59 +00001752 for (i = 0; typelist[i] != NULL; i++) {
1753 if (PyType_Ready(typelist[i]) < 0)
1754 return;
1755 Py_INCREF(typelist[i]);
1756 PyModule_AddObject(m, typelist[i]->tp_name,
1757 (PyObject *)typelist[i]);
1758 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001759
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001760 if (PyErr_Occurred())
1761 Py_FatalError("can't initialize the _multibytecodec module");
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001762}