blob: fe5b36256f422a4132cd9aa51643d7448eea0330 [file] [log] [blame]
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001/*
2 * multibytecodec.c: Common Multibyte Codec Implementation
3 *
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00005 */
6
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00007#define PY_SSIZE_T_CLEAN
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00008#include "Python.h"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00009#include "structmember.h"
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000010#include "multibytecodec.h"
Brett Cannonf2de1fc2014-08-22 11:45:03 -040011#include "clinic/multibytecodec.c.h"
12
13/*[clinic input]
14output preset file
15module _multibytecodec
16[clinic start generated code]*/
17/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e0cf1b7f3c472d17]*/
18
19/*[clinic input]
20class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "&MultibyteCodec_Type"
21[clinic start generated code]*/
22/*[clinic end generated code: output=da39a3ee5e6b4b0d input=d5b1fc1fec8eb003]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000023
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000024typedef struct {
Victor Stinnerd9491262013-04-14 02:06:32 +020025 PyObject *inobj;
26 Py_ssize_t inpos, inlen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000027 unsigned char *outbuf, *outbuf_end;
28 PyObject *excobj, *outobj;
29} MultibyteEncodeBuffer;
30
31typedef struct {
32 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
Victor Stinnera0dd0212013-04-11 22:09:04 +020033 PyObject *excobj;
34 _PyUnicodeWriter writer;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000035} MultibyteDecodeBuffer;
36
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000037static char *incnewkwarglist[] = {"errors", NULL};
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +000038static char *streamkwarglist[] = {"stream", "errors", NULL};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000039
40static PyObject *multibytecodec_encode(MultibyteCodec *,
Victor Stinnerd9491262013-04-14 02:06:32 +020041 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000042 PyObject *, int);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000043
/* Flag: reset after an encoding session.  The expansion is parenthesized so
 * that the shift cannot be split by a neighbouring higher-precedence
 * operator at the point of use. */
#define MBENC_RESET     (MBENC_MAX << 1)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000045
/* Identifier object for the attribute name "write". */
_Py_IDENTIFIER(write);
47
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000048static PyObject *
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000049make_tuple(PyObject *object, Py_ssize_t len)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000050{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000051 PyObject *v, *w;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000052
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000053 if (object == NULL)
54 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000056 v = PyTuple_New(2);
57 if (v == NULL) {
58 Py_DECREF(object);
59 return NULL;
60 }
61 PyTuple_SET_ITEM(v, 0, object);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 w = PyLong_FromSsize_t(len);
64 if (w == NULL) {
65 Py_DECREF(v);
66 return NULL;
67 }
68 PyTuple_SET_ITEM(v, 1, w);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 return v;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000071}
72
73static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000074internal_error_callback(const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000075{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 if (errors == NULL || strcmp(errors, "strict") == 0)
77 return ERROR_STRICT;
78 else if (strcmp(errors, "ignore") == 0)
79 return ERROR_IGNORE;
80 else if (strcmp(errors, "replace") == 0)
81 return ERROR_REPLACE;
82 else
83 return PyUnicode_FromString(errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000084}
85
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000086static PyObject *
87call_error_callback(PyObject *errors, PyObject *exc)
88{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 PyObject *args, *cb, *r;
90 const char *str;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 assert(PyUnicode_Check(errors));
93 str = _PyUnicode_AsString(errors);
94 if (str == NULL)
95 return NULL;
96 cb = PyCodec_LookupError(str);
97 if (cb == NULL)
98 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 args = PyTuple_New(1);
101 if (args == NULL) {
102 Py_DECREF(cb);
103 return NULL;
104 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 PyTuple_SET_ITEM(args, 0, exc);
107 Py_INCREF(exc);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 r = PyObject_CallObject(cb, args);
110 Py_DECREF(args);
111 Py_DECREF(cb);
112 return r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000113}
114
115static PyObject *
116codecctx_errors_get(MultibyteStatefulCodecContext *self)
117{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 const char *errors;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 if (self->errors == ERROR_STRICT)
121 errors = "strict";
122 else if (self->errors == ERROR_IGNORE)
123 errors = "ignore";
124 else if (self->errors == ERROR_REPLACE)
125 errors = "replace";
126 else {
127 Py_INCREF(self->errors);
128 return self->errors;
129 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000130
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000131 return PyUnicode_FromString(errors);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000132}
133
134static int
135codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 void *closure)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000137{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 PyObject *cb;
139 const char *str;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000141 if (!PyUnicode_Check(value)) {
142 PyErr_SetString(PyExc_TypeError, "errors must be a string");
143 return -1;
144 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 str = _PyUnicode_AsString(value);
147 if (str == NULL)
148 return -1;
Neal Norwitz6ea45d32007-08-26 04:19:43 +0000149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000150 cb = internal_error_callback(str);
151 if (cb == NULL)
152 return -1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 ERROR_DECREF(self->errors);
155 self->errors = cb;
156 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000157}
158
159/* This getset handlers list is used by all the stateful codec objects */
160static PyGetSetDef codecctx_getsets[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 {"errors", (getter)codecctx_errors_get,
162 (setter)codecctx_errors_set,
163 PyDoc_STR("how to treat errors")},
164 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000165};
166
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000167static int
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000168expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 Py_ssize_t orgpos, orgsize, incsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 orgpos = (Py_ssize_t)((char *)buf->outbuf -
173 PyBytes_AS_STRING(buf->outobj));
174 orgsize = PyBytes_GET_SIZE(buf->outobj);
175 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000176
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200177 if (orgsize > PY_SSIZE_T_MAX - incsize) {
178 PyErr_NoMemory();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 return -1;
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200180 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
183 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
186 buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
187 + PyBytes_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000190}
/*
 * Make sure `s` more bytes fit into the output buffer of `buf`, growing it
 * when they do not.  A negative `s` always triggers a growth step.  Jumps
 * to the enclosing function's `errorexit` label if the buffer cannot grow.
 */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                                   \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&             \
            expand_encodebuffer(buf, s) == -1)                              \
        goto errorexit;                                                     \
} while(0)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000196
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000197
198/**
199 * MultibyteCodec object
200 */
201
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000202static int
203multibytecodec_encerror(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 MultibyteCodec_State *state,
205 MultibyteEncodeBuffer *buf,
206 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 PyObject *retobj = NULL, *retstr = NULL, *tobj;
209 Py_ssize_t retstrsize, newpos;
210 Py_ssize_t esize, start, end;
211 const char *reason;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (e > 0) {
214 reason = "illegal multibyte sequence";
215 esize = e;
216 }
217 else {
218 switch (e) {
219 case MBERR_TOOSMALL:
220 REQUIRE_ENCODEBUFFER(buf, -1);
221 return 0; /* retry it */
222 case MBERR_TOOFEW:
223 reason = "incomplete multibyte sequence";
Victor Stinnerd9491262013-04-14 02:06:32 +0200224 esize = (Py_ssize_t)buf->inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000225 break;
226 case MBERR_INTERNAL:
227 PyErr_SetString(PyExc_RuntimeError,
228 "internal codec error");
229 return -1;
230 default:
231 PyErr_SetString(PyExc_RuntimeError,
232 "unknown runtime error");
233 return -1;
234 }
235 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 if (errors == ERROR_REPLACE) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200238 PyObject *replchar;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 Py_ssize_t r;
Victor Stinnerd9491262013-04-14 02:06:32 +0200240 Py_ssize_t inpos;
241 int kind;
242 void *data;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000243
Victor Stinnerd9491262013-04-14 02:06:32 +0200244 replchar = PyUnicode_FromOrdinal('?');
245 if (replchar == NULL)
246 goto errorexit;
247 kind = PyUnicode_KIND(replchar);
248 data = PyUnicode_DATA(replchar);
249
250 inpos = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 for (;;) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200252 Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000253
Victor Stinnerd9491262013-04-14 02:06:32 +0200254 r = codec->encode(state, codec->config,
255 kind, data, &inpos, 1,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 &buf->outbuf, outleft, 0);
257 if (r == MBERR_TOOSMALL) {
258 REQUIRE_ENCODEBUFFER(buf, -1);
259 continue;
260 }
261 else
262 break;
263 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000264
Victor Stinnerd9491262013-04-14 02:06:32 +0200265 Py_DECREF(replchar);
266
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 if (r != 0) {
268 REQUIRE_ENCODEBUFFER(buf, 1);
269 *buf->outbuf++ = '?';
270 }
271 }
272 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200273 buf->inpos += esize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000274 return 0;
275 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000276
Victor Stinnerd9491262013-04-14 02:06:32 +0200277 start = (Py_ssize_t)buf->inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 /* use cached exception object if available */
281 if (buf->excobj == NULL) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200282 buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
283 "sOnns",
284 codec->encoding, buf->inobj,
285 start, end, reason);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 if (buf->excobj == NULL)
287 goto errorexit;
288 }
289 else
290 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
291 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
292 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
293 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000294
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 if (errors == ERROR_STRICT) {
296 PyCodec_StrictErrors(buf->excobj);
297 goto errorexit;
298 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000299
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 retobj = call_error_callback(errors, buf->excobj);
301 if (retobj == NULL)
302 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000303
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000304 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500305 (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
307 PyErr_SetString(PyExc_TypeError,
308 "encoding error handler must return "
Benjamin Petersonaff47232012-12-02 10:53:41 -0500309 "(str, int) tuple");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 goto errorexit;
311 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000312
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500313 if (PyUnicode_Check(tobj)) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200314 Py_ssize_t inpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000315
Victor Stinnerd9491262013-04-14 02:06:32 +0200316 retstr = multibytecodec_encode(codec, state, tobj,
317 &inpos, ERROR_STRICT,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 MBENC_FLUSH);
319 if (retstr == NULL)
320 goto errorexit;
321 }
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500322 else {
323 Py_INCREF(tobj);
324 retstr = tobj;
325 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 assert(PyBytes_Check(retstr));
328 retstrsize = PyBytes_GET_SIZE(retstr);
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200329 if (retstrsize > 0) {
330 REQUIRE_ENCODEBUFFER(buf, retstrsize);
331 memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
332 buf->outbuf += retstrsize;
333 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000334
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
336 if (newpos < 0 && !PyErr_Occurred())
Victor Stinnerd9491262013-04-14 02:06:32 +0200337 newpos += (Py_ssize_t)buf->inlen;
338 if (newpos < 0 || newpos > buf->inlen) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 PyErr_Clear();
340 PyErr_Format(PyExc_IndexError,
341 "position %zd from error handler out of bounds",
342 newpos);
343 goto errorexit;
344 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200345 buf->inpos = newpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 Py_DECREF(retobj);
348 Py_DECREF(retstr);
349 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000350
351errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 Py_XDECREF(retobj);
353 Py_XDECREF(retstr);
354 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000355}
356
357static int
358multibytecodec_decerror(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 MultibyteCodec_State *state,
360 MultibyteDecodeBuffer *buf,
361 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 PyObject *retobj = NULL, *retuni = NULL;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200364 Py_ssize_t newpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 const char *reason;
366 Py_ssize_t esize, start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 if (e > 0) {
369 reason = "illegal multibyte sequence";
370 esize = e;
371 }
372 else {
373 switch (e) {
374 case MBERR_TOOSMALL:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 return 0; /* retry it */
376 case MBERR_TOOFEW:
377 reason = "incomplete multibyte sequence";
378 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
379 break;
380 case MBERR_INTERNAL:
381 PyErr_SetString(PyExc_RuntimeError,
382 "internal codec error");
383 return -1;
Victor Stinnerd1f99422013-07-16 21:41:43 +0200384 case MBERR_EXCEPTION:
385 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 default:
387 PyErr_SetString(PyExc_RuntimeError,
388 "unknown runtime error");
389 return -1;
390 }
391 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 if (errors == ERROR_REPLACE) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200394 if (_PyUnicodeWriter_WriteChar(&buf->writer,
395 Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
396 goto errorexit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 }
398 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
399 buf->inbuf += esize;
400 return 0;
401 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000402
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
404 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406 /* use cached exception object if available */
407 if (buf->excobj == NULL) {
408 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
409 (const char *)buf->inbuf_top,
410 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
411 start, end, reason);
412 if (buf->excobj == NULL)
413 goto errorexit;
414 }
415 else
416 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
417 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
418 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
419 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000421 if (errors == ERROR_STRICT) {
422 PyCodec_StrictErrors(buf->excobj);
423 goto errorexit;
424 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000426 retobj = call_error_callback(errors, buf->excobj);
427 if (retobj == NULL)
428 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
431 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
432 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
433 PyErr_SetString(PyExc_TypeError,
434 "decoding error handler must return "
Benjamin Petersonaff47232012-12-02 10:53:41 -0500435 "(str, int) tuple");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 goto errorexit;
437 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000438
Victor Stinnera0dd0212013-04-11 22:09:04 +0200439 if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
Victor Stinner4eea8492011-11-21 03:01:27 +0100440 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
443 if (newpos < 0 && !PyErr_Occurred())
444 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
445 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
446 PyErr_Clear();
447 PyErr_Format(PyExc_IndexError,
448 "position %zd from error handler out of bounds",
449 newpos);
450 goto errorexit;
451 }
452 buf->inbuf = buf->inbuf_top + newpos;
453 Py_DECREF(retobj);
454 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000455
456errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 Py_XDECREF(retobj);
458 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000459}
460
461static PyObject *
462multibytecodec_encode(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000463 MultibyteCodec_State *state,
Victor Stinnerd9491262013-04-14 02:06:32 +0200464 PyObject *text, Py_ssize_t *inpos_t,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 PyObject *errors, int flags)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000466{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467 MultibyteEncodeBuffer buf;
468 Py_ssize_t finalsize, r = 0;
Victor Stinnerd9491262013-04-14 02:06:32 +0200469 Py_ssize_t datalen;
470 int kind;
471 void *data;
472
473 if (PyUnicode_READY(text) < 0)
474 return NULL;
475 datalen = PyUnicode_GET_LENGTH(text);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000476
Victor Stinner6bcbef72011-05-24 22:17:55 +0200477 if (datalen == 0 && !(flags & MBENC_RESET))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 return PyBytes_FromStringAndSize(NULL, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 buf.excobj = NULL;
Brett Cannonb94767f2011-02-22 20:15:44 +0000481 buf.outobj = NULL;
Victor Stinnerd9491262013-04-14 02:06:32 +0200482 buf.inobj = text; /* borrowed reference */
483 buf.inpos = 0;
484 buf.inlen = datalen;
485 kind = PyUnicode_KIND(buf.inobj);
486 data = PyUnicode_DATA(buf.inobj);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
489 PyErr_NoMemory();
490 goto errorexit;
491 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000493 buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
494 if (buf.outobj == NULL)
495 goto errorexit;
496 buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
497 buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000498
Victor Stinnerd9491262013-04-14 02:06:32 +0200499 while (buf.inpos < buf.inlen) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 /* we don't reuse inleft and outleft here.
501 * error callbacks can relocate the cursor anywhere on buffer*/
Victor Stinnerd9491262013-04-14 02:06:32 +0200502 Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
503
504 r = codec->encode(state, codec->config,
505 kind, data,
506 &buf.inpos, buf.inlen,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 &buf.outbuf, outleft, flags);
508 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
509 break;
510 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
511 goto errorexit;
512 else if (r == MBERR_TOOFEW)
513 break;
514 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000515
Victor Stinner6bcbef72011-05-24 22:17:55 +0200516 if (codec->encreset != NULL && (flags & MBENC_RESET))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 for (;;) {
518 Py_ssize_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000519
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000520 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
521 r = codec->encreset(state, codec->config, &buf.outbuf,
522 outleft);
523 if (r == 0)
524 break;
525 else if (multibytecodec_encerror(codec, state,
526 &buf, errors, r))
527 goto errorexit;
528 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000529
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 finalsize = (Py_ssize_t)((char *)buf.outbuf -
531 PyBytes_AS_STRING(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (finalsize != PyBytes_GET_SIZE(buf.outobj))
534 if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
535 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000536
Victor Stinnerd9491262013-04-14 02:06:32 +0200537 if (inpos_t)
538 *inpos_t = buf.inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 Py_XDECREF(buf.excobj);
540 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000541
542errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 Py_XDECREF(buf.excobj);
544 Py_XDECREF(buf.outobj);
545 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000546}
547
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400548/*[clinic input]
549_multibytecodec.MultibyteCodec.encode
550
551 input: object
552 errors: str(nullable=True) = NULL
553
554Return an encoded string version of `input'.
555
556'errors' may be given to set a different error handling scheme. Default is
557'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
558values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
559registered with codecs.register_error that can handle UnicodeEncodeErrors.
560[clinic start generated code]*/
561
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000562static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400563_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self, PyObject *input, const char *errors)
564/*[clinic end generated code: output=a36bfa08783a0d0b input=252e7ee695867b2d]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000565{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 MultibyteCodec_State state;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400567 PyObject *errorcb, *r, *ucvt;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000569
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400570 if (PyUnicode_Check(input))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 ucvt = NULL;
572 else {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400573 input = ucvt = PyObject_Str(input);
574 if (input == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 return NULL;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400576 else if (!PyUnicode_Check(input)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 PyErr_SetString(PyExc_TypeError,
578 "couldn't convert the object to unicode.");
579 Py_DECREF(ucvt);
580 return NULL;
581 }
582 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000583
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400584 if (PyUnicode_READY(input) < 0) {
Victor Stinner9a80fab2011-11-21 02:50:14 +0100585 Py_XDECREF(ucvt);
586 return NULL;
587 }
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400588 datalen = PyUnicode_GET_LENGTH(input);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 errorcb = internal_error_callback(errors);
591 if (errorcb == NULL) {
592 Py_XDECREF(ucvt);
593 return NULL;
594 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000596 if (self->codec->encinit != NULL &&
597 self->codec->encinit(&state, self->codec->config) != 0)
598 goto errorexit;
599 r = multibytecodec_encode(self->codec, &state,
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400600 input, NULL, errorcb,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 MBENC_FLUSH | MBENC_RESET);
602 if (r == NULL)
603 goto errorexit;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 ERROR_DECREF(errorcb);
606 Py_XDECREF(ucvt);
607 return make_tuple(r, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000608
609errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000610 ERROR_DECREF(errorcb);
611 Py_XDECREF(ucvt);
612 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000613}
614
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400615/*[clinic input]
616_multibytecodec.MultibyteCodec.decode
617
618 input: Py_buffer
619 errors: str(nullable=True) = NULL
620
621Decodes 'input'.
622
623'errors' may be given to set a different error handling scheme. Default is
624'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
625values are 'ignore' and 'replace' as well as any other name registered with
626codecs.register_error that is able to handle UnicodeDecodeErrors."
627[clinic start generated code]*/
628
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000629static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400630_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self, Py_buffer *input, const char *errors)
631/*[clinic end generated code: output=4c8ee8b2931b014e input=37e1d9236e3ce8f3]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000632{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000633 MultibyteCodec_State state;
634 MultibyteDecodeBuffer buf;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200635 PyObject *errorcb, *res;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400636 const char *data;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200637 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000638
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400639 data = input->buf;
640 datalen = input->len;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 errorcb = internal_error_callback(errors);
643 if (errorcb == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 return NULL;
645 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 if (datalen == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000648 ERROR_DECREF(errorcb);
Victor Stinnerb37b1742011-12-01 03:18:59 +0100649 return make_tuple(PyUnicode_New(0, 0), 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000651
Victor Stinner8f674cc2013-04-17 23:02:17 +0200652 _PyUnicodeWriter_Init(&buf.writer);
653 buf.writer.min_length = datalen;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 buf.excobj = NULL;
655 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
656 buf.inbuf_end = buf.inbuf_top + datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 if (self->codec->decinit != NULL &&
659 self->codec->decinit(&state, self->codec->config) != 0)
660 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000662 while (buf.inbuf < buf.inbuf_end) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200663 Py_ssize_t inleft, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000667 r = self->codec->decode(&state, self->codec->config,
Victor Stinnera0dd0212013-04-11 22:09:04 +0200668 &buf.inbuf, inleft, &buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000669 if (r == 0)
670 break;
671 else if (multibytecodec_decerror(self->codec, &state,
672 &buf, errorcb, r))
673 goto errorexit;
674 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000675
Victor Stinnera0dd0212013-04-11 22:09:04 +0200676 res = _PyUnicodeWriter_Finish(&buf.writer);
677 if (res == NULL)
678 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 Py_XDECREF(buf.excobj);
681 ERROR_DECREF(errorcb);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200682 return make_tuple(res, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000683
684errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 ERROR_DECREF(errorcb);
686 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200687 _PyUnicodeWriter_Dealloc(&buf.writer);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000690}
691
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000692static struct PyMethodDef multibytecodec_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400693 _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
694 _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
695 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000696};
697
698static void
699multibytecodec_dealloc(MultibyteCodecObject *self)
700{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000702}
703
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000704static PyTypeObject MultibyteCodec_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 PyVarObject_HEAD_INIT(NULL, 0)
706 "MultibyteCodec", /* tp_name */
707 sizeof(MultibyteCodecObject), /* tp_basicsize */
708 0, /* tp_itemsize */
709 /* methods */
710 (destructor)multibytecodec_dealloc, /* tp_dealloc */
711 0, /* tp_print */
712 0, /* tp_getattr */
713 0, /* tp_setattr */
714 0, /* tp_reserved */
715 0, /* tp_repr */
716 0, /* tp_as_number */
717 0, /* tp_as_sequence */
718 0, /* tp_as_mapping */
719 0, /* tp_hash */
720 0, /* tp_call */
721 0, /* tp_str */
722 PyObject_GenericGetAttr, /* tp_getattro */
723 0, /* tp_setattro */
724 0, /* tp_as_buffer */
725 Py_TPFLAGS_DEFAULT, /* tp_flags */
726 0, /* tp_doc */
727 0, /* tp_traverse */
728 0, /* tp_clear */
729 0, /* tp_richcompare */
730 0, /* tp_weaklistoffset */
731 0, /* tp_iter */
732 0, /* tp_iterext */
733 multibytecodec_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000734};
735
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000736
737/**
738 * Utility functions for stateful codec mechanism
739 */
740
/* View an object pointer as a stateful decoder / encoder context.  The
 * callers in this file apply these casts to incremental codec objects and
 * to the stream reader object. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000743
744static PyObject *
745encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 PyObject *unistr, int final)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000747{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000748 PyObject *ucvt, *r = NULL;
Victor Stinnerd9491262013-04-14 02:06:32 +0200749 PyObject *inbuf = NULL;
750 Py_ssize_t inpos, datalen;
751 PyObject *origpending = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000753 if (PyUnicode_Check(unistr))
754 ucvt = NULL;
755 else {
756 unistr = ucvt = PyObject_Str(unistr);
757 if (unistr == NULL)
758 return NULL;
759 else if (!PyUnicode_Check(unistr)) {
760 PyErr_SetString(PyExc_TypeError,
Benjamin Petersonaff47232012-12-02 10:53:41 -0500761 "couldn't convert the object to str.");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 Py_DECREF(ucvt);
763 return NULL;
764 }
765 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000766
Victor Stinnerd9491262013-04-14 02:06:32 +0200767 if (ctx->pending) {
768 PyObject *inbuf_tmp;
769
770 Py_INCREF(ctx->pending);
771 origpending = ctx->pending;
772
773 Py_INCREF(ctx->pending);
774 inbuf_tmp = ctx->pending;
775 PyUnicode_Append(&inbuf_tmp, unistr);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000776 if (inbuf_tmp == NULL)
777 goto errorexit;
Victor Stinnerd9491262013-04-14 02:06:32 +0200778 Py_CLEAR(ctx->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 inbuf = inbuf_tmp;
780 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200781 else {
782 origpending = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000783
Victor Stinnerd9491262013-04-14 02:06:32 +0200784 Py_INCREF(unistr);
785 inbuf = unistr;
786 }
787 if (PyUnicode_READY(inbuf) < 0)
788 goto errorexit;
789 inpos = 0;
790 datalen = PyUnicode_GET_LENGTH(inbuf);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 r = multibytecodec_encode(ctx->codec, &ctx->state,
Victor Stinnerd9491262013-04-14 02:06:32 +0200793 inbuf, &inpos,
794 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000795 if (r == NULL) {
796 /* recover the original pending buffer */
Victor Stinnerd9491262013-04-14 02:06:32 +0200797 Py_CLEAR(ctx->pending);
798 ctx->pending = origpending;
799 origpending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000800 goto errorexit;
801 }
Victor Stinner322cc742013-04-14 18:11:41 +0200802 Py_XDECREF(origpending);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000803
Victor Stinnerd9491262013-04-14 02:06:32 +0200804 if (inpos < datalen) {
805 if (datalen - inpos > MAXENCPENDING) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000806 /* normal codecs can't reach here */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 PyErr_SetString(PyExc_UnicodeError,
808 "pending buffer overflow");
809 goto errorexit;
810 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200811 ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
812 if (ctx->pending == NULL) {
813 /* normal codecs can't reach here */
814 goto errorexit;
815 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000816 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000817
Victor Stinner322cc742013-04-14 18:11:41 +0200818 Py_DECREF(inbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 Py_XDECREF(ucvt);
820 return r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000821
822errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 Py_XDECREF(r);
824 Py_XDECREF(ucvt);
Victor Stinnerd9491262013-04-14 02:06:32 +0200825 Py_XDECREF(origpending);
Victor Stinner322cc742013-04-14 18:11:41 +0200826 Py_XDECREF(inbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000828}
829
830static int
831decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 MultibyteDecodeBuffer *buf)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000833{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 Py_ssize_t npendings;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
837 if (npendings + ctx->pendingsize > MAXDECPENDING ||
838 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
839 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
840 return -1;
841 }
842 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
843 ctx->pendingsize += npendings;
844 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000845}
846
847static int
848decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 Py_ssize_t size)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000850{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
852 buf->inbuf_end = buf->inbuf_top + size;
Victor Stinner8f674cc2013-04-17 23:02:17 +0200853 buf->writer.min_length += size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000855}
856
857static int
858decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 MultibyteDecodeBuffer *buf)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000860{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 while (buf->inbuf < buf->inbuf_end) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200862 Py_ssize_t inleft;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 Py_ssize_t r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000864
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000866
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 r = ctx->codec->decode(&ctx->state, ctx->codec->config,
Victor Stinnera0dd0212013-04-11 22:09:04 +0200868 &buf->inbuf, inleft, &buf->writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 if (r == 0 || r == MBERR_TOOFEW)
870 break;
871 else if (multibytecodec_decerror(ctx->codec, &ctx->state,
872 buf, ctx->errors, r))
873 return -1;
874 }
875 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000876}
877
878
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400879/*[clinic input]
880 class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "&MultibyteIncrementalEncoder_Type"
881[clinic start generated code]*/
882/*[clinic end generated code: output=da39a3ee5e6b4b0d input=3be82909cd08924d]*/
883
884/*[clinic input]
885_multibytecodec.MultibyteIncrementalEncoder.encode
886
887 input: object
888 final: int = 0
889[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000890
891static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400892_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self, PyObject *input, int final)
893/*[clinic end generated code: output=3cd8780c8a719bbf input=456b76d73e464661]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000894{
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400895 return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000896}
897
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400898/*[clinic input]
899_multibytecodec.MultibyteIncrementalEncoder.reset
900[clinic start generated code]*/
901
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000902static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400903_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
904/*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000905{
Victor Stinnere15dce32011-05-30 22:56:00 +0200906 /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
907 unsigned char buffer[4], *outbuf;
908 Py_ssize_t r;
909 if (self->codec->encreset != NULL) {
910 outbuf = buffer;
911 r = self->codec->encreset(&self->state, self->codec->config,
912 &outbuf, sizeof(buffer));
913 if (r != 0)
914 return NULL;
915 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200916 Py_CLEAR(self->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000918}
919
920static struct PyMethodDef mbiencoder_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400921 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
922 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
923 {NULL, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000924};
925
926static PyObject *
927mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
928{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 MultibyteIncrementalEncoderObject *self;
930 PyObject *codec = NULL;
931 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
934 incnewkwarglist, &errors))
935 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000936
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
938 if (self == NULL)
939 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 codec = PyObject_GetAttrString((PyObject *)type, "codec");
942 if (codec == NULL)
943 goto errorexit;
944 if (!MultibyteCodec_Check(codec)) {
945 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
946 goto errorexit;
947 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 self->codec = ((MultibyteCodecObject *)codec)->codec;
Victor Stinnerd9491262013-04-14 02:06:32 +0200950 self->pending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 self->errors = internal_error_callback(errors);
952 if (self->errors == NULL)
953 goto errorexit;
954 if (self->codec->encinit != NULL &&
955 self->codec->encinit(&self->state, self->codec->config) != 0)
956 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 Py_DECREF(codec);
959 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000960
961errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000962 Py_XDECREF(self);
963 Py_XDECREF(codec);
964 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000965}
966
967static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +0000968mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
969{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +0000971}
972
973static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000974mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000976{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 if (ERROR_ISCUSTOM(self->errors))
978 Py_VISIT(self->errors);
979 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000980}
981
982static void
983mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
984{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 PyObject_GC_UnTrack(self);
986 ERROR_DECREF(self->errors);
987 Py_TYPE(self)->tp_free(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000988}
989
990static PyTypeObject MultibyteIncrementalEncoder_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 PyVarObject_HEAD_INIT(NULL, 0)
992 "MultibyteIncrementalEncoder", /* tp_name */
993 sizeof(MultibyteIncrementalEncoderObject), /* tp_basicsize */
994 0, /* tp_itemsize */
995 /* methods */
996 (destructor)mbiencoder_dealloc, /* tp_dealloc */
997 0, /* tp_print */
998 0, /* tp_getattr */
999 0, /* tp_setattr */
1000 0, /* tp_reserved */
1001 0, /* tp_repr */
1002 0, /* tp_as_number */
1003 0, /* tp_as_sequence */
1004 0, /* tp_as_mapping */
1005 0, /* tp_hash */
1006 0, /* tp_call */
1007 0, /* tp_str */
1008 PyObject_GenericGetAttr, /* tp_getattro */
1009 0, /* tp_setattro */
1010 0, /* tp_as_buffer */
1011 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1012 | Py_TPFLAGS_BASETYPE, /* tp_flags */
1013 0, /* tp_doc */
1014 (traverseproc)mbiencoder_traverse, /* tp_traverse */
1015 0, /* tp_clear */
1016 0, /* tp_richcompare */
1017 0, /* tp_weaklistoffset */
1018 0, /* tp_iter */
1019 0, /* tp_iterext */
1020 mbiencoder_methods, /* tp_methods */
1021 0, /* tp_members */
1022 codecctx_getsets, /* tp_getset */
1023 0, /* tp_base */
1024 0, /* tp_dict */
1025 0, /* tp_descr_get */
1026 0, /* tp_descr_set */
1027 0, /* tp_dictoffset */
1028 mbiencoder_init, /* tp_init */
1029 0, /* tp_alloc */
1030 mbiencoder_new, /* tp_new */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001031};
1032
1033
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001034/*[clinic input]
1035 class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "&MultibyteIncrementalDecoder_Type"
1036[clinic start generated code]*/
1037/*[clinic end generated code: output=da39a3ee5e6b4b0d input=f6003faaf2cea692]*/
1038
1039/*[clinic input]
1040_multibytecodec.MultibyteIncrementalDecoder.decode
1041
1042 input: Py_buffer
1043 final: int = 0
1044[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001045
1046static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001047_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self, Py_buffer *input, int final)
1048/*[clinic end generated code: output=a0f3f92aa7303cf7 input=eb18c2f6e83589e1]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001049{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 MultibyteDecodeBuffer buf;
1051 char *data, *wdata = NULL;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001052 Py_ssize_t wsize, size, origpending;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001053 PyObject *res;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001054
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001055 data = input->buf;
1056 size = input->len;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001057
Victor Stinner8f674cc2013-04-17 23:02:17 +02001058 _PyUnicodeWriter_Init(&buf.writer);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001059 buf.excobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 origpending = self->pendingsize;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 if (self->pendingsize == 0) {
1063 wsize = size;
1064 wdata = data;
1065 }
1066 else {
1067 if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1068 PyErr_NoMemory();
1069 goto errorexit;
1070 }
1071 wsize = size + self->pendingsize;
1072 wdata = PyMem_Malloc(wsize);
Victor Stinner33283ba2013-07-15 17:47:39 +02001073 if (wdata == NULL) {
1074 PyErr_NoMemory();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 goto errorexit;
Victor Stinner33283ba2013-07-15 17:47:39 +02001076 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 memcpy(wdata, self->pending, self->pendingsize);
1078 memcpy(wdata + self->pendingsize, data, size);
1079 self->pendingsize = 0;
1080 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1083 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1086 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 if (final && buf.inbuf < buf.inbuf_end) {
1089 if (multibytecodec_decerror(self->codec, &self->state,
1090 &buf, self->errors, MBERR_TOOFEW)) {
1091 /* recover the original pending buffer */
1092 memcpy(self->pending, wdata, origpending);
1093 self->pendingsize = origpending;
1094 goto errorexit;
1095 }
1096 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1099 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1100 goto errorexit;
1101 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001102
Victor Stinnera0dd0212013-04-11 22:09:04 +02001103 res = _PyUnicodeWriter_Finish(&buf.writer);
1104 if (res == NULL)
1105 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 if (wdata != data)
1108 PyMem_Del(wdata);
1109 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001110 return res;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001111
1112errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 if (wdata != NULL && wdata != data)
1114 PyMem_Del(wdata);
1115 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001116 _PyUnicodeWriter_Dealloc(&buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001118}
1119
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001120/*[clinic input]
1121_multibytecodec.MultibyteIncrementalDecoder.reset
1122[clinic start generated code]*/
1123
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001124static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001125_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1126/*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 if (self->codec->decreset != NULL &&
1129 self->codec->decreset(&self->state, self->codec->config) != 0)
1130 return NULL;
1131 self->pendingsize = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001134}
1135
1136static struct PyMethodDef mbidecoder_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001137 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
1138 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1139 {NULL, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001140};
1141
1142static PyObject *
1143mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1144{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 MultibyteIncrementalDecoderObject *self;
1146 PyObject *codec = NULL;
1147 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1150 incnewkwarglist, &errors))
1151 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1154 if (self == NULL)
1155 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1158 if (codec == NULL)
1159 goto errorexit;
1160 if (!MultibyteCodec_Check(codec)) {
1161 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1162 goto errorexit;
1163 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 self->codec = ((MultibyteCodecObject *)codec)->codec;
1166 self->pendingsize = 0;
1167 self->errors = internal_error_callback(errors);
1168 if (self->errors == NULL)
1169 goto errorexit;
1170 if (self->codec->decinit != NULL &&
1171 self->codec->decinit(&self->state, self->codec->config) != 0)
1172 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 Py_DECREF(codec);
1175 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001176
1177errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 Py_XDECREF(self);
1179 Py_XDECREF(codec);
1180 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001181}
1182
1183static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001184mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1185{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001187}
1188
1189static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001190mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 if (ERROR_ISCUSTOM(self->errors))
1194 Py_VISIT(self->errors);
1195 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001196}
1197
1198static void
1199mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1200{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 PyObject_GC_UnTrack(self);
1202 ERROR_DECREF(self->errors);
1203 Py_TYPE(self)->tp_free(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001204}
1205
1206static PyTypeObject MultibyteIncrementalDecoder_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 PyVarObject_HEAD_INIT(NULL, 0)
1208 "MultibyteIncrementalDecoder", /* tp_name */
1209 sizeof(MultibyteIncrementalDecoderObject), /* tp_basicsize */
1210 0, /* tp_itemsize */
1211 /* methods */
1212 (destructor)mbidecoder_dealloc, /* tp_dealloc */
1213 0, /* tp_print */
1214 0, /* tp_getattr */
1215 0, /* tp_setattr */
1216 0, /* tp_reserved */
1217 0, /* tp_repr */
1218 0, /* tp_as_number */
1219 0, /* tp_as_sequence */
1220 0, /* tp_as_mapping */
1221 0, /* tp_hash */
1222 0, /* tp_call */
1223 0, /* tp_str */
1224 PyObject_GenericGetAttr, /* tp_getattro */
1225 0, /* tp_setattro */
1226 0, /* tp_as_buffer */
1227 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1228 | Py_TPFLAGS_BASETYPE, /* tp_flags */
1229 0, /* tp_doc */
1230 (traverseproc)mbidecoder_traverse, /* tp_traverse */
1231 0, /* tp_clear */
1232 0, /* tp_richcompare */
1233 0, /* tp_weaklistoffset */
1234 0, /* tp_iter */
1235 0, /* tp_iterext */
1236 mbidecoder_methods, /* tp_methods */
1237 0, /* tp_members */
1238 codecctx_getsets, /* tp_getset */
1239 0, /* tp_base */
1240 0, /* tp_dict */
1241 0, /* tp_descr_get */
1242 0, /* tp_descr_set */
1243 0, /* tp_dictoffset */
1244 mbidecoder_init, /* tp_init */
1245 0, /* tp_alloc */
1246 mbidecoder_new, /* tp_new */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001247};
1248
1249
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001250/*[clinic input]
1251 class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "MultibyteStreamReader_Type"
1252[clinic start generated code]*/
1253/*[clinic end generated code: output=da39a3ee5e6b4b0d input=d323634b74976f09]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001254
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001255static PyObject *
1256mbstreamreader_iread(MultibyteStreamReaderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 const char *method, Py_ssize_t sizehint)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001258{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 MultibyteDecodeBuffer buf;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001260 PyObject *cres, *res;
1261 Py_ssize_t rsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001262
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 if (sizehint == 0)
Victor Stinnerb37b1742011-12-01 03:18:59 +01001264 return PyUnicode_New(0, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001265
Victor Stinner8f674cc2013-04-17 23:02:17 +02001266 _PyUnicodeWriter_Init(&buf.writer);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001267 buf.excobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 for (;;) {
1271 int endoffile;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 if (sizehint < 0)
1274 cres = PyObject_CallMethod(self->stream,
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001275 method, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 else
1277 cres = PyObject_CallMethod(self->stream,
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001278 method, "i", sizehint);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 if (cres == NULL)
1280 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001282 if (!PyBytes_Check(cres)) {
1283 PyErr_Format(PyExc_TypeError,
1284 "stream function returned a "
1285 "non-bytes object (%.100s)",
1286 cres->ob_type->tp_name);
1287 goto errorexit;
1288 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001289
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 endoffile = (PyBytes_GET_SIZE(cres) == 0);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 if (self->pendingsize > 0) {
1293 PyObject *ctr;
1294 char *ctrdata;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001295
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1297 PyErr_NoMemory();
1298 goto errorexit;
Victor Stinner064bbdc2013-07-08 22:28:27 +02001299 }
1300 rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1301 ctr = PyBytes_FromStringAndSize(NULL, rsize);
1302 if (ctr == NULL)
1303 goto errorexit;
1304 ctrdata = PyBytes_AS_STRING(ctr);
1305 memcpy(ctrdata, self->pending, self->pendingsize);
1306 memcpy(ctrdata + self->pendingsize,
1307 PyBytes_AS_STRING(cres),
1308 PyBytes_GET_SIZE(cres));
1309 Py_DECREF(cres);
1310 cres = ctr;
1311 self->pendingsize = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 rsize = PyBytes_GET_SIZE(cres);
1315 if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1316 rsize) != 0)
1317 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001318
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 if (rsize > 0 && decoder_feed_buffer(
1320 (MultibyteStatefulDecoderContext *)self, &buf))
1321 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 if (endoffile || sizehint < 0) {
1324 if (buf.inbuf < buf.inbuf_end &&
1325 multibytecodec_decerror(self->codec, &self->state,
1326 &buf, self->errors, MBERR_TOOFEW))
1327 goto errorexit;
1328 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1331 if (decoder_append_pending(STATEFUL_DCTX(self),
1332 &buf) != 0)
1333 goto errorexit;
1334 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 Py_DECREF(cres);
1337 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001338
Victor Stinnera0dd0212013-04-11 22:09:04 +02001339 if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 break;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001341
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 sizehint = 1; /* read 1 more byte and retry */
1343 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001344
Victor Stinnera0dd0212013-04-11 22:09:04 +02001345 res = _PyUnicodeWriter_Finish(&buf.writer);
1346 if (res == NULL)
1347 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 Py_XDECREF(cres);
1350 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001351 return res;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001352
1353errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 Py_XDECREF(cres);
1355 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001356 _PyUnicodeWriter_Dealloc(&buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001358}
1359
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001360/*[clinic input]
1361 _multibytecodec.MultibyteStreamReader.read
1362
1363 sizeobj: object = None
1364 /
1365[clinic start generated code]*/
1366
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001367static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001368_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self, PyObject *sizeobj)
1369/*[clinic end generated code: output=f298ea6e1bd2083c input=015b0d3ff2fca485]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001370{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001372
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001373 if (sizeobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 size = -1;
1375 else if (PyLong_Check(sizeobj))
1376 size = PyLong_AsSsize_t(sizeobj);
1377 else {
1378 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1379 return NULL;
1380 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 if (size == -1 && PyErr_Occurred())
1383 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 return mbstreamreader_iread(self, "read", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001386}
1387
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001388/*[clinic input]
1389 _multibytecodec.MultibyteStreamReader.readline
1390
1391 sizeobj: object = None
1392 /
1393[clinic start generated code]*/
1394
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001395static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001396_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self, PyObject *sizeobj)
1397/*[clinic end generated code: output=e5ac302a6d0999de input=41ccc64f9bb0cec3]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001398{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001400
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001401 if (sizeobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 size = -1;
1403 else if (PyLong_Check(sizeobj))
1404 size = PyLong_AsSsize_t(sizeobj);
1405 else {
1406 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1407 return NULL;
1408 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 if (size == -1 && PyErr_Occurred())
1411 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 return mbstreamreader_iread(self, "readline", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001414}
1415
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001416/*[clinic input]
1417 _multibytecodec.MultibyteStreamReader.readlines
1418
1419 sizehintobj: object = None
1420 /
1421[clinic start generated code]*/
1422
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001423static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001424_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self, PyObject *sizehintobj)
1425/*[clinic end generated code: output=68f024178b77cb0f input=54932f5d4d88e880]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001426{
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001427 PyObject *r, *sr;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 Py_ssize_t sizehint;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001429
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001430 if (sizehintobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 sizehint = -1;
1432 else if (PyLong_Check(sizehintobj))
1433 sizehint = PyLong_AsSsize_t(sizehintobj);
1434 else {
1435 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1436 return NULL;
1437 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 if (sizehint == -1 && PyErr_Occurred())
1440 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 r = mbstreamreader_iread(self, "read", sizehint);
1443 if (r == NULL)
1444 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 sr = PyUnicode_Splitlines(r, 1);
1447 Py_DECREF(r);
1448 return sr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001449}
1450
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001451/*[clinic input]
1452 _multibytecodec.MultibyteStreamReader.reset
1453[clinic start generated code]*/
1454
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001455static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001456_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1457/*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001458{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 if (self->codec->decreset != NULL &&
1460 self->codec->decreset(&self->state, self->codec->config) != 0)
1461 return NULL;
1462 self->pendingsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001465}
1466
1467static struct PyMethodDef mbstreamreader_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001468 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1469 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1470 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1471 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001473};
1474
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001475static PyMemberDef mbstreamreader_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 {"stream", T_OBJECT,
1477 offsetof(MultibyteStreamReaderObject, stream),
1478 READONLY, NULL},
1479 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001480};
1481
1482static PyObject *
1483mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1484{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 MultibyteStreamReaderObject *self;
1486 PyObject *stream, *codec = NULL;
1487 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1490 streamkwarglist, &stream, &errors))
1491 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1494 if (self == NULL)
1495 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1498 if (codec == NULL)
1499 goto errorexit;
1500 if (!MultibyteCodec_Check(codec)) {
1501 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1502 goto errorexit;
1503 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 self->codec = ((MultibyteCodecObject *)codec)->codec;
1506 self->stream = stream;
1507 Py_INCREF(stream);
1508 self->pendingsize = 0;
1509 self->errors = internal_error_callback(errors);
1510 if (self->errors == NULL)
1511 goto errorexit;
1512 if (self->codec->decinit != NULL &&
1513 self->codec->decinit(&self->state, self->codec->config) != 0)
1514 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001515
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001516 Py_DECREF(codec);
1517 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001518
1519errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 Py_XDECREF(self);
1521 Py_XDECREF(codec);
1522 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001523}
1524
1525static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001526mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001529}
1530
1531static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001532mbstreamreader_traverse(MultibyteStreamReaderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001534{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001535 if (ERROR_ISCUSTOM(self->errors))
1536 Py_VISIT(self->errors);
1537 Py_VISIT(self->stream);
1538 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001539}
1540
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001541static void
1542mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1543{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001544 PyObject_GC_UnTrack(self);
1545 ERROR_DECREF(self->errors);
1546 Py_XDECREF(self->stream);
1547 Py_TYPE(self)->tp_free(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001548}
1549
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001550static PyTypeObject MultibyteStreamReader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001551 PyVarObject_HEAD_INIT(NULL, 0)
1552 "MultibyteStreamReader", /* tp_name */
1553 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
1554 0, /* tp_itemsize */
1555 /* methods */
1556 (destructor)mbstreamreader_dealloc, /* tp_dealloc */
1557 0, /* tp_print */
1558 0, /* tp_getattr */
1559 0, /* tp_setattr */
1560 0, /* tp_reserved */
1561 0, /* tp_repr */
1562 0, /* tp_as_number */
1563 0, /* tp_as_sequence */
1564 0, /* tp_as_mapping */
1565 0, /* tp_hash */
1566 0, /* tp_call */
1567 0, /* tp_str */
1568 PyObject_GenericGetAttr, /* tp_getattro */
1569 0, /* tp_setattro */
1570 0, /* tp_as_buffer */
1571 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1572 | Py_TPFLAGS_BASETYPE, /* tp_flags */
1573 0, /* tp_doc */
1574 (traverseproc)mbstreamreader_traverse, /* tp_traverse */
1575 0, /* tp_clear */
1576 0, /* tp_richcompare */
1577 0, /* tp_weaklistoffset */
1578 0, /* tp_iter */
1579 0, /* tp_iterext */
1580 mbstreamreader_methods, /* tp_methods */
1581 mbstreamreader_members, /* tp_members */
1582 codecctx_getsets, /* tp_getset */
1583 0, /* tp_base */
1584 0, /* tp_dict */
1585 0, /* tp_descr_get */
1586 0, /* tp_descr_set */
1587 0, /* tp_dictoffset */
1588 mbstreamreader_init, /* tp_init */
1589 0, /* tp_alloc */
1590 mbstreamreader_new, /* tp_new */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001591};
1592
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001593
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001594/*[clinic input]
1595 class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "&MultibyteStreamWriter_Type"
1596[clinic start generated code]*/
1597/*[clinic end generated code: output=da39a3ee5e6b4b0d input=cde22780a215d6ac]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001598
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001599static int
1600mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 PyObject *unistr)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001602{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 PyObject *str, *wr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1606 if (str == NULL)
1607 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001608
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001609 wr = _PyObject_CallMethodId(self->stream, &PyId_write, "O", str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 Py_DECREF(str);
1611 if (wr == NULL)
1612 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 Py_DECREF(wr);
1615 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001616}
1617
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001618/*[clinic input]
1619 _multibytecodec.MultibyteStreamWriter.write
1620
1621 strobj: object
1622 /
1623[clinic start generated code]*/
1624
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001625static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001626_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self, PyObject *strobj)
1627/*[clinic end generated code: output=44e9eb0db0374cb1 input=551dc4c018c10a2b]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001628{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 if (mbstreamwriter_iwrite(self, strobj))
1630 return NULL;
1631 else
1632 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001633}
1634
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001635/*[clinic input]
1636 _multibytecodec.MultibyteStreamWriter.writelines
1637
1638 lines: object
1639 /
1640[clinic start generated code]*/
1641
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001642static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001643_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines)
1644/*[clinic end generated code: output=4facbb0638dde172 input=57797fe7008d4e96]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001645{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 PyObject *strobj;
1647 int i, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 if (!PySequence_Check(lines)) {
1650 PyErr_SetString(PyExc_TypeError,
1651 "arg must be a sequence object");
1652 return NULL;
1653 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 for (i = 0; i < PySequence_Length(lines); i++) {
1656 /* length can be changed even within this loop */
1657 strobj = PySequence_GetItem(lines, i);
1658 if (strobj == NULL)
1659 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 r = mbstreamwriter_iwrite(self, strobj);
1662 Py_DECREF(strobj);
1663 if (r == -1)
1664 return NULL;
1665 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001668}
1669
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001670/*[clinic input]
1671 _multibytecodec.MultibyteStreamWriter.reset
1672[clinic start generated code]*/
1673
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001674static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001675_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1676/*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001677{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 PyObject *pwrt;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 pwrt = multibytecodec_encode(self->codec, &self->state,
Victor Stinnerd9491262013-04-14 02:06:32 +02001681 self->pending, NULL, self->errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 MBENC_FLUSH | MBENC_RESET);
1683 /* some pending buffer can be truncated when UnicodeEncodeError is
1684 * raised on 'strict' mode. but, 'reset' method is designed to
1685 * reset the pending buffer or states so failed string sequence
1686 * ought to be missed */
Victor Stinnerd9491262013-04-14 02:06:32 +02001687 Py_CLEAR(self->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 if (pwrt == NULL)
1689 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 assert(PyBytes_Check(pwrt));
1692 if (PyBytes_Size(pwrt) > 0) {
1693 PyObject *wr;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001694
1695 wr = _PyObject_CallMethodId(self->stream, &PyId_write, "O", pwrt);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 if (wr == NULL) {
1697 Py_DECREF(pwrt);
1698 return NULL;
1699 }
1700 }
1701 Py_DECREF(pwrt);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001702
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001703 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001704}
1705
1706static PyObject *
1707mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1708{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001709 MultibyteStreamWriterObject *self;
1710 PyObject *stream, *codec = NULL;
1711 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1714 streamkwarglist, &stream, &errors))
1715 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1718 if (self == NULL)
1719 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1722 if (codec == NULL)
1723 goto errorexit;
1724 if (!MultibyteCodec_Check(codec)) {
1725 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1726 goto errorexit;
1727 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 self->codec = ((MultibyteCodecObject *)codec)->codec;
1730 self->stream = stream;
1731 Py_INCREF(stream);
Victor Stinnerd9491262013-04-14 02:06:32 +02001732 self->pending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 self->errors = internal_error_callback(errors);
1734 if (self->errors == NULL)
1735 goto errorexit;
1736 if (self->codec->encinit != NULL &&
1737 self->codec->encinit(&self->state, self->codec->config) != 0)
1738 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 Py_DECREF(codec);
1741 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001742
1743errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001744 Py_XDECREF(self);
1745 Py_XDECREF(codec);
1746 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001747}
1748
1749static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001750mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1751{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001753}
1754
1755static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001756mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001758{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 if (ERROR_ISCUSTOM(self->errors))
1760 Py_VISIT(self->errors);
1761 Py_VISIT(self->stream);
1762 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001763}
1764
1765static void
1766mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1767{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001768 PyObject_GC_UnTrack(self);
1769 ERROR_DECREF(self->errors);
1770 Py_XDECREF(self->stream);
1771 Py_TYPE(self)->tp_free(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001772}
1773
1774static struct PyMethodDef mbstreamwriter_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001775 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1776 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1777 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1778 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001779};
1780
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001781static PyMemberDef mbstreamwriter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 {"stream", T_OBJECT,
1783 offsetof(MultibyteStreamWriterObject, stream),
1784 READONLY, NULL},
1785 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001786};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001787
1788static PyTypeObject MultibyteStreamWriter_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789 PyVarObject_HEAD_INIT(NULL, 0)
1790 "MultibyteStreamWriter", /* tp_name */
1791 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
1792 0, /* tp_itemsize */
1793 /* methods */
1794 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
1795 0, /* tp_print */
1796 0, /* tp_getattr */
1797 0, /* tp_setattr */
1798 0, /* tp_reserved */
1799 0, /* tp_repr */
1800 0, /* tp_as_number */
1801 0, /* tp_as_sequence */
1802 0, /* tp_as_mapping */
1803 0, /* tp_hash */
1804 0, /* tp_call */
1805 0, /* tp_str */
1806 PyObject_GenericGetAttr, /* tp_getattro */
1807 0, /* tp_setattro */
1808 0, /* tp_as_buffer */
1809 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
1810 | Py_TPFLAGS_BASETYPE, /* tp_flags */
1811 0, /* tp_doc */
1812 (traverseproc)mbstreamwriter_traverse, /* tp_traverse */
1813 0, /* tp_clear */
1814 0, /* tp_richcompare */
1815 0, /* tp_weaklistoffset */
1816 0, /* tp_iter */
1817 0, /* tp_iterext */
1818 mbstreamwriter_methods, /* tp_methods */
1819 mbstreamwriter_members, /* tp_members */
1820 codecctx_getsets, /* tp_getset */
1821 0, /* tp_base */
1822 0, /* tp_dict */
1823 0, /* tp_descr_get */
1824 0, /* tp_descr_set */
1825 0, /* tp_dictoffset */
1826 mbstreamwriter_init, /* tp_init */
1827 0, /* tp_alloc */
1828 mbstreamwriter_new, /* tp_new */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001829};
1830
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001831
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001832/*[clinic input]
1833_multibytecodec.__create_codec
1834
1835 arg: object
1836 /
1837[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001838
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001839static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001840_multibytecodec___create_codec(PyModuleDef *module, PyObject *arg)
1841/*[clinic end generated code: output=fbe74f6510640163 input=6840b2a6b183fcfa]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001842{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001843 MultibyteCodecObject *self;
1844 MultibyteCodec *codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1847 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1848 return NULL;
1849 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1852 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1853 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001855 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
1856 if (self == NULL)
1857 return NULL;
1858 self->codec = codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001859
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001860 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001861}
1862
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001863static struct PyMethodDef __methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001864 _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001866};
1867
Martin v. Löwis1a214512008-06-11 05:26:20 +00001868
1869static struct PyModuleDef _multibytecodecmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 PyModuleDef_HEAD_INIT,
1871 "_multibytecodec",
1872 NULL,
1873 -1,
1874 __methods,
1875 NULL,
1876 NULL,
1877 NULL,
1878 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001879};
1880
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001881PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001882PyInit__multibytecodec(void)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001883{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001884 int i;
1885 PyObject *m;
1886 PyTypeObject *typelist[] = {
1887 &MultibyteIncrementalEncoder_Type,
1888 &MultibyteIncrementalDecoder_Type,
1889 &MultibyteStreamReader_Type,
1890 &MultibyteStreamWriter_Type,
1891 NULL
1892 };
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001893
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001894 if (PyType_Ready(&MultibyteCodec_Type) < 0)
1895 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001896
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001897 m = PyModule_Create(&_multibytecodecmodule);
1898 if (m == NULL)
1899 return NULL;
Neal Norwitz058bde12005-09-21 06:44:25 +00001900
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001901 for (i = 0; typelist[i] != NULL; i++) {
1902 if (PyType_Ready(typelist[i]) < 0)
1903 return NULL;
1904 Py_INCREF(typelist[i]);
1905 PyModule_AddObject(m, typelist[i]->tp_name,
1906 (PyObject *)typelist[i]);
1907 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001909 if (PyErr_Occurred()) {
1910 Py_FatalError("can't initialize the _multibytecodec module");
1911 Py_DECREF(m);
1912 m = NULL;
1913 }
1914 return m;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001915}