blob: cb7182ff21fbcd3ba261d1696d24f7bfad7c02b8 [file] [log] [blame]
/*
 * multibytecodec.c: Common Multibyte Codec Implementation
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 */
6
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00007#define PY_SSIZE_T_CLEAN
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00008#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +02009#include "structmember.h" // PyMemberDef
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000010#include "multibytecodec.h"
Brett Cannonf2de1fc2014-08-22 11:45:03 -040011#include "clinic/multibytecodec.c.h"
12
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010013#define MODULE_NAME "_multibytecodec"
14
15typedef struct {
16 PyTypeObject *encoder_type;
17 PyTypeObject *decoder_type;
18 PyTypeObject *reader_type;
19 PyTypeObject *writer_type;
20 PyTypeObject *multibytecodec_type;
21} _multibytecodec_state;
22
23static _multibytecodec_state *
24_multibytecodec_get_state(PyObject *module)
25{
26 _multibytecodec_state *state = PyModule_GetState(module);
27 assert(state != NULL);
28 return state;
29}
30
31static struct PyModuleDef _multibytecodecmodule;
32static _multibytecodec_state *
33_multibyte_codec_find_state_by_type(PyTypeObject *type)
34{
35 PyObject *module = _PyType_GetModuleByDef(type, &_multibytecodecmodule);
36 assert(module != NULL);
37 return _multibytecodec_get_state(module);
38}
39
40#define clinic_get_state() _multibyte_codec_find_state_by_type(type)
Brett Cannonf2de1fc2014-08-22 11:45:03 -040041/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -040042module _multibytecodec
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010043class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
44class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
45class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
46class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
47class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
Brett Cannonf2de1fc2014-08-22 11:45:03 -040048[clinic start generated code]*/
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010049/*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
50#undef clinic_get_state
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000051
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000052typedef struct {
Victor Stinnerd9491262013-04-14 02:06:32 +020053 PyObject *inobj;
54 Py_ssize_t inpos, inlen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000055 unsigned char *outbuf, *outbuf_end;
56 PyObject *excobj, *outobj;
57} MultibyteEncodeBuffer;
58
59typedef struct {
60 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
Victor Stinnera0dd0212013-04-11 22:09:04 +020061 PyObject *excobj;
62 _PyUnicodeWriter writer;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000063} MultibyteDecodeBuffer;
64
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000065static char *incnewkwarglist[] = {"errors", NULL};
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +000066static char *streamkwarglist[] = {"stream", "errors", NULL};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000067
68static PyObject *multibytecodec_encode(MultibyteCodec *,
Victor Stinnerd9491262013-04-14 02:06:32 +020069 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyObject *, int);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000073
Victor Stinner3f36a572013-11-12 21:39:02 +010074_Py_IDENTIFIER(write);
75
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000076static PyObject *
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000077make_tuple(PyObject *object, Py_ssize_t len)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000078{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 PyObject *v, *w;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 if (object == NULL)
82 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 v = PyTuple_New(2);
85 if (v == NULL) {
86 Py_DECREF(object);
87 return NULL;
88 }
89 PyTuple_SET_ITEM(v, 0, object);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 w = PyLong_FromSsize_t(len);
92 if (w == NULL) {
93 Py_DECREF(v);
94 return NULL;
95 }
96 PyTuple_SET_ITEM(v, 1, w);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 return v;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000099}
100
101static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000102internal_error_callback(const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 if (errors == NULL || strcmp(errors, "strict") == 0)
105 return ERROR_STRICT;
106 else if (strcmp(errors, "ignore") == 0)
107 return ERROR_IGNORE;
108 else if (strcmp(errors, "replace") == 0)
109 return ERROR_REPLACE;
110 else
111 return PyUnicode_FromString(errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000112}
113
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000114static PyObject *
115call_error_callback(PyObject *errors, PyObject *exc)
116{
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200117 PyObject *cb, *r;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 const char *str;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 assert(PyUnicode_Check(errors));
Serhiy Storchaka06515832016-11-20 09:13:07 +0200121 str = PyUnicode_AsUTF8(errors);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 if (str == NULL)
123 return NULL;
124 cb = PyCodec_LookupError(str);
125 if (cb == NULL)
126 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000127
Petr Viktorinffd97532020-02-11 17:46:57 +0100128 r = PyObject_CallOneArg(cb, exc);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_DECREF(cb);
130 return r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000131}
132
133static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200134codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 const char *errors;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 if (self->errors == ERROR_STRICT)
139 errors = "strict";
140 else if (self->errors == ERROR_IGNORE)
141 errors = "ignore";
142 else if (self->errors == ERROR_REPLACE)
143 errors = "replace";
144 else {
145 Py_INCREF(self->errors);
146 return self->errors;
147 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return PyUnicode_FromString(errors);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000150}
151
152static int
153codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 void *closure)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 PyObject *cb;
157 const char *str;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000158
Zackery Spytz842acaa2018-12-17 07:52:45 -0700159 if (value == NULL) {
160 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
161 return -1;
162 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 if (!PyUnicode_Check(value)) {
164 PyErr_SetString(PyExc_TypeError, "errors must be a string");
165 return -1;
166 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000167
Serhiy Storchaka06515832016-11-20 09:13:07 +0200168 str = PyUnicode_AsUTF8(value);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 if (str == NULL)
170 return -1;
Neal Norwitz6ea45d32007-08-26 04:19:43 +0000171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 cb = internal_error_callback(str);
173 if (cb == NULL)
174 return -1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 ERROR_DECREF(self->errors);
177 self->errors = cb;
178 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000179}
180
181/* This getset handlers list is used by all the stateful codec objects */
182static PyGetSetDef codecctx_getsets[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 {"errors", (getter)codecctx_errors_get,
184 (setter)codecctx_errors_set,
185 PyDoc_STR("how to treat errors")},
186 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000187};
188
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000189static int
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000190expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 Py_ssize_t orgpos, orgsize, incsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 orgpos = (Py_ssize_t)((char *)buf->outbuf -
195 PyBytes_AS_STRING(buf->outobj));
196 orgsize = PyBytes_GET_SIZE(buf->outobj);
197 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000198
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200199 if (orgsize > PY_SSIZE_T_MAX - incsize) {
200 PyErr_NoMemory();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return -1;
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200202 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
205 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
208 buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
209 + PyBytes_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000212}
/* Make sure at least `s` bytes can be written at the cursor of `buf`;
 * a negative `s` always forces a grow.  Jumps to the enclosing function's
 * `errorexit` label when the buffer cannot be expanded. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
            expand_encodebuffer((buf), (s)) == -1)                      \
        goto errorexit;                                                 \
} while(0)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000218
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000219
/**
 * MultibyteCodec object
 */
223
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000224static int
225multibytecodec_encerror(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 MultibyteCodec_State *state,
227 MultibyteEncodeBuffer *buf,
228 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 PyObject *retobj = NULL, *retstr = NULL, *tobj;
231 Py_ssize_t retstrsize, newpos;
232 Py_ssize_t esize, start, end;
233 const char *reason;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 if (e > 0) {
236 reason = "illegal multibyte sequence";
237 esize = e;
238 }
239 else {
240 switch (e) {
241 case MBERR_TOOSMALL:
242 REQUIRE_ENCODEBUFFER(buf, -1);
243 return 0; /* retry it */
244 case MBERR_TOOFEW:
245 reason = "incomplete multibyte sequence";
Victor Stinnerd9491262013-04-14 02:06:32 +0200246 esize = (Py_ssize_t)buf->inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 break;
248 case MBERR_INTERNAL:
249 PyErr_SetString(PyExc_RuntimeError,
250 "internal codec error");
251 return -1;
252 default:
253 PyErr_SetString(PyExc_RuntimeError,
254 "unknown runtime error");
255 return -1;
256 }
257 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 if (errors == ERROR_REPLACE) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200260 PyObject *replchar;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 Py_ssize_t r;
Victor Stinnerd9491262013-04-14 02:06:32 +0200262 Py_ssize_t inpos;
263 int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300264 const void *data;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000265
Victor Stinnerd9491262013-04-14 02:06:32 +0200266 replchar = PyUnicode_FromOrdinal('?');
267 if (replchar == NULL)
268 goto errorexit;
269 kind = PyUnicode_KIND(replchar);
270 data = PyUnicode_DATA(replchar);
271
272 inpos = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 for (;;) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200274 Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000275
Victor Stinnerd9491262013-04-14 02:06:32 +0200276 r = codec->encode(state, codec->config,
277 kind, data, &inpos, 1,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 &buf->outbuf, outleft, 0);
279 if (r == MBERR_TOOSMALL) {
280 REQUIRE_ENCODEBUFFER(buf, -1);
281 continue;
282 }
283 else
284 break;
285 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000286
Victor Stinnerd9491262013-04-14 02:06:32 +0200287 Py_DECREF(replchar);
288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 if (r != 0) {
290 REQUIRE_ENCODEBUFFER(buf, 1);
291 *buf->outbuf++ = '?';
292 }
293 }
294 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200295 buf->inpos += esize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 return 0;
297 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000298
Victor Stinnerd9491262013-04-14 02:06:32 +0200299 start = (Py_ssize_t)buf->inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 /* use cached exception object if available */
303 if (buf->excobj == NULL) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200304 buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
305 "sOnns",
306 codec->encoding, buf->inobj,
307 start, end, reason);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 if (buf->excobj == NULL)
309 goto errorexit;
310 }
311 else
312 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
313 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
314 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
315 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 if (errors == ERROR_STRICT) {
318 PyCodec_StrictErrors(buf->excobj);
319 goto errorexit;
320 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000321
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000322 retobj = call_error_callback(errors, buf->excobj);
323 if (retobj == NULL)
324 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500327 (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
329 PyErr_SetString(PyExc_TypeError,
330 "encoding error handler must return "
Benjamin Petersonaff47232012-12-02 10:53:41 -0500331 "(str, int) tuple");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000332 goto errorexit;
333 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000334
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500335 if (PyUnicode_Check(tobj)) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200336 Py_ssize_t inpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000337
Victor Stinnerd9491262013-04-14 02:06:32 +0200338 retstr = multibytecodec_encode(codec, state, tobj,
339 &inpos, ERROR_STRICT,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 MBENC_FLUSH);
341 if (retstr == NULL)
342 goto errorexit;
343 }
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500344 else {
345 Py_INCREF(tobj);
346 retstr = tobj;
347 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349 assert(PyBytes_Check(retstr));
350 retstrsize = PyBytes_GET_SIZE(retstr);
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200351 if (retstrsize > 0) {
352 REQUIRE_ENCODEBUFFER(buf, retstrsize);
353 memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
354 buf->outbuf += retstrsize;
355 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
358 if (newpos < 0 && !PyErr_Occurred())
Victor Stinnerd9491262013-04-14 02:06:32 +0200359 newpos += (Py_ssize_t)buf->inlen;
360 if (newpos < 0 || newpos > buf->inlen) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 PyErr_Clear();
362 PyErr_Format(PyExc_IndexError,
363 "position %zd from error handler out of bounds",
364 newpos);
365 goto errorexit;
366 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200367 buf->inpos = newpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 Py_DECREF(retobj);
370 Py_DECREF(retstr);
371 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000372
373errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 Py_XDECREF(retobj);
375 Py_XDECREF(retstr);
376 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000377}
378
379static int
380multibytecodec_decerror(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 MultibyteCodec_State *state,
382 MultibyteDecodeBuffer *buf,
383 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 PyObject *retobj = NULL, *retuni = NULL;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200386 Py_ssize_t newpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 const char *reason;
388 Py_ssize_t esize, start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 if (e > 0) {
391 reason = "illegal multibyte sequence";
392 esize = e;
393 }
394 else {
395 switch (e) {
396 case MBERR_TOOSMALL:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 return 0; /* retry it */
398 case MBERR_TOOFEW:
399 reason = "incomplete multibyte sequence";
400 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
401 break;
402 case MBERR_INTERNAL:
403 PyErr_SetString(PyExc_RuntimeError,
404 "internal codec error");
405 return -1;
Victor Stinnerd1f99422013-07-16 21:41:43 +0200406 case MBERR_EXCEPTION:
407 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 default:
409 PyErr_SetString(PyExc_RuntimeError,
410 "unknown runtime error");
411 return -1;
412 }
413 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 if (errors == ERROR_REPLACE) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200416 if (_PyUnicodeWriter_WriteChar(&buf->writer,
417 Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
418 goto errorexit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 }
420 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
421 buf->inbuf += esize;
422 return 0;
423 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
426 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000428 /* use cached exception object if available */
429 if (buf->excobj == NULL) {
430 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
431 (const char *)buf->inbuf_top,
432 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
433 start, end, reason);
434 if (buf->excobj == NULL)
435 goto errorexit;
436 }
437 else
438 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
439 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
440 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
441 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 if (errors == ERROR_STRICT) {
444 PyCodec_StrictErrors(buf->excobj);
445 goto errorexit;
446 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 retobj = call_error_callback(errors, buf->excobj);
449 if (retobj == NULL)
450 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
453 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
454 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
455 PyErr_SetString(PyExc_TypeError,
456 "decoding error handler must return "
Benjamin Petersonaff47232012-12-02 10:53:41 -0500457 "(str, int) tuple");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 goto errorexit;
459 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000460
Victor Stinnera0dd0212013-04-11 22:09:04 +0200461 if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
Victor Stinner4eea8492011-11-21 03:01:27 +0100462 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
465 if (newpos < 0 && !PyErr_Occurred())
466 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
467 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
468 PyErr_Clear();
469 PyErr_Format(PyExc_IndexError,
470 "position %zd from error handler out of bounds",
471 newpos);
472 goto errorexit;
473 }
474 buf->inbuf = buf->inbuf_top + newpos;
475 Py_DECREF(retobj);
476 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000477
478errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 Py_XDECREF(retobj);
480 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000481}
482
483static PyObject *
484multibytecodec_encode(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 MultibyteCodec_State *state,
Victor Stinnerd9491262013-04-14 02:06:32 +0200486 PyObject *text, Py_ssize_t *inpos_t,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 PyObject *errors, int flags)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000488{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 MultibyteEncodeBuffer buf;
490 Py_ssize_t finalsize, r = 0;
Victor Stinnerd9491262013-04-14 02:06:32 +0200491 Py_ssize_t datalen;
492 int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300493 const void *data;
Victor Stinnerd9491262013-04-14 02:06:32 +0200494
495 if (PyUnicode_READY(text) < 0)
496 return NULL;
497 datalen = PyUnicode_GET_LENGTH(text);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000498
Victor Stinner6bcbef72011-05-24 22:17:55 +0200499 if (datalen == 0 && !(flags & MBENC_RESET))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 return PyBytes_FromStringAndSize(NULL, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 buf.excobj = NULL;
Brett Cannonb94767f2011-02-22 20:15:44 +0000503 buf.outobj = NULL;
Victor Stinnerd9491262013-04-14 02:06:32 +0200504 buf.inobj = text; /* borrowed reference */
505 buf.inpos = 0;
506 buf.inlen = datalen;
507 kind = PyUnicode_KIND(buf.inobj);
508 data = PyUnicode_DATA(buf.inobj);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
511 PyErr_NoMemory();
512 goto errorexit;
513 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
516 if (buf.outobj == NULL)
517 goto errorexit;
518 buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
519 buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000520
Victor Stinnerd9491262013-04-14 02:06:32 +0200521 while (buf.inpos < buf.inlen) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 /* we don't reuse inleft and outleft here.
523 * error callbacks can relocate the cursor anywhere on buffer*/
Victor Stinnerd9491262013-04-14 02:06:32 +0200524 Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
525
526 r = codec->encode(state, codec->config,
527 kind, data,
528 &buf.inpos, buf.inlen,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 &buf.outbuf, outleft, flags);
530 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
531 break;
532 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
533 goto errorexit;
534 else if (r == MBERR_TOOFEW)
535 break;
536 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000537
Victor Stinner6bcbef72011-05-24 22:17:55 +0200538 if (codec->encreset != NULL && (flags & MBENC_RESET))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 for (;;) {
540 Py_ssize_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
543 r = codec->encreset(state, codec->config, &buf.outbuf,
544 outleft);
545 if (r == 0)
546 break;
547 else if (multibytecodec_encerror(codec, state,
548 &buf, errors, r))
549 goto errorexit;
550 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 finalsize = (Py_ssize_t)((char *)buf.outbuf -
553 PyBytes_AS_STRING(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000554
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 if (finalsize != PyBytes_GET_SIZE(buf.outobj))
556 if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
557 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000558
Victor Stinnerd9491262013-04-14 02:06:32 +0200559 if (inpos_t)
560 *inpos_t = buf.inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 Py_XDECREF(buf.excobj);
562 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000563
564errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 Py_XDECREF(buf.excobj);
566 Py_XDECREF(buf.outobj);
567 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000568}
569
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400570/*[clinic input]
571_multibytecodec.MultibyteCodec.encode
572
573 input: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300574 errors: str(accept={str, NoneType}) = None
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400575
576Return an encoded string version of `input'.
577
578'errors' may be given to set a different error handling scheme. Default is
579'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
580values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
581registered with codecs.register_error that can handle UnicodeEncodeErrors.
582[clinic start generated code]*/
583
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000584static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400585_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
586 PyObject *input,
587 const char *errors)
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300588/*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000589{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 MultibyteCodec_State state;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400591 PyObject *errorcb, *r, *ucvt;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000593
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400594 if (PyUnicode_Check(input))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 ucvt = NULL;
596 else {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400597 input = ucvt = PyObject_Str(input);
598 if (input == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 return NULL;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400600 else if (!PyUnicode_Check(input)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 PyErr_SetString(PyExc_TypeError,
602 "couldn't convert the object to unicode.");
603 Py_DECREF(ucvt);
604 return NULL;
605 }
606 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000607
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400608 if (PyUnicode_READY(input) < 0) {
Victor Stinner9a80fab2011-11-21 02:50:14 +0100609 Py_XDECREF(ucvt);
610 return NULL;
611 }
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400612 datalen = PyUnicode_GET_LENGTH(input);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 errorcb = internal_error_callback(errors);
615 if (errorcb == NULL) {
616 Py_XDECREF(ucvt);
617 return NULL;
618 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 if (self->codec->encinit != NULL &&
621 self->codec->encinit(&state, self->codec->config) != 0)
622 goto errorexit;
623 r = multibytecodec_encode(self->codec, &state,
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400624 input, NULL, errorcb,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 MBENC_FLUSH | MBENC_RESET);
626 if (r == NULL)
627 goto errorexit;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000629 ERROR_DECREF(errorcb);
630 Py_XDECREF(ucvt);
631 return make_tuple(r, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000632
633errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 ERROR_DECREF(errorcb);
635 Py_XDECREF(ucvt);
636 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000637}
638
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400639/*[clinic input]
640_multibytecodec.MultibyteCodec.decode
641
642 input: Py_buffer
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300643 errors: str(accept={str, NoneType}) = None
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400644
645Decodes 'input'.
646
647'errors' may be given to set a different error handling scheme. Default is
648'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
649values are 'ignore' and 'replace' as well as any other name registered with
650codecs.register_error that is able to handle UnicodeDecodeErrors."
651[clinic start generated code]*/
652
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000653static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400654_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
655 Py_buffer *input,
656 const char *errors)
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300657/*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 MultibyteCodec_State state;
660 MultibyteDecodeBuffer buf;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200661 PyObject *errorcb, *res;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400662 const char *data;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200663 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000664
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400665 data = input->buf;
666 datalen = input->len;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 errorcb = internal_error_callback(errors);
669 if (errorcb == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 return NULL;
671 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 if (datalen == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 ERROR_DECREF(errorcb);
Victor Stinnerb37b1742011-12-01 03:18:59 +0100675 return make_tuple(PyUnicode_New(0, 0), 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000676 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000677
Victor Stinner8f674cc2013-04-17 23:02:17 +0200678 _PyUnicodeWriter_Init(&buf.writer);
679 buf.writer.min_length = datalen;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 buf.excobj = NULL;
681 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
682 buf.inbuf_end = buf.inbuf_top + datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 if (self->codec->decinit != NULL &&
685 self->codec->decinit(&state, self->codec->config) != 0)
686 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 while (buf.inbuf < buf.inbuf_end) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200689 Py_ssize_t inleft, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000693 r = self->codec->decode(&state, self->codec->config,
Victor Stinnera0dd0212013-04-11 22:09:04 +0200694 &buf.inbuf, inleft, &buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 if (r == 0)
696 break;
697 else if (multibytecodec_decerror(self->codec, &state,
698 &buf, errorcb, r))
699 goto errorexit;
700 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000701
Victor Stinnera0dd0212013-04-11 22:09:04 +0200702 res = _PyUnicodeWriter_Finish(&buf.writer);
703 if (res == NULL)
704 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 Py_XDECREF(buf.excobj);
707 ERROR_DECREF(errorcb);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200708 return make_tuple(res, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000709
710errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 ERROR_DECREF(errorcb);
712 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200713 _PyUnicodeWriter_Dealloc(&buf.writer);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000716}
717
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000718static struct PyMethodDef multibytecodec_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400719 _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
720 _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
721 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000722};
723
Victor Stinner11ef53a2021-01-08 15:43:59 +0100724static int
725multibytecodec_traverse(PyObject *self, visitproc visit, void *arg)
726{
727 Py_VISIT(Py_TYPE(self));
728 return 0;
729}
730
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000731static void
732multibytecodec_dealloc(MultibyteCodecObject *self)
733{
Victor Stinner11ef53a2021-01-08 15:43:59 +0100734 PyObject_GC_UnTrack(self);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100735 PyTypeObject *tp = Py_TYPE(self);
Victor Stinner11ef53a2021-01-08 15:43:59 +0100736 tp->tp_free(self);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100737 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000738}
739
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100740static PyType_Slot multibytecodec_slots[] = {
741 {Py_tp_dealloc, multibytecodec_dealloc},
742 {Py_tp_getattro, PyObject_GenericGetAttr},
743 {Py_tp_methods, multibytecodec_methods},
Victor Stinner11ef53a2021-01-08 15:43:59 +0100744 {Py_tp_traverse, multibytecodec_traverse},
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100745 {0, NULL},
746};
747
748static PyType_Spec multibytecodec_spec = {
749 .name = MODULE_NAME ".MultibyteCodec",
750 .basicsize = sizeof(MultibyteCodecObject),
Erlend Egeberg Aasland9746cda2021-04-30 16:04:57 +0200751 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
752 Py_TPFLAGS_DISALLOW_INSTANTIATION),
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100753 .slots = multibytecodec_slots,
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000754};
755
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000756
757/**
758 * Utility functions for stateful codec mechanism
759 */
760
/* Cast an object pointer to the stateful decoder / encoder context
 * type expected by the helpers below. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000763
764static PyObject *
765encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 PyObject *unistr, int final)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000767{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000768 PyObject *ucvt, *r = NULL;
Victor Stinnerd9491262013-04-14 02:06:32 +0200769 PyObject *inbuf = NULL;
770 Py_ssize_t inpos, datalen;
771 PyObject *origpending = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 if (PyUnicode_Check(unistr))
774 ucvt = NULL;
775 else {
776 unistr = ucvt = PyObject_Str(unistr);
777 if (unistr == NULL)
778 return NULL;
779 else if (!PyUnicode_Check(unistr)) {
780 PyErr_SetString(PyExc_TypeError,
Benjamin Petersonaff47232012-12-02 10:53:41 -0500781 "couldn't convert the object to str.");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 Py_DECREF(ucvt);
783 return NULL;
784 }
785 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000786
Victor Stinnerd9491262013-04-14 02:06:32 +0200787 if (ctx->pending) {
788 PyObject *inbuf_tmp;
789
790 Py_INCREF(ctx->pending);
791 origpending = ctx->pending;
792
793 Py_INCREF(ctx->pending);
794 inbuf_tmp = ctx->pending;
795 PyUnicode_Append(&inbuf_tmp, unistr);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 if (inbuf_tmp == NULL)
797 goto errorexit;
Victor Stinnerd9491262013-04-14 02:06:32 +0200798 Py_CLEAR(ctx->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 inbuf = inbuf_tmp;
800 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200801 else {
802 origpending = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000803
Victor Stinnerd9491262013-04-14 02:06:32 +0200804 Py_INCREF(unistr);
805 inbuf = unistr;
806 }
807 if (PyUnicode_READY(inbuf) < 0)
808 goto errorexit;
809 inpos = 0;
810 datalen = PyUnicode_GET_LENGTH(inbuf);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 r = multibytecodec_encode(ctx->codec, &ctx->state,
Victor Stinnerd9491262013-04-14 02:06:32 +0200813 inbuf, &inpos,
814 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 if (r == NULL) {
816 /* recover the original pending buffer */
Serhiy Storchaka48842712016-04-06 09:45:48 +0300817 Py_XSETREF(ctx->pending, origpending);
Victor Stinnerd9491262013-04-14 02:06:32 +0200818 origpending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 goto errorexit;
820 }
Victor Stinner322cc742013-04-14 18:11:41 +0200821 Py_XDECREF(origpending);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000822
Victor Stinnerd9491262013-04-14 02:06:32 +0200823 if (inpos < datalen) {
824 if (datalen - inpos > MAXENCPENDING) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 /* normal codecs can't reach here */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 PyErr_SetString(PyExc_UnicodeError,
827 "pending buffer overflow");
828 goto errorexit;
829 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200830 ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
831 if (ctx->pending == NULL) {
832 /* normal codecs can't reach here */
833 goto errorexit;
834 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000836
Victor Stinner322cc742013-04-14 18:11:41 +0200837 Py_DECREF(inbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 Py_XDECREF(ucvt);
839 return r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000840
841errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 Py_XDECREF(r);
843 Py_XDECREF(ucvt);
Victor Stinnerd9491262013-04-14 02:06:32 +0200844 Py_XDECREF(origpending);
Victor Stinner322cc742013-04-14 18:11:41 +0200845 Py_XDECREF(inbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000847}
848
849static int
850decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 MultibyteDecodeBuffer *buf)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 Py_ssize_t npendings;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
856 if (npendings + ctx->pendingsize > MAXDECPENDING ||
857 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
858 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
859 return -1;
860 }
861 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
862 ctx->pendingsize += npendings;
863 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000864}
865
866static int
867decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 Py_ssize_t size)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
871 buf->inbuf_end = buf->inbuf_top + size;
Victor Stinner8f674cc2013-04-17 23:02:17 +0200872 buf->writer.min_length += size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000874}
875
876static int
877decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 MultibyteDecodeBuffer *buf)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000879{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 while (buf->inbuf < buf->inbuf_end) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200881 Py_ssize_t inleft;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 Py_ssize_t r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000886 r = ctx->codec->decode(&ctx->state, ctx->codec->config,
Victor Stinnera0dd0212013-04-11 22:09:04 +0200887 &buf->inbuf, inleft, &buf->writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 if (r == 0 || r == MBERR_TOOFEW)
889 break;
890 else if (multibytecodec_decerror(ctx->codec, &ctx->state,
891 buf, ctx->errors, r))
892 return -1;
893 }
894 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000895}
896
897
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400898/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400899_multibytecodec.MultibyteIncrementalEncoder.encode
900
901 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200902 final: bool(accept={int}) = False
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400903[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000904
905static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400906_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
907 PyObject *input,
908 int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200909/*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000910{
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400911 return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000912}
913
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400914/*[clinic input]
Christopher Thorneac22f6a2018-11-01 10:48:49 +0000915_multibytecodec.MultibyteIncrementalEncoder.getstate
916[clinic start generated code]*/
917
918static PyObject *
919_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
920/*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
921{
922 /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
923 for UTF-8 encoded buffer (each character can use up to 4
924 bytes), and required bytes for MultibyteCodec_State.c. A byte
925 array is used to avoid different compilers generating different
926 values for the same state, e.g. as a result of struct padding.
927 */
928 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
929 Py_ssize_t statesize;
930 const char *pendingbuffer = NULL;
931 Py_ssize_t pendingsize;
932
933 if (self->pending != NULL) {
934 pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
935 if (pendingbuffer == NULL) {
936 return NULL;
937 }
938 if (pendingsize > MAXENCPENDING*4) {
939 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
940 return NULL;
941 }
Victor Stinnercdbcb772018-11-22 10:25:46 +0100942 statebytes[0] = (unsigned char)pendingsize;
943 memcpy(statebytes + 1, pendingbuffer, pendingsize);
Christopher Thorneac22f6a2018-11-01 10:48:49 +0000944 statesize = 1 + pendingsize;
945 } else {
946 statebytes[0] = 0;
947 statesize = 1;
948 }
949 memcpy(statebytes+statesize, self->state.c,
950 sizeof(self->state.c));
951 statesize += sizeof(self->state.c);
952
953 return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
954 1 /* little-endian */ ,
955 0 /* unsigned */ );
956}
957
958/*[clinic input]
959_multibytecodec.MultibyteIncrementalEncoder.setstate
960 state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
961 /
962[clinic start generated code]*/
963
964static PyObject *
965_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
966 PyLongObject *statelong)
967/*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
968{
969 PyObject *pending = NULL;
970 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
971
972 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
973 1 /* little-endian */ ,
974 0 /* unsigned */ ) < 0) {
975 goto errorexit;
976 }
977
978 if (statebytes[0] > MAXENCPENDING*4) {
979 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
980 return NULL;
981 }
982
983 pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
984 statebytes[0], "strict");
985 if (pending == NULL) {
986 goto errorexit;
987 }
988
989 Py_CLEAR(self->pending);
990 self->pending = pending;
991 memcpy(self->state.c, statebytes+1+statebytes[0],
992 sizeof(self->state.c));
993
994 Py_RETURN_NONE;
995
996errorexit:
997 Py_XDECREF(pending);
998 return NULL;
999}
1000
1001/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001002_multibytecodec.MultibyteIncrementalEncoder.reset
1003[clinic start generated code]*/
1004
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001005static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001006_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1007/*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001008{
Victor Stinnere15dce32011-05-30 22:56:00 +02001009 /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1010 unsigned char buffer[4], *outbuf;
1011 Py_ssize_t r;
1012 if (self->codec->encreset != NULL) {
1013 outbuf = buffer;
1014 r = self->codec->encreset(&self->state, self->codec->config,
1015 &outbuf, sizeof(buffer));
1016 if (r != 0)
1017 return NULL;
1018 }
Victor Stinnerd9491262013-04-14 02:06:32 +02001019 Py_CLEAR(self->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001021}
1022
1023static struct PyMethodDef mbiencoder_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001024 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001025 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1026 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001027 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1028 {NULL, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001029};
1030
1031static PyObject *
1032mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 MultibyteIncrementalEncoderObject *self;
1035 PyObject *codec = NULL;
1036 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1039 incnewkwarglist, &errors))
1040 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1043 if (self == NULL)
1044 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1047 if (codec == NULL)
1048 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001049
1050 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1051 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1053 goto errorexit;
1054 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 self->codec = ((MultibyteCodecObject *)codec)->codec;
Victor Stinnerd9491262013-04-14 02:06:32 +02001057 self->pending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 self->errors = internal_error_callback(errors);
1059 if (self->errors == NULL)
1060 goto errorexit;
1061 if (self->codec->encinit != NULL &&
1062 self->codec->encinit(&self->state, self->codec->config) != 0)
1063 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 Py_DECREF(codec);
1066 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001067
1068errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 Py_XDECREF(self);
1070 Py_XDECREF(codec);
1071 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001072}
1073
1074static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001075mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001078}
1079
1080static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001081mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001083{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 if (ERROR_ISCUSTOM(self->errors))
1085 Py_VISIT(self->errors);
1086 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001087}
1088
1089static void
1090mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1091{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001092 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 PyObject_GC_UnTrack(self);
1094 ERROR_DECREF(self->errors);
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001095 Py_CLEAR(self->pending);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001096 tp->tp_free(self);
1097 Py_DECREF(tp);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001098}
1099
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001100static PyType_Slot encoder_slots[] = {
1101 {Py_tp_dealloc, mbiencoder_dealloc},
1102 {Py_tp_getattro, PyObject_GenericGetAttr},
1103 {Py_tp_traverse, mbiencoder_traverse},
1104 {Py_tp_methods, mbiencoder_methods},
1105 {Py_tp_getset, codecctx_getsets},
1106 {Py_tp_init, mbiencoder_init},
1107 {Py_tp_new, mbiencoder_new},
1108 {0, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001109};
1110
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001111static PyType_Spec encoder_spec = {
1112 .name = MODULE_NAME ".MultibyteIncrementalEncoder",
1113 .basicsize = sizeof(MultibyteIncrementalEncoderObject),
1114 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
1115 .slots = encoder_slots,
1116};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001117
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001118
1119/*[clinic input]
1120_multibytecodec.MultibyteIncrementalDecoder.decode
1121
1122 input: Py_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001123 final: bool(accept={int}) = False
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001124[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001125
1126static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001127_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1128 Py_buffer *input,
1129 int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001130/*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 MultibyteDecodeBuffer buf;
1133 char *data, *wdata = NULL;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001134 Py_ssize_t wsize, size, origpending;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001135 PyObject *res;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001136
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001137 data = input->buf;
1138 size = input->len;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001139
Victor Stinner8f674cc2013-04-17 23:02:17 +02001140 _PyUnicodeWriter_Init(&buf.writer);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001141 buf.excobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 origpending = self->pendingsize;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 if (self->pendingsize == 0) {
1145 wsize = size;
1146 wdata = data;
1147 }
1148 else {
1149 if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1150 PyErr_NoMemory();
1151 goto errorexit;
1152 }
1153 wsize = size + self->pendingsize;
1154 wdata = PyMem_Malloc(wsize);
Victor Stinner33283ba2013-07-15 17:47:39 +02001155 if (wdata == NULL) {
1156 PyErr_NoMemory();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 goto errorexit;
Victor Stinner33283ba2013-07-15 17:47:39 +02001158 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 memcpy(wdata, self->pending, self->pendingsize);
1160 memcpy(wdata + self->pendingsize, data, size);
1161 self->pendingsize = 0;
1162 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001163
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1165 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1168 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 if (final && buf.inbuf < buf.inbuf_end) {
1171 if (multibytecodec_decerror(self->codec, &self->state,
1172 &buf, self->errors, MBERR_TOOFEW)) {
1173 /* recover the original pending buffer */
1174 memcpy(self->pending, wdata, origpending);
1175 self->pendingsize = origpending;
1176 goto errorexit;
1177 }
1178 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1181 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1182 goto errorexit;
1183 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001184
Victor Stinnera0dd0212013-04-11 22:09:04 +02001185 res = _PyUnicodeWriter_Finish(&buf.writer);
1186 if (res == NULL)
1187 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 if (wdata != data)
Victor Stinner00d7abd2020-12-01 09:56:42 +01001190 PyMem_Free(wdata);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001192 return res;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001193
1194errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 if (wdata != NULL && wdata != data)
Victor Stinner00d7abd2020-12-01 09:56:42 +01001196 PyMem_Free(wdata);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001198 _PyUnicodeWriter_Dealloc(&buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001200}
1201
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001202/*[clinic input]
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001203_multibytecodec.MultibyteIncrementalDecoder.getstate
1204[clinic start generated code]*/
1205
1206static PyObject *
1207_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1208/*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1209{
1210 PyObject *buffer;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001211 PyObject *statelong;
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001212
1213 buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1214 self->pendingsize);
1215 if (buffer == NULL) {
1216 return NULL;
1217 }
1218
Christopher Thorne488c0a62018-11-02 03:29:40 +00001219 statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1220 sizeof(self->state.c),
1221 1 /* little-endian */ ,
1222 0 /* unsigned */ );
1223 if (statelong == NULL) {
1224 Py_DECREF(buffer);
1225 return NULL;
1226 }
1227
1228 return Py_BuildValue("NN", buffer, statelong);
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001229}
1230
1231/*[clinic input]
1232_multibytecodec.MultibyteIncrementalDecoder.setstate
1233 state: object(subclass_of='&PyTuple_Type')
1234 /
1235[clinic start generated code]*/
1236
1237static PyObject *
1238_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1239 PyObject *state)
1240/*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1241{
1242 PyObject *buffer;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001243 PyLongObject *statelong;
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001244 Py_ssize_t buffersize;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001245 const char *bufferstr;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001246 unsigned char statebytes[8];
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001247
Christopher Thorne488c0a62018-11-02 03:29:40 +00001248 if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1249 &buffer, &PyLong_Type, &statelong))
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001250 {
1251 return NULL;
1252 }
1253
Christopher Thorne488c0a62018-11-02 03:29:40 +00001254 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1255 1 /* little-endian */ ,
1256 0 /* unsigned */ ) < 0) {
1257 return NULL;
1258 }
1259
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001260 buffersize = PyBytes_Size(buffer);
1261 if (buffersize == -1) {
1262 return NULL;
1263 }
1264
1265 if (buffersize > MAXDECPENDING) {
1266 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1267 return NULL;
1268 }
1269
1270 bufferstr = PyBytes_AsString(buffer);
1271 if (bufferstr == NULL) {
1272 return NULL;
1273 }
1274 self->pendingsize = buffersize;
1275 memcpy(self->pending, bufferstr, self->pendingsize);
Christopher Thorne488c0a62018-11-02 03:29:40 +00001276 memcpy(self->state.c, statebytes, sizeof(statebytes));
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001277
1278 Py_RETURN_NONE;
1279}
1280
1281/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001282_multibytecodec.MultibyteIncrementalDecoder.reset
1283[clinic start generated code]*/
1284
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001285static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001286_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1287/*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001288{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001289 if (self->codec->decreset != NULL &&
1290 self->codec->decreset(&self->state, self->codec->config) != 0)
1291 return NULL;
1292 self->pendingsize = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001293
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001295}
1296
1297static struct PyMethodDef mbidecoder_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001298 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001299 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1300 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001301 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1302 {NULL, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001303};
1304
1305static PyObject *
1306mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1307{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 MultibyteIncrementalDecoderObject *self;
1309 PyObject *codec = NULL;
1310 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1313 incnewkwarglist, &errors))
1314 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001315
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1317 if (self == NULL)
1318 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1321 if (codec == NULL)
1322 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001323
1324 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1325 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1327 goto errorexit;
1328 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 self->codec = ((MultibyteCodecObject *)codec)->codec;
1331 self->pendingsize = 0;
1332 self->errors = internal_error_callback(errors);
1333 if (self->errors == NULL)
1334 goto errorexit;
1335 if (self->codec->decinit != NULL &&
1336 self->codec->decinit(&self->state, self->codec->config) != 0)
1337 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 Py_DECREF(codec);
1340 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001341
1342errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 Py_XDECREF(self);
1344 Py_XDECREF(codec);
1345 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001346}
1347
1348static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001349mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1350{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001352}
1353
1354static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001355mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001356 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 if (ERROR_ISCUSTOM(self->errors))
1359 Py_VISIT(self->errors);
1360 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001361}
1362
1363static void
1364mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1365{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001366 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 PyObject_GC_UnTrack(self);
1368 ERROR_DECREF(self->errors);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001369 tp->tp_free(self);
1370 Py_DECREF(tp);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001371}
1372
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001373static PyType_Slot decoder_slots[] = {
1374 {Py_tp_dealloc, mbidecoder_dealloc},
1375 {Py_tp_getattro, PyObject_GenericGetAttr},
1376 {Py_tp_traverse, mbidecoder_traverse},
1377 {Py_tp_methods, mbidecoder_methods},
1378 {Py_tp_getset, codecctx_getsets},
1379 {Py_tp_init, mbidecoder_init},
1380 {Py_tp_new, mbidecoder_new},
1381 {0, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001382};
1383
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001384static PyType_Spec decoder_spec = {
1385 .name = MODULE_NAME ".MultibyteIncrementalDecoder",
1386 .basicsize = sizeof(MultibyteIncrementalDecoderObject),
1387 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
1388 .slots = decoder_slots,
1389};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001390
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001391static PyObject *
1392mbstreamreader_iread(MultibyteStreamReaderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 const char *method, Py_ssize_t sizehint)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001394{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 MultibyteDecodeBuffer buf;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001396 PyObject *cres, *res;
1397 Py_ssize_t rsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (sizehint == 0)
Victor Stinnerb37b1742011-12-01 03:18:59 +01001400 return PyUnicode_New(0, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001401
Victor Stinner8f674cc2013-04-17 23:02:17 +02001402 _PyUnicodeWriter_Init(&buf.writer);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001403 buf.excobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 for (;;) {
1407 int endoffile;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 if (sizehint < 0)
1410 cres = PyObject_CallMethod(self->stream,
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001411 method, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 else
1413 cres = PyObject_CallMethod(self->stream,
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001414 method, "i", sizehint);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 if (cres == NULL)
1416 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 if (!PyBytes_Check(cres)) {
1419 PyErr_Format(PyExc_TypeError,
1420 "stream function returned a "
1421 "non-bytes object (%.100s)",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001422 Py_TYPE(cres)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 goto errorexit;
1424 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 endoffile = (PyBytes_GET_SIZE(cres) == 0);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 if (self->pendingsize > 0) {
1429 PyObject *ctr;
1430 char *ctrdata;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1433 PyErr_NoMemory();
1434 goto errorexit;
Victor Stinner064bbdc2013-07-08 22:28:27 +02001435 }
1436 rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1437 ctr = PyBytes_FromStringAndSize(NULL, rsize);
1438 if (ctr == NULL)
1439 goto errorexit;
1440 ctrdata = PyBytes_AS_STRING(ctr);
1441 memcpy(ctrdata, self->pending, self->pendingsize);
1442 memcpy(ctrdata + self->pendingsize,
1443 PyBytes_AS_STRING(cres),
1444 PyBytes_GET_SIZE(cres));
1445 Py_DECREF(cres);
1446 cres = ctr;
1447 self->pendingsize = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 rsize = PyBytes_GET_SIZE(cres);
1451 if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1452 rsize) != 0)
1453 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 if (rsize > 0 && decoder_feed_buffer(
1456 (MultibyteStatefulDecoderContext *)self, &buf))
1457 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 if (endoffile || sizehint < 0) {
1460 if (buf.inbuf < buf.inbuf_end &&
1461 multibytecodec_decerror(self->codec, &self->state,
1462 &buf, self->errors, MBERR_TOOFEW))
1463 goto errorexit;
1464 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1467 if (decoder_append_pending(STATEFUL_DCTX(self),
1468 &buf) != 0)
1469 goto errorexit;
1470 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 Py_DECREF(cres);
1473 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001474
Victor Stinnera0dd0212013-04-11 22:09:04 +02001475 if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 break;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 sizehint = 1; /* read 1 more byte and retry */
1479 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001480
Victor Stinnera0dd0212013-04-11 22:09:04 +02001481 res = _PyUnicodeWriter_Finish(&buf.writer);
1482 if (res == NULL)
1483 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 Py_XDECREF(cres);
1486 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001487 return res;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001488
1489errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 Py_XDECREF(cres);
1491 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001492 _PyUnicodeWriter_Dealloc(&buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001494}
1495
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001496/*[clinic input]
1497 _multibytecodec.MultibyteStreamReader.read
1498
1499 sizeobj: object = None
1500 /
1501[clinic start generated code]*/
1502
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001503static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001504_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1505 PyObject *sizeobj)
1506/*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001507{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001509
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001510 if (sizeobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 size = -1;
1512 else if (PyLong_Check(sizeobj))
1513 size = PyLong_AsSsize_t(sizeobj);
1514 else {
1515 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1516 return NULL;
1517 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 if (size == -1 && PyErr_Occurred())
1520 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 return mbstreamreader_iread(self, "read", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001523}
1524
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001525/*[clinic input]
1526 _multibytecodec.MultibyteStreamReader.readline
1527
1528 sizeobj: object = None
1529 /
1530[clinic start generated code]*/
1531
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001532static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001533_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1534 PyObject *sizeobj)
1535/*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001536{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001538
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001539 if (sizeobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 size = -1;
1541 else if (PyLong_Check(sizeobj))
1542 size = PyLong_AsSsize_t(sizeobj);
1543 else {
1544 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1545 return NULL;
1546 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 if (size == -1 && PyErr_Occurred())
1549 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001550
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001551 return mbstreamreader_iread(self, "readline", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001552}
1553
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001554/*[clinic input]
1555 _multibytecodec.MultibyteStreamReader.readlines
1556
1557 sizehintobj: object = None
1558 /
1559[clinic start generated code]*/
1560
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001561static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001562_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1563 PyObject *sizehintobj)
1564/*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001565{
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001566 PyObject *r, *sr;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 Py_ssize_t sizehint;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001568
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001569 if (sizehintobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 sizehint = -1;
1571 else if (PyLong_Check(sizehintobj))
1572 sizehint = PyLong_AsSsize_t(sizehintobj);
1573 else {
1574 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1575 return NULL;
1576 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 if (sizehint == -1 && PyErr_Occurred())
1579 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001580
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 r = mbstreamreader_iread(self, "read", sizehint);
1582 if (r == NULL)
1583 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 sr = PyUnicode_Splitlines(r, 1);
1586 Py_DECREF(r);
1587 return sr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001588}
1589
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001590/*[clinic input]
1591 _multibytecodec.MultibyteStreamReader.reset
1592[clinic start generated code]*/
1593
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001594static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001595_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1596/*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001597{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 if (self->codec->decreset != NULL &&
1599 self->codec->decreset(&self->state, self->codec->config) != 0)
1600 return NULL;
1601 self->pendingsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001602
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001604}
1605
1606static struct PyMethodDef mbstreamreader_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001607 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1608 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1609 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1610 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001612};
1613
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001614static PyMemberDef mbstreamreader_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001615 {"stream", T_OBJECT,
1616 offsetof(MultibyteStreamReaderObject, stream),
1617 READONLY, NULL},
1618 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001619};
1620
1621static PyObject *
1622mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1623{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 MultibyteStreamReaderObject *self;
1625 PyObject *stream, *codec = NULL;
1626 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1629 streamkwarglist, &stream, &errors))
1630 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001631
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1633 if (self == NULL)
1634 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1637 if (codec == NULL)
1638 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001639
1640 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1641 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1643 goto errorexit;
1644 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 self->codec = ((MultibyteCodecObject *)codec)->codec;
1647 self->stream = stream;
1648 Py_INCREF(stream);
1649 self->pendingsize = 0;
1650 self->errors = internal_error_callback(errors);
1651 if (self->errors == NULL)
1652 goto errorexit;
1653 if (self->codec->decinit != NULL &&
1654 self->codec->decinit(&self->state, self->codec->config) != 0)
1655 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 Py_DECREF(codec);
1658 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001659
1660errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 Py_XDECREF(self);
1662 Py_XDECREF(codec);
1663 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001664}
1665
1666static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001667mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1668{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001670}
1671
1672static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001673mbstreamreader_traverse(MultibyteStreamReaderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001675{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 if (ERROR_ISCUSTOM(self->errors))
1677 Py_VISIT(self->errors);
1678 Py_VISIT(self->stream);
1679 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001680}
1681
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001682static void
1683mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1684{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001685 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 PyObject_GC_UnTrack(self);
1687 ERROR_DECREF(self->errors);
1688 Py_XDECREF(self->stream);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001689 tp->tp_free(self);
1690 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001691}
1692
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001693static PyType_Slot reader_slots[] = {
1694 {Py_tp_dealloc, mbstreamreader_dealloc},
1695 {Py_tp_getattro, PyObject_GenericGetAttr},
1696 {Py_tp_traverse, mbstreamreader_traverse},
1697 {Py_tp_methods, mbstreamreader_methods},
1698 {Py_tp_members, mbstreamreader_members},
1699 {Py_tp_getset, codecctx_getsets},
1700 {Py_tp_init, mbstreamreader_init},
1701 {Py_tp_new, mbstreamreader_new},
1702 {0, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001703};
1704
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001705static PyType_Spec reader_spec = {
1706 .name = MODULE_NAME ".MultibyteStreamReader",
1707 .basicsize = sizeof(MultibyteStreamReaderObject),
1708 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
1709 .slots = reader_slots,
1710};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001711
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001712static int
1713mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 PyObject *unistr)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001715{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 PyObject *str, *wr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001717
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1719 if (str == NULL)
1720 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001721
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001722 wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001723 Py_DECREF(str);
1724 if (wr == NULL)
1725 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 Py_DECREF(wr);
1728 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001729}
1730
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001731/*[clinic input]
1732 _multibytecodec.MultibyteStreamWriter.write
1733
1734 strobj: object
1735 /
1736[clinic start generated code]*/
1737
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001738static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001739_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self,
1740 PyObject *strobj)
1741/*[clinic end generated code: output=e13ae841c895251e input=551dc4c018c10a2b]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001742{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 if (mbstreamwriter_iwrite(self, strobj))
1744 return NULL;
1745 else
1746 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001747}
1748
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001749/*[clinic input]
1750 _multibytecodec.MultibyteStreamWriter.writelines
1751
1752 lines: object
1753 /
1754[clinic start generated code]*/
1755
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001756static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001757_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self,
1758 PyObject *lines)
1759/*[clinic end generated code: output=e5c4285ac8e7d522 input=57797fe7008d4e96]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001760{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001761 PyObject *strobj;
1762 int i, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 if (!PySequence_Check(lines)) {
1765 PyErr_SetString(PyExc_TypeError,
1766 "arg must be a sequence object");
1767 return NULL;
1768 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 for (i = 0; i < PySequence_Length(lines); i++) {
1771 /* length can be changed even within this loop */
1772 strobj = PySequence_GetItem(lines, i);
1773 if (strobj == NULL)
1774 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001775
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001776 r = mbstreamwriter_iwrite(self, strobj);
1777 Py_DECREF(strobj);
1778 if (r == -1)
1779 return NULL;
1780 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001781 /* PySequence_Length() can fail */
1782 if (PyErr_Occurred())
1783 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001786}
1787
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001788/*[clinic input]
1789 _multibytecodec.MultibyteStreamWriter.reset
1790[clinic start generated code]*/
1791
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001792static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001793_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1794/*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001795{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001796 PyObject *pwrt;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001797
Victor Stinner579db162015-07-16 22:17:31 +02001798 if (!self->pending)
1799 Py_RETURN_NONE;
1800
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001801 pwrt = multibytecodec_encode(self->codec, &self->state,
Victor Stinnerd9491262013-04-14 02:06:32 +02001802 self->pending, NULL, self->errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 MBENC_FLUSH | MBENC_RESET);
1804 /* some pending buffer can be truncated when UnicodeEncodeError is
1805 * raised on 'strict' mode. but, 'reset' method is designed to
1806 * reset the pending buffer or states so failed string sequence
1807 * ought to be missed */
Victor Stinnerd9491262013-04-14 02:06:32 +02001808 Py_CLEAR(self->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001809 if (pwrt == NULL)
1810 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 assert(PyBytes_Check(pwrt));
1813 if (PyBytes_Size(pwrt) > 0) {
1814 PyObject *wr;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001815
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001816 wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, pwrt);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 if (wr == NULL) {
1818 Py_DECREF(pwrt);
1819 return NULL;
1820 }
1821 }
1822 Py_DECREF(pwrt);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001825}
1826
1827static PyObject *
1828mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1829{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001830 MultibyteStreamWriterObject *self;
1831 PyObject *stream, *codec = NULL;
1832 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1835 streamkwarglist, &stream, &errors))
1836 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1839 if (self == NULL)
1840 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001841
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1843 if (codec == NULL)
1844 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001845
1846 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1847 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001848 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1849 goto errorexit;
1850 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001851
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001852 self->codec = ((MultibyteCodecObject *)codec)->codec;
1853 self->stream = stream;
1854 Py_INCREF(stream);
Victor Stinnerd9491262013-04-14 02:06:32 +02001855 self->pending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001856 self->errors = internal_error_callback(errors);
1857 if (self->errors == NULL)
1858 goto errorexit;
1859 if (self->codec->encinit != NULL &&
1860 self->codec->encinit(&self->state, self->codec->config) != 0)
1861 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001862
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001863 Py_DECREF(codec);
1864 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001865
1866errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 Py_XDECREF(self);
1868 Py_XDECREF(codec);
1869 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001870}
1871
1872static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001873mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1874{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001875 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001876}
1877
1878static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001879mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001880 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 if (ERROR_ISCUSTOM(self->errors))
1883 Py_VISIT(self->errors);
1884 Py_VISIT(self->stream);
1885 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001886}
1887
1888static void
1889mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1890{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001891 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001892 PyObject_GC_UnTrack(self);
1893 ERROR_DECREF(self->errors);
1894 Py_XDECREF(self->stream);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001895 tp->tp_free(self);
1896 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001897}
1898
1899static struct PyMethodDef mbstreamwriter_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001900 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1901 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1902 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1903 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001904};
1905
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001906static PyMemberDef mbstreamwriter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001907 {"stream", T_OBJECT,
1908 offsetof(MultibyteStreamWriterObject, stream),
1909 READONLY, NULL},
1910 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001911};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001912
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001913static PyType_Slot writer_slots[] = {
1914 {Py_tp_dealloc, mbstreamwriter_dealloc},
1915 {Py_tp_getattro, PyObject_GenericGetAttr},
1916 {Py_tp_traverse, mbstreamwriter_traverse},
1917 {Py_tp_methods, mbstreamwriter_methods},
1918 {Py_tp_members, mbstreamwriter_members},
1919 {Py_tp_getset, codecctx_getsets},
1920 {Py_tp_init, mbstreamwriter_init},
1921 {Py_tp_new, mbstreamwriter_new},
1922 {0, NULL},
1923};
1924
1925static PyType_Spec writer_spec = {
1926 .name = MODULE_NAME ".MultibyteStreamWriter",
1927 .basicsize = sizeof(MultibyteStreamWriterObject),
1928 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
1929 .slots = writer_slots,
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001930};
1931
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001932
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001933/*[clinic input]
1934_multibytecodec.__create_codec
1935
1936 arg: object
1937 /
1938[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001939
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001940static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001941_multibytecodec___create_codec(PyObject *module, PyObject *arg)
1942/*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001943{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944 MultibyteCodecObject *self;
1945 MultibyteCodec *codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1948 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1949 return NULL;
1950 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1953 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1954 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001955
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001956 _multibytecodec_state *state = _multibytecodec_get_state(module);
Victor Stinner11ef53a2021-01-08 15:43:59 +01001957 self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001958 if (self == NULL)
1959 return NULL;
1960 self->codec = codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001961
Victor Stinner11ef53a2021-01-08 15:43:59 +01001962 PyObject_GC_Track(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001964}
1965
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001966static int
1967_multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
1968{
1969 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1970 Py_VISIT(state->multibytecodec_type);
1971 Py_VISIT(state->encoder_type);
1972 Py_VISIT(state->decoder_type);
1973 Py_VISIT(state->reader_type);
1974 Py_VISIT(state->writer_type);
1975 return 0;
1976}
1977
1978static int
1979_multibytecodec_clear(PyObject *mod)
1980{
1981 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1982 Py_CLEAR(state->multibytecodec_type);
1983 Py_CLEAR(state->encoder_type);
1984 Py_CLEAR(state->decoder_type);
1985 Py_CLEAR(state->reader_type);
1986 Py_CLEAR(state->writer_type);
1987 return 0;
1988}
1989
1990static void
1991_multibytecodec_free(void *mod)
1992{
1993 _multibytecodec_clear((PyObject *)mod);
1994}
1995
1996#define CREATE_TYPE(module, type, spec) \
1997 do { \
1998 type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
1999 if (!type) { \
2000 return -1; \
2001 } \
2002 } while (0)
2003
2004#define ADD_TYPE(module, type) \
2005 do { \
2006 if (PyModule_AddType(module, type) < 0) { \
2007 return -1; \
2008 } \
2009 } while (0)
2010
2011static int
2012_multibytecodec_exec(PyObject *mod)
2013{
2014 _multibytecodec_state *state = _multibytecodec_get_state(mod);
2015 CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2016 CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2017 CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2018 CREATE_TYPE(mod, state->reader_type, &reader_spec);
2019 CREATE_TYPE(mod, state->writer_type, &writer_spec);
2020
2021 ADD_TYPE(mod, state->encoder_type);
2022 ADD_TYPE(mod, state->decoder_type);
2023 ADD_TYPE(mod, state->reader_type);
2024 ADD_TYPE(mod, state->writer_type);
2025 return 0;
2026}
2027
2028#undef CREATE_TYPE
2029#undef ADD_TYPE
2030
2031static struct PyMethodDef _multibytecodec_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04002032 _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002033 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002034};
2035
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002036static PyModuleDef_Slot _multibytecodec_slots[] = {
2037 {Py_mod_exec, _multibytecodec_exec},
2038 {0, NULL}
2039};
Martin v. Löwis1a214512008-06-11 05:26:20 +00002040
2041static struct PyModuleDef _multibytecodecmodule = {
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002042 .m_base = PyModuleDef_HEAD_INIT,
2043 .m_name = "_multibytecodec",
2044 .m_size = sizeof(_multibytecodec_state),
2045 .m_methods = _multibytecodec_methods,
2046 .m_slots = _multibytecodec_slots,
2047 .m_traverse = _multibytecodec_traverse,
2048 .m_clear = _multibytecodec_clear,
2049 .m_free = _multibytecodec_free,
Martin v. Löwis1a214512008-06-11 05:26:20 +00002050};
2051
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002052PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002053PyInit__multibytecodec(void)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002054{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002055 return PyModuleDef_Init(&_multibytecodecmodule);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002056}