blob: 4f34b8a82fb94db7737f460616a592c7fcad198c [file] [log] [blame]
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001/*
2 * multibytecodec.c: Common Multibyte Codec Implementation
3 *
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00005 */
6
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00007#define PY_SSIZE_T_CLEAN
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00008#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +02009#include "structmember.h" // PyMemberDef
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000010#include "multibytecodec.h"
Brett Cannonf2de1fc2014-08-22 11:45:03 -040011#include "clinic/multibytecodec.c.h"
12
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010013#define MODULE_NAME "_multibytecodec"
14
15typedef struct {
16 PyTypeObject *encoder_type;
17 PyTypeObject *decoder_type;
18 PyTypeObject *reader_type;
19 PyTypeObject *writer_type;
20 PyTypeObject *multibytecodec_type;
21} _multibytecodec_state;
22
23static _multibytecodec_state *
24_multibytecodec_get_state(PyObject *module)
25{
26 _multibytecodec_state *state = PyModule_GetState(module);
27 assert(state != NULL);
28 return state;
29}
30
31static struct PyModuleDef _multibytecodecmodule;
32static _multibytecodec_state *
33_multibyte_codec_find_state_by_type(PyTypeObject *type)
34{
35 PyObject *module = _PyType_GetModuleByDef(type, &_multibytecodecmodule);
36 assert(module != NULL);
37 return _multibytecodec_get_state(module);
38}
39
40#define clinic_get_state() _multibyte_codec_find_state_by_type(type)
Brett Cannonf2de1fc2014-08-22 11:45:03 -040041/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -040042module _multibytecodec
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010043class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
44class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
45class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
46class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
47class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
Brett Cannonf2de1fc2014-08-22 11:45:03 -040048[clinic start generated code]*/
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010049/*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
50#undef clinic_get_state
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000051
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000052typedef struct {
Victor Stinnerd9491262013-04-14 02:06:32 +020053 PyObject *inobj;
54 Py_ssize_t inpos, inlen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000055 unsigned char *outbuf, *outbuf_end;
56 PyObject *excobj, *outobj;
57} MultibyteEncodeBuffer;
58
59typedef struct {
60 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
Victor Stinnera0dd0212013-04-11 22:09:04 +020061 PyObject *excobj;
62 _PyUnicodeWriter writer;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000063} MultibyteDecodeBuffer;
64
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000065static char *incnewkwarglist[] = {"errors", NULL};
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +000066static char *streamkwarglist[] = {"stream", "errors", NULL};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000067
68static PyObject *multibytecodec_encode(MultibyteCodec *,
Victor Stinnerd9491262013-04-14 02:06:32 +020069 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyObject *, int);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000072#define MBENC_RESET MBENC_MAX<<1 /* reset after an encoding session */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000073
Victor Stinner3f36a572013-11-12 21:39:02 +010074_Py_IDENTIFIER(write);
75
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000076static PyObject *
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000077make_tuple(PyObject *object, Py_ssize_t len)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000078{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 PyObject *v, *w;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 if (object == NULL)
82 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 v = PyTuple_New(2);
85 if (v == NULL) {
86 Py_DECREF(object);
87 return NULL;
88 }
89 PyTuple_SET_ITEM(v, 0, object);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 w = PyLong_FromSsize_t(len);
92 if (w == NULL) {
93 Py_DECREF(v);
94 return NULL;
95 }
96 PyTuple_SET_ITEM(v, 1, w);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 return v;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000099}
100
101static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000102internal_error_callback(const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 if (errors == NULL || strcmp(errors, "strict") == 0)
105 return ERROR_STRICT;
106 else if (strcmp(errors, "ignore") == 0)
107 return ERROR_IGNORE;
108 else if (strcmp(errors, "replace") == 0)
109 return ERROR_REPLACE;
110 else
111 return PyUnicode_FromString(errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000112}
113
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000114static PyObject *
115call_error_callback(PyObject *errors, PyObject *exc)
116{
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200117 PyObject *cb, *r;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 const char *str;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 assert(PyUnicode_Check(errors));
Serhiy Storchaka06515832016-11-20 09:13:07 +0200121 str = PyUnicode_AsUTF8(errors);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 if (str == NULL)
123 return NULL;
124 cb = PyCodec_LookupError(str);
125 if (cb == NULL)
126 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000127
Petr Viktorinffd97532020-02-11 17:46:57 +0100128 r = PyObject_CallOneArg(cb, exc);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_DECREF(cb);
130 return r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000131}
132
133static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200134codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 const char *errors;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 if (self->errors == ERROR_STRICT)
139 errors = "strict";
140 else if (self->errors == ERROR_IGNORE)
141 errors = "ignore";
142 else if (self->errors == ERROR_REPLACE)
143 errors = "replace";
144 else {
145 Py_INCREF(self->errors);
146 return self->errors;
147 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return PyUnicode_FromString(errors);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000150}
151
152static int
153codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 void *closure)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 PyObject *cb;
157 const char *str;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000158
Zackery Spytz842acaa2018-12-17 07:52:45 -0700159 if (value == NULL) {
160 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
161 return -1;
162 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 if (!PyUnicode_Check(value)) {
164 PyErr_SetString(PyExc_TypeError, "errors must be a string");
165 return -1;
166 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000167
Serhiy Storchaka06515832016-11-20 09:13:07 +0200168 str = PyUnicode_AsUTF8(value);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 if (str == NULL)
170 return -1;
Neal Norwitz6ea45d32007-08-26 04:19:43 +0000171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 cb = internal_error_callback(str);
173 if (cb == NULL)
174 return -1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 ERROR_DECREF(self->errors);
177 self->errors = cb;
178 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000179}
180
181/* This getset handlers list is used by all the stateful codec objects */
182static PyGetSetDef codecctx_getsets[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 {"errors", (getter)codecctx_errors_get,
184 (setter)codecctx_errors_set,
185 PyDoc_STR("how to treat errors")},
186 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000187};
188
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000189static int
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000190expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 Py_ssize_t orgpos, orgsize, incsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 orgpos = (Py_ssize_t)((char *)buf->outbuf -
195 PyBytes_AS_STRING(buf->outobj));
196 orgsize = PyBytes_GET_SIZE(buf->outobj);
197 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000198
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200199 if (orgsize > PY_SSIZE_T_MAX - incsize) {
200 PyErr_NoMemory();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return -1;
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200202 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
205 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
208 buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
209 + PyBytes_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000212}
/* Make sure `s` more bytes fit into the output buffer, growing it when
 * they do not; a negative `s` always grows.  Jumps to the enclosing
 * function's `errorexit` label when the expansion fails. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
        expand_encodebuffer(buf, s) == -1)                              \
        goto errorexit;                                                 \
} while(0)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000218
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000219
220/**
221 * MultibyteCodec object
222 */
223
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000224static int
225multibytecodec_encerror(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 MultibyteCodec_State *state,
227 MultibyteEncodeBuffer *buf,
228 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 PyObject *retobj = NULL, *retstr = NULL, *tobj;
231 Py_ssize_t retstrsize, newpos;
232 Py_ssize_t esize, start, end;
233 const char *reason;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 if (e > 0) {
236 reason = "illegal multibyte sequence";
237 esize = e;
238 }
239 else {
240 switch (e) {
241 case MBERR_TOOSMALL:
242 REQUIRE_ENCODEBUFFER(buf, -1);
243 return 0; /* retry it */
244 case MBERR_TOOFEW:
245 reason = "incomplete multibyte sequence";
Victor Stinnerd9491262013-04-14 02:06:32 +0200246 esize = (Py_ssize_t)buf->inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 break;
248 case MBERR_INTERNAL:
249 PyErr_SetString(PyExc_RuntimeError,
250 "internal codec error");
251 return -1;
252 default:
253 PyErr_SetString(PyExc_RuntimeError,
254 "unknown runtime error");
255 return -1;
256 }
257 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 if (errors == ERROR_REPLACE) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200260 PyObject *replchar;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 Py_ssize_t r;
Victor Stinnerd9491262013-04-14 02:06:32 +0200262 Py_ssize_t inpos;
263 int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300264 const void *data;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000265
Victor Stinnerd9491262013-04-14 02:06:32 +0200266 replchar = PyUnicode_FromOrdinal('?');
267 if (replchar == NULL)
268 goto errorexit;
269 kind = PyUnicode_KIND(replchar);
270 data = PyUnicode_DATA(replchar);
271
272 inpos = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 for (;;) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200274 Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000275
Victor Stinnerd9491262013-04-14 02:06:32 +0200276 r = codec->encode(state, codec->config,
277 kind, data, &inpos, 1,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 &buf->outbuf, outleft, 0);
279 if (r == MBERR_TOOSMALL) {
280 REQUIRE_ENCODEBUFFER(buf, -1);
281 continue;
282 }
283 else
284 break;
285 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000286
Victor Stinnerd9491262013-04-14 02:06:32 +0200287 Py_DECREF(replchar);
288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 if (r != 0) {
290 REQUIRE_ENCODEBUFFER(buf, 1);
291 *buf->outbuf++ = '?';
292 }
293 }
294 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200295 buf->inpos += esize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 return 0;
297 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000298
Victor Stinnerd9491262013-04-14 02:06:32 +0200299 start = (Py_ssize_t)buf->inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 /* use cached exception object if available */
303 if (buf->excobj == NULL) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200304 buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
305 "sOnns",
306 codec->encoding, buf->inobj,
307 start, end, reason);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 if (buf->excobj == NULL)
309 goto errorexit;
310 }
311 else
312 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
313 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
314 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
315 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 if (errors == ERROR_STRICT) {
318 PyCodec_StrictErrors(buf->excobj);
319 goto errorexit;
320 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000321
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000322 retobj = call_error_callback(errors, buf->excobj);
323 if (retobj == NULL)
324 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500327 (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
329 PyErr_SetString(PyExc_TypeError,
330 "encoding error handler must return "
Benjamin Petersonaff47232012-12-02 10:53:41 -0500331 "(str, int) tuple");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000332 goto errorexit;
333 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000334
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500335 if (PyUnicode_Check(tobj)) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200336 Py_ssize_t inpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000337
Victor Stinnerd9491262013-04-14 02:06:32 +0200338 retstr = multibytecodec_encode(codec, state, tobj,
339 &inpos, ERROR_STRICT,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 MBENC_FLUSH);
341 if (retstr == NULL)
342 goto errorexit;
343 }
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500344 else {
345 Py_INCREF(tobj);
346 retstr = tobj;
347 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349 assert(PyBytes_Check(retstr));
350 retstrsize = PyBytes_GET_SIZE(retstr);
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200351 if (retstrsize > 0) {
352 REQUIRE_ENCODEBUFFER(buf, retstrsize);
353 memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
354 buf->outbuf += retstrsize;
355 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
358 if (newpos < 0 && !PyErr_Occurred())
Victor Stinnerd9491262013-04-14 02:06:32 +0200359 newpos += (Py_ssize_t)buf->inlen;
360 if (newpos < 0 || newpos > buf->inlen) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 PyErr_Clear();
362 PyErr_Format(PyExc_IndexError,
363 "position %zd from error handler out of bounds",
364 newpos);
365 goto errorexit;
366 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200367 buf->inpos = newpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 Py_DECREF(retobj);
370 Py_DECREF(retstr);
371 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000372
373errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 Py_XDECREF(retobj);
375 Py_XDECREF(retstr);
376 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000377}
378
379static int
380multibytecodec_decerror(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 MultibyteCodec_State *state,
382 MultibyteDecodeBuffer *buf,
383 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 PyObject *retobj = NULL, *retuni = NULL;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200386 Py_ssize_t newpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 const char *reason;
388 Py_ssize_t esize, start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 if (e > 0) {
391 reason = "illegal multibyte sequence";
392 esize = e;
393 }
394 else {
395 switch (e) {
396 case MBERR_TOOSMALL:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 return 0; /* retry it */
398 case MBERR_TOOFEW:
399 reason = "incomplete multibyte sequence";
400 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
401 break;
402 case MBERR_INTERNAL:
403 PyErr_SetString(PyExc_RuntimeError,
404 "internal codec error");
405 return -1;
Victor Stinnerd1f99422013-07-16 21:41:43 +0200406 case MBERR_EXCEPTION:
407 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 default:
409 PyErr_SetString(PyExc_RuntimeError,
410 "unknown runtime error");
411 return -1;
412 }
413 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 if (errors == ERROR_REPLACE) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200416 if (_PyUnicodeWriter_WriteChar(&buf->writer,
417 Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
418 goto errorexit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 }
420 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
421 buf->inbuf += esize;
422 return 0;
423 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
426 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000428 /* use cached exception object if available */
429 if (buf->excobj == NULL) {
430 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
431 (const char *)buf->inbuf_top,
432 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
433 start, end, reason);
434 if (buf->excobj == NULL)
435 goto errorexit;
436 }
437 else
438 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
439 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
440 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
441 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 if (errors == ERROR_STRICT) {
444 PyCodec_StrictErrors(buf->excobj);
445 goto errorexit;
446 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 retobj = call_error_callback(errors, buf->excobj);
449 if (retobj == NULL)
450 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
453 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
454 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
455 PyErr_SetString(PyExc_TypeError,
456 "decoding error handler must return "
Benjamin Petersonaff47232012-12-02 10:53:41 -0500457 "(str, int) tuple");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 goto errorexit;
459 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000460
Victor Stinnera0dd0212013-04-11 22:09:04 +0200461 if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
Victor Stinner4eea8492011-11-21 03:01:27 +0100462 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
465 if (newpos < 0 && !PyErr_Occurred())
466 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
467 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
468 PyErr_Clear();
469 PyErr_Format(PyExc_IndexError,
470 "position %zd from error handler out of bounds",
471 newpos);
472 goto errorexit;
473 }
474 buf->inbuf = buf->inbuf_top + newpos;
475 Py_DECREF(retobj);
476 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000477
478errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 Py_XDECREF(retobj);
480 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000481}
482
483static PyObject *
484multibytecodec_encode(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 MultibyteCodec_State *state,
Victor Stinnerd9491262013-04-14 02:06:32 +0200486 PyObject *text, Py_ssize_t *inpos_t,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 PyObject *errors, int flags)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000488{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 MultibyteEncodeBuffer buf;
490 Py_ssize_t finalsize, r = 0;
Victor Stinnerd9491262013-04-14 02:06:32 +0200491 Py_ssize_t datalen;
492 int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300493 const void *data;
Victor Stinnerd9491262013-04-14 02:06:32 +0200494
495 if (PyUnicode_READY(text) < 0)
496 return NULL;
497 datalen = PyUnicode_GET_LENGTH(text);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000498
Victor Stinner6bcbef72011-05-24 22:17:55 +0200499 if (datalen == 0 && !(flags & MBENC_RESET))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 return PyBytes_FromStringAndSize(NULL, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 buf.excobj = NULL;
Brett Cannonb94767f2011-02-22 20:15:44 +0000503 buf.outobj = NULL;
Victor Stinnerd9491262013-04-14 02:06:32 +0200504 buf.inobj = text; /* borrowed reference */
505 buf.inpos = 0;
506 buf.inlen = datalen;
507 kind = PyUnicode_KIND(buf.inobj);
508 data = PyUnicode_DATA(buf.inobj);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
511 PyErr_NoMemory();
512 goto errorexit;
513 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
516 if (buf.outobj == NULL)
517 goto errorexit;
518 buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
519 buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000520
Victor Stinnerd9491262013-04-14 02:06:32 +0200521 while (buf.inpos < buf.inlen) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 /* we don't reuse inleft and outleft here.
523 * error callbacks can relocate the cursor anywhere on buffer*/
Victor Stinnerd9491262013-04-14 02:06:32 +0200524 Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
525
526 r = codec->encode(state, codec->config,
527 kind, data,
528 &buf.inpos, buf.inlen,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 &buf.outbuf, outleft, flags);
530 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
531 break;
532 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
533 goto errorexit;
534 else if (r == MBERR_TOOFEW)
535 break;
536 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000537
Victor Stinner6bcbef72011-05-24 22:17:55 +0200538 if (codec->encreset != NULL && (flags & MBENC_RESET))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 for (;;) {
540 Py_ssize_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
543 r = codec->encreset(state, codec->config, &buf.outbuf,
544 outleft);
545 if (r == 0)
546 break;
547 else if (multibytecodec_encerror(codec, state,
548 &buf, errors, r))
549 goto errorexit;
550 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 finalsize = (Py_ssize_t)((char *)buf.outbuf -
553 PyBytes_AS_STRING(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000554
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 if (finalsize != PyBytes_GET_SIZE(buf.outobj))
556 if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
557 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000558
Victor Stinnerd9491262013-04-14 02:06:32 +0200559 if (inpos_t)
560 *inpos_t = buf.inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 Py_XDECREF(buf.excobj);
562 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000563
564errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 Py_XDECREF(buf.excobj);
566 Py_XDECREF(buf.outobj);
567 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000568}
569
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400570/*[clinic input]
571_multibytecodec.MultibyteCodec.encode
572
573 input: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300574 errors: str(accept={str, NoneType}) = None
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400575
576Return an encoded string version of `input'.
577
578'errors' may be given to set a different error handling scheme. Default is
579'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
580values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
581registered with codecs.register_error that can handle UnicodeEncodeErrors.
582[clinic start generated code]*/
583
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000584static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400585_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
586 PyObject *input,
587 const char *errors)
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300588/*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000589{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 MultibyteCodec_State state;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400591 PyObject *errorcb, *r, *ucvt;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000593
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400594 if (PyUnicode_Check(input))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 ucvt = NULL;
596 else {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400597 input = ucvt = PyObject_Str(input);
598 if (input == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 return NULL;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400600 else if (!PyUnicode_Check(input)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 PyErr_SetString(PyExc_TypeError,
602 "couldn't convert the object to unicode.");
603 Py_DECREF(ucvt);
604 return NULL;
605 }
606 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000607
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400608 if (PyUnicode_READY(input) < 0) {
Victor Stinner9a80fab2011-11-21 02:50:14 +0100609 Py_XDECREF(ucvt);
610 return NULL;
611 }
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400612 datalen = PyUnicode_GET_LENGTH(input);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 errorcb = internal_error_callback(errors);
615 if (errorcb == NULL) {
616 Py_XDECREF(ucvt);
617 return NULL;
618 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 if (self->codec->encinit != NULL &&
621 self->codec->encinit(&state, self->codec->config) != 0)
622 goto errorexit;
623 r = multibytecodec_encode(self->codec, &state,
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400624 input, NULL, errorcb,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 MBENC_FLUSH | MBENC_RESET);
626 if (r == NULL)
627 goto errorexit;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000629 ERROR_DECREF(errorcb);
630 Py_XDECREF(ucvt);
631 return make_tuple(r, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000632
633errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 ERROR_DECREF(errorcb);
635 Py_XDECREF(ucvt);
636 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000637}
638
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400639/*[clinic input]
640_multibytecodec.MultibyteCodec.decode
641
642 input: Py_buffer
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300643 errors: str(accept={str, NoneType}) = None
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400644
645Decodes 'input'.
646
647'errors' may be given to set a different error handling scheme. Default is
648'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
649values are 'ignore' and 'replace' as well as any other name registered with
650codecs.register_error that is able to handle UnicodeDecodeErrors."
651[clinic start generated code]*/
652
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000653static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400654_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
655 Py_buffer *input,
656 const char *errors)
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300657/*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 MultibyteCodec_State state;
660 MultibyteDecodeBuffer buf;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200661 PyObject *errorcb, *res;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400662 const char *data;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200663 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000664
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400665 data = input->buf;
666 datalen = input->len;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 errorcb = internal_error_callback(errors);
669 if (errorcb == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 return NULL;
671 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 if (datalen == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 ERROR_DECREF(errorcb);
Victor Stinnerb37b1742011-12-01 03:18:59 +0100675 return make_tuple(PyUnicode_New(0, 0), 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000676 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000677
Victor Stinner8f674cc2013-04-17 23:02:17 +0200678 _PyUnicodeWriter_Init(&buf.writer);
679 buf.writer.min_length = datalen;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 buf.excobj = NULL;
681 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
682 buf.inbuf_end = buf.inbuf_top + datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 if (self->codec->decinit != NULL &&
685 self->codec->decinit(&state, self->codec->config) != 0)
686 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 while (buf.inbuf < buf.inbuf_end) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200689 Py_ssize_t inleft, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000693 r = self->codec->decode(&state, self->codec->config,
Victor Stinnera0dd0212013-04-11 22:09:04 +0200694 &buf.inbuf, inleft, &buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 if (r == 0)
696 break;
697 else if (multibytecodec_decerror(self->codec, &state,
698 &buf, errorcb, r))
699 goto errorexit;
700 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000701
Victor Stinnera0dd0212013-04-11 22:09:04 +0200702 res = _PyUnicodeWriter_Finish(&buf.writer);
703 if (res == NULL)
704 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 Py_XDECREF(buf.excobj);
707 ERROR_DECREF(errorcb);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200708 return make_tuple(res, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000709
710errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 ERROR_DECREF(errorcb);
712 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200713 _PyUnicodeWriter_Dealloc(&buf.writer);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000716}
717
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000718static struct PyMethodDef multibytecodec_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400719 _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
720 _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
721 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000722};
723
724static void
725multibytecodec_dealloc(MultibyteCodecObject *self)
726{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100727 PyTypeObject *tp = Py_TYPE(self);
Victor Stinner32bd68c2020-12-01 10:37:39 +0100728 PyObject_Free(self);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100729 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000730}
731
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100732static PyType_Slot multibytecodec_slots[] = {
733 {Py_tp_dealloc, multibytecodec_dealloc},
734 {Py_tp_getattro, PyObject_GenericGetAttr},
735 {Py_tp_methods, multibytecodec_methods},
736 {0, NULL},
737};
738
739static PyType_Spec multibytecodec_spec = {
740 .name = MODULE_NAME ".MultibyteCodec",
741 .basicsize = sizeof(MultibyteCodecObject),
742 .flags = Py_TPFLAGS_DEFAULT,
743 .slots = multibytecodec_slots,
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000744};
745
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000746
747/**
748 * Utility functions for stateful codec mechanism
749 */
750
/* Cast an object pointer to the stateful decoder / encoder context type. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000753
754static PyObject *
755encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 PyObject *unistr, int final)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000757{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 PyObject *ucvt, *r = NULL;
Victor Stinnerd9491262013-04-14 02:06:32 +0200759 PyObject *inbuf = NULL;
760 Py_ssize_t inpos, datalen;
761 PyObject *origpending = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 if (PyUnicode_Check(unistr))
764 ucvt = NULL;
765 else {
766 unistr = ucvt = PyObject_Str(unistr);
767 if (unistr == NULL)
768 return NULL;
769 else if (!PyUnicode_Check(unistr)) {
770 PyErr_SetString(PyExc_TypeError,
Benjamin Petersonaff47232012-12-02 10:53:41 -0500771 "couldn't convert the object to str.");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000772 Py_DECREF(ucvt);
773 return NULL;
774 }
775 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000776
Victor Stinnerd9491262013-04-14 02:06:32 +0200777 if (ctx->pending) {
778 PyObject *inbuf_tmp;
779
780 Py_INCREF(ctx->pending);
781 origpending = ctx->pending;
782
783 Py_INCREF(ctx->pending);
784 inbuf_tmp = ctx->pending;
785 PyUnicode_Append(&inbuf_tmp, unistr);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 if (inbuf_tmp == NULL)
787 goto errorexit;
Victor Stinnerd9491262013-04-14 02:06:32 +0200788 Py_CLEAR(ctx->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 inbuf = inbuf_tmp;
790 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200791 else {
792 origpending = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000793
Victor Stinnerd9491262013-04-14 02:06:32 +0200794 Py_INCREF(unistr);
795 inbuf = unistr;
796 }
797 if (PyUnicode_READY(inbuf) < 0)
798 goto errorexit;
799 inpos = 0;
800 datalen = PyUnicode_GET_LENGTH(inbuf);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 r = multibytecodec_encode(ctx->codec, &ctx->state,
Victor Stinnerd9491262013-04-14 02:06:32 +0200803 inbuf, &inpos,
804 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 if (r == NULL) {
806 /* recover the original pending buffer */
Serhiy Storchaka48842712016-04-06 09:45:48 +0300807 Py_XSETREF(ctx->pending, origpending);
Victor Stinnerd9491262013-04-14 02:06:32 +0200808 origpending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 goto errorexit;
810 }
Victor Stinner322cc742013-04-14 18:11:41 +0200811 Py_XDECREF(origpending);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000812
Victor Stinnerd9491262013-04-14 02:06:32 +0200813 if (inpos < datalen) {
814 if (datalen - inpos > MAXENCPENDING) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 /* normal codecs can't reach here */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000816 PyErr_SetString(PyExc_UnicodeError,
817 "pending buffer overflow");
818 goto errorexit;
819 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200820 ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
821 if (ctx->pending == NULL) {
822 /* normal codecs can't reach here */
823 goto errorexit;
824 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000826
Victor Stinner322cc742013-04-14 18:11:41 +0200827 Py_DECREF(inbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 Py_XDECREF(ucvt);
829 return r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000830
831errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 Py_XDECREF(r);
833 Py_XDECREF(ucvt);
Victor Stinnerd9491262013-04-14 02:06:32 +0200834 Py_XDECREF(origpending);
Victor Stinner322cc742013-04-14 18:11:41 +0200835 Py_XDECREF(inbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000837}
838
839static int
840decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000841 MultibyteDecodeBuffer *buf)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000842{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 Py_ssize_t npendings;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
846 if (npendings + ctx->pendingsize > MAXDECPENDING ||
847 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
848 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
849 return -1;
850 }
851 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
852 ctx->pendingsize += npendings;
853 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000854}
855
856static int
857decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 Py_ssize_t size)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000859{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
861 buf->inbuf_end = buf->inbuf_top + size;
Victor Stinner8f674cc2013-04-17 23:02:17 +0200862 buf->writer.min_length += size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000864}
865
866static int
867decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 MultibyteDecodeBuffer *buf)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 while (buf->inbuf < buf->inbuf_end) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200871 Py_ssize_t inleft;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 Py_ssize_t r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000873
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000875
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000876 r = ctx->codec->decode(&ctx->state, ctx->codec->config,
Victor Stinnera0dd0212013-04-11 22:09:04 +0200877 &buf->inbuf, inleft, &buf->writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 if (r == 0 || r == MBERR_TOOFEW)
879 break;
880 else if (multibytecodec_decerror(ctx->codec, &ctx->state,
881 buf, ctx->errors, r))
882 return -1;
883 }
884 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000885}
886
887
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400888/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400889_multibytecodec.MultibyteIncrementalEncoder.encode
890
891 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200892 final: bool(accept={int}) = False
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400893[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000894
895static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400896_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
897 PyObject *input,
898 int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200899/*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000900{
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400901 return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000902}
903
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400904/*[clinic input]
Christopher Thorneac22f6a2018-11-01 10:48:49 +0000905_multibytecodec.MultibyteIncrementalEncoder.getstate
906[clinic start generated code]*/
907
908static PyObject *
909_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
910/*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
911{
912 /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
913 for UTF-8 encoded buffer (each character can use up to 4
914 bytes), and required bytes for MultibyteCodec_State.c. A byte
915 array is used to avoid different compilers generating different
916 values for the same state, e.g. as a result of struct padding.
917 */
918 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
919 Py_ssize_t statesize;
920 const char *pendingbuffer = NULL;
921 Py_ssize_t pendingsize;
922
923 if (self->pending != NULL) {
924 pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
925 if (pendingbuffer == NULL) {
926 return NULL;
927 }
928 if (pendingsize > MAXENCPENDING*4) {
929 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
930 return NULL;
931 }
Victor Stinnercdbcb772018-11-22 10:25:46 +0100932 statebytes[0] = (unsigned char)pendingsize;
933 memcpy(statebytes + 1, pendingbuffer, pendingsize);
Christopher Thorneac22f6a2018-11-01 10:48:49 +0000934 statesize = 1 + pendingsize;
935 } else {
936 statebytes[0] = 0;
937 statesize = 1;
938 }
939 memcpy(statebytes+statesize, self->state.c,
940 sizeof(self->state.c));
941 statesize += sizeof(self->state.c);
942
943 return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
944 1 /* little-endian */ ,
945 0 /* unsigned */ );
946}
947
948/*[clinic input]
949_multibytecodec.MultibyteIncrementalEncoder.setstate
950 state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
951 /
952[clinic start generated code]*/
953
954static PyObject *
955_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
956 PyLongObject *statelong)
957/*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
958{
959 PyObject *pending = NULL;
960 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
961
962 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
963 1 /* little-endian */ ,
964 0 /* unsigned */ ) < 0) {
965 goto errorexit;
966 }
967
968 if (statebytes[0] > MAXENCPENDING*4) {
969 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
970 return NULL;
971 }
972
973 pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
974 statebytes[0], "strict");
975 if (pending == NULL) {
976 goto errorexit;
977 }
978
979 Py_CLEAR(self->pending);
980 self->pending = pending;
981 memcpy(self->state.c, statebytes+1+statebytes[0],
982 sizeof(self->state.c));
983
984 Py_RETURN_NONE;
985
986errorexit:
987 Py_XDECREF(pending);
988 return NULL;
989}
990
991/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400992_multibytecodec.MultibyteIncrementalEncoder.reset
993[clinic start generated code]*/
994
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000995static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400996_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
997/*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000998{
Victor Stinnere15dce32011-05-30 22:56:00 +0200999 /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1000 unsigned char buffer[4], *outbuf;
1001 Py_ssize_t r;
1002 if (self->codec->encreset != NULL) {
1003 outbuf = buffer;
1004 r = self->codec->encreset(&self->state, self->codec->config,
1005 &outbuf, sizeof(buffer));
1006 if (r != 0)
1007 return NULL;
1008 }
Victor Stinnerd9491262013-04-14 02:06:32 +02001009 Py_CLEAR(self->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001011}
1012
1013static struct PyMethodDef mbiencoder_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001014 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001015 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1016 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001017 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1018 {NULL, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001019};
1020
1021static PyObject *
1022mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1023{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 MultibyteIncrementalEncoderObject *self;
1025 PyObject *codec = NULL;
1026 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1029 incnewkwarglist, &errors))
1030 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1033 if (self == NULL)
1034 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1037 if (codec == NULL)
1038 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001039
1040 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1041 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1043 goto errorexit;
1044 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 self->codec = ((MultibyteCodecObject *)codec)->codec;
Victor Stinnerd9491262013-04-14 02:06:32 +02001047 self->pending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 self->errors = internal_error_callback(errors);
1049 if (self->errors == NULL)
1050 goto errorexit;
1051 if (self->codec->encinit != NULL &&
1052 self->codec->encinit(&self->state, self->codec->config) != 0)
1053 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001055 Py_DECREF(codec);
1056 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001057
1058errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 Py_XDECREF(self);
1060 Py_XDECREF(codec);
1061 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001062}
1063
1064static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001065mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1066{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001068}
1069
1070static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001071mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001073{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 if (ERROR_ISCUSTOM(self->errors))
1075 Py_VISIT(self->errors);
1076 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001077}
1078
1079static void
1080mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1081{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001082 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 PyObject_GC_UnTrack(self);
1084 ERROR_DECREF(self->errors);
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001085 Py_CLEAR(self->pending);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001086 tp->tp_free(self);
1087 Py_DECREF(tp);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001088}
1089
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001090static PyType_Slot encoder_slots[] = {
1091 {Py_tp_dealloc, mbiencoder_dealloc},
1092 {Py_tp_getattro, PyObject_GenericGetAttr},
1093 {Py_tp_traverse, mbiencoder_traverse},
1094 {Py_tp_methods, mbiencoder_methods},
1095 {Py_tp_getset, codecctx_getsets},
1096 {Py_tp_init, mbiencoder_init},
1097 {Py_tp_new, mbiencoder_new},
1098 {0, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001099};
1100
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001101static PyType_Spec encoder_spec = {
1102 .name = MODULE_NAME ".MultibyteIncrementalEncoder",
1103 .basicsize = sizeof(MultibyteIncrementalEncoderObject),
1104 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
1105 .slots = encoder_slots,
1106};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001107
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001108
1109/*[clinic input]
1110_multibytecodec.MultibyteIncrementalDecoder.decode
1111
1112 input: Py_buffer
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001113 final: bool(accept={int}) = False
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001114[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001115
1116static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001117_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1118 Py_buffer *input,
1119 int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001120/*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001121{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 MultibyteDecodeBuffer buf;
1123 char *data, *wdata = NULL;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001124 Py_ssize_t wsize, size, origpending;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001125 PyObject *res;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001126
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001127 data = input->buf;
1128 size = input->len;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001129
Victor Stinner8f674cc2013-04-17 23:02:17 +02001130 _PyUnicodeWriter_Init(&buf.writer);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001131 buf.excobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 origpending = self->pendingsize;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 if (self->pendingsize == 0) {
1135 wsize = size;
1136 wdata = data;
1137 }
1138 else {
1139 if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1140 PyErr_NoMemory();
1141 goto errorexit;
1142 }
1143 wsize = size + self->pendingsize;
1144 wdata = PyMem_Malloc(wsize);
Victor Stinner33283ba2013-07-15 17:47:39 +02001145 if (wdata == NULL) {
1146 PyErr_NoMemory();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 goto errorexit;
Victor Stinner33283ba2013-07-15 17:47:39 +02001148 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 memcpy(wdata, self->pending, self->pendingsize);
1150 memcpy(wdata + self->pendingsize, data, size);
1151 self->pendingsize = 0;
1152 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1155 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1158 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 if (final && buf.inbuf < buf.inbuf_end) {
1161 if (multibytecodec_decerror(self->codec, &self->state,
1162 &buf, self->errors, MBERR_TOOFEW)) {
1163 /* recover the original pending buffer */
1164 memcpy(self->pending, wdata, origpending);
1165 self->pendingsize = origpending;
1166 goto errorexit;
1167 }
1168 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1171 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1172 goto errorexit;
1173 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001174
Victor Stinnera0dd0212013-04-11 22:09:04 +02001175 res = _PyUnicodeWriter_Finish(&buf.writer);
1176 if (res == NULL)
1177 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 if (wdata != data)
Victor Stinner00d7abd2020-12-01 09:56:42 +01001180 PyMem_Free(wdata);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001182 return res;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001183
1184errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 if (wdata != NULL && wdata != data)
Victor Stinner00d7abd2020-12-01 09:56:42 +01001186 PyMem_Free(wdata);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001188 _PyUnicodeWriter_Dealloc(&buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001190}
1191
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001192/*[clinic input]
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001193_multibytecodec.MultibyteIncrementalDecoder.getstate
1194[clinic start generated code]*/
1195
1196static PyObject *
1197_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1198/*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1199{
1200 PyObject *buffer;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001201 PyObject *statelong;
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001202
1203 buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1204 self->pendingsize);
1205 if (buffer == NULL) {
1206 return NULL;
1207 }
1208
Christopher Thorne488c0a62018-11-02 03:29:40 +00001209 statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1210 sizeof(self->state.c),
1211 1 /* little-endian */ ,
1212 0 /* unsigned */ );
1213 if (statelong == NULL) {
1214 Py_DECREF(buffer);
1215 return NULL;
1216 }
1217
1218 return Py_BuildValue("NN", buffer, statelong);
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001219}
1220
1221/*[clinic input]
1222_multibytecodec.MultibyteIncrementalDecoder.setstate
1223 state: object(subclass_of='&PyTuple_Type')
1224 /
1225[clinic start generated code]*/
1226
1227static PyObject *
1228_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1229 PyObject *state)
1230/*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1231{
1232 PyObject *buffer;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001233 PyLongObject *statelong;
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001234 Py_ssize_t buffersize;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001235 const char *bufferstr;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001236 unsigned char statebytes[8];
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001237
Christopher Thorne488c0a62018-11-02 03:29:40 +00001238 if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1239 &buffer, &PyLong_Type, &statelong))
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001240 {
1241 return NULL;
1242 }
1243
Christopher Thorne488c0a62018-11-02 03:29:40 +00001244 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1245 1 /* little-endian */ ,
1246 0 /* unsigned */ ) < 0) {
1247 return NULL;
1248 }
1249
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001250 buffersize = PyBytes_Size(buffer);
1251 if (buffersize == -1) {
1252 return NULL;
1253 }
1254
1255 if (buffersize > MAXDECPENDING) {
1256 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1257 return NULL;
1258 }
1259
1260 bufferstr = PyBytes_AsString(buffer);
1261 if (bufferstr == NULL) {
1262 return NULL;
1263 }
1264 self->pendingsize = buffersize;
1265 memcpy(self->pending, bufferstr, self->pendingsize);
Christopher Thorne488c0a62018-11-02 03:29:40 +00001266 memcpy(self->state.c, statebytes, sizeof(statebytes));
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001267
1268 Py_RETURN_NONE;
1269}
1270
1271/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001272_multibytecodec.MultibyteIncrementalDecoder.reset
1273[clinic start generated code]*/
1274
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001275static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001276_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1277/*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001278{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 if (self->codec->decreset != NULL &&
1280 self->codec->decreset(&self->state, self->codec->config) != 0)
1281 return NULL;
1282 self->pendingsize = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001285}
1286
1287static struct PyMethodDef mbidecoder_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001288 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001289 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1290 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001291 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1292 {NULL, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001293};
1294
1295static PyObject *
1296mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 MultibyteIncrementalDecoderObject *self;
1299 PyObject *codec = NULL;
1300 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1303 incnewkwarglist, &errors))
1304 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1307 if (self == NULL)
1308 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001309
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001310 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1311 if (codec == NULL)
1312 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001313
1314 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1315 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1317 goto errorexit;
1318 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 self->codec = ((MultibyteCodecObject *)codec)->codec;
1321 self->pendingsize = 0;
1322 self->errors = internal_error_callback(errors);
1323 if (self->errors == NULL)
1324 goto errorexit;
1325 if (self->codec->decinit != NULL &&
1326 self->codec->decinit(&self->state, self->codec->config) != 0)
1327 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001328
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 Py_DECREF(codec);
1330 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001331
1332errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 Py_XDECREF(self);
1334 Py_XDECREF(codec);
1335 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001336}
1337
1338static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001339mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001342}
1343
1344static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001345mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 if (ERROR_ISCUSTOM(self->errors))
1349 Py_VISIT(self->errors);
1350 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001351}
1352
1353static void
1354mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1355{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001356 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 PyObject_GC_UnTrack(self);
1358 ERROR_DECREF(self->errors);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001359 tp->tp_free(self);
1360 Py_DECREF(tp);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001361}
1362
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001363static PyType_Slot decoder_slots[] = {
1364 {Py_tp_dealloc, mbidecoder_dealloc},
1365 {Py_tp_getattro, PyObject_GenericGetAttr},
1366 {Py_tp_traverse, mbidecoder_traverse},
1367 {Py_tp_methods, mbidecoder_methods},
1368 {Py_tp_getset, codecctx_getsets},
1369 {Py_tp_init, mbidecoder_init},
1370 {Py_tp_new, mbidecoder_new},
1371 {0, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001372};
1373
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001374static PyType_Spec decoder_spec = {
1375 .name = MODULE_NAME ".MultibyteIncrementalDecoder",
1376 .basicsize = sizeof(MultibyteIncrementalDecoderObject),
1377 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
1378 .slots = decoder_slots,
1379};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001380
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001381static PyObject *
1382mbstreamreader_iread(MultibyteStreamReaderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 const char *method, Py_ssize_t sizehint)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 MultibyteDecodeBuffer buf;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001386 PyObject *cres, *res;
1387 Py_ssize_t rsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 if (sizehint == 0)
Victor Stinnerb37b1742011-12-01 03:18:59 +01001390 return PyUnicode_New(0, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001391
Victor Stinner8f674cc2013-04-17 23:02:17 +02001392 _PyUnicodeWriter_Init(&buf.writer);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001393 buf.excobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 for (;;) {
1397 int endoffile;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (sizehint < 0)
1400 cres = PyObject_CallMethod(self->stream,
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001401 method, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 else
1403 cres = PyObject_CallMethod(self->stream,
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001404 method, "i", sizehint);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 if (cres == NULL)
1406 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 if (!PyBytes_Check(cres)) {
1409 PyErr_Format(PyExc_TypeError,
1410 "stream function returned a "
1411 "non-bytes object (%.100s)",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001412 Py_TYPE(cres)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 goto errorexit;
1414 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 endoffile = (PyBytes_GET_SIZE(cres) == 0);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 if (self->pendingsize > 0) {
1419 PyObject *ctr;
1420 char *ctrdata;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1423 PyErr_NoMemory();
1424 goto errorexit;
Victor Stinner064bbdc2013-07-08 22:28:27 +02001425 }
1426 rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1427 ctr = PyBytes_FromStringAndSize(NULL, rsize);
1428 if (ctr == NULL)
1429 goto errorexit;
1430 ctrdata = PyBytes_AS_STRING(ctr);
1431 memcpy(ctrdata, self->pending, self->pendingsize);
1432 memcpy(ctrdata + self->pendingsize,
1433 PyBytes_AS_STRING(cres),
1434 PyBytes_GET_SIZE(cres));
1435 Py_DECREF(cres);
1436 cres = ctr;
1437 self->pendingsize = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 rsize = PyBytes_GET_SIZE(cres);
1441 if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1442 rsize) != 0)
1443 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 if (rsize > 0 && decoder_feed_buffer(
1446 (MultibyteStatefulDecoderContext *)self, &buf))
1447 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 if (endoffile || sizehint < 0) {
1450 if (buf.inbuf < buf.inbuf_end &&
1451 multibytecodec_decerror(self->codec, &self->state,
1452 &buf, self->errors, MBERR_TOOFEW))
1453 goto errorexit;
1454 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1457 if (decoder_append_pending(STATEFUL_DCTX(self),
1458 &buf) != 0)
1459 goto errorexit;
1460 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 Py_DECREF(cres);
1463 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001464
Victor Stinnera0dd0212013-04-11 22:09:04 +02001465 if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 break;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 sizehint = 1; /* read 1 more byte and retry */
1469 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001470
Victor Stinnera0dd0212013-04-11 22:09:04 +02001471 res = _PyUnicodeWriter_Finish(&buf.writer);
1472 if (res == NULL)
1473 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 Py_XDECREF(cres);
1476 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001477 return res;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001478
1479errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 Py_XDECREF(cres);
1481 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001482 _PyUnicodeWriter_Dealloc(&buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001484}
1485
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001486/*[clinic input]
1487 _multibytecodec.MultibyteStreamReader.read
1488
1489 sizeobj: object = None
1490 /
1491[clinic start generated code]*/
1492
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001493static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001494_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1495 PyObject *sizeobj)
1496/*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001497{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001499
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001500 if (sizeobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 size = -1;
1502 else if (PyLong_Check(sizeobj))
1503 size = PyLong_AsSsize_t(sizeobj);
1504 else {
1505 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1506 return NULL;
1507 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 if (size == -1 && PyErr_Occurred())
1510 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001511
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001512 return mbstreamreader_iread(self, "read", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001513}
1514
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001515/*[clinic input]
1516 _multibytecodec.MultibyteStreamReader.readline
1517
1518 sizeobj: object = None
1519 /
1520[clinic start generated code]*/
1521
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001522static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001523_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1524 PyObject *sizeobj)
1525/*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001526{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001528
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001529 if (sizeobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 size = -1;
1531 else if (PyLong_Check(sizeobj))
1532 size = PyLong_AsSsize_t(sizeobj);
1533 else {
1534 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1535 return NULL;
1536 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 if (size == -1 && PyErr_Occurred())
1539 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 return mbstreamreader_iread(self, "readline", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001542}
1543
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001544/*[clinic input]
1545 _multibytecodec.MultibyteStreamReader.readlines
1546
1547 sizehintobj: object = None
1548 /
1549[clinic start generated code]*/
1550
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001551static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001552_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1553 PyObject *sizehintobj)
1554/*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001555{
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001556 PyObject *r, *sr;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 Py_ssize_t sizehint;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001558
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001559 if (sizehintobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001560 sizehint = -1;
1561 else if (PyLong_Check(sizehintobj))
1562 sizehint = PyLong_AsSsize_t(sizehintobj);
1563 else {
1564 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1565 return NULL;
1566 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001567
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 if (sizehint == -1 && PyErr_Occurred())
1569 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001570
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 r = mbstreamreader_iread(self, "read", sizehint);
1572 if (r == NULL)
1573 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 sr = PyUnicode_Splitlines(r, 1);
1576 Py_DECREF(r);
1577 return sr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001578}
1579
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001580/*[clinic input]
1581 _multibytecodec.MultibyteStreamReader.reset
1582[clinic start generated code]*/
1583
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001584static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001585_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1586/*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001587{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 if (self->codec->decreset != NULL &&
1589 self->codec->decreset(&self->state, self->codec->config) != 0)
1590 return NULL;
1591 self->pendingsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001593 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001594}
1595
1596static struct PyMethodDef mbstreamreader_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001597 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1598 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1599 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1600 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001602};
1603
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001604static PyMemberDef mbstreamreader_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 {"stream", T_OBJECT,
1606 offsetof(MultibyteStreamReaderObject, stream),
1607 READONLY, NULL},
1608 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001609};
1610
1611static PyObject *
1612mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1613{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 MultibyteStreamReaderObject *self;
1615 PyObject *stream, *codec = NULL;
1616 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1619 streamkwarglist, &stream, &errors))
1620 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001622 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1623 if (self == NULL)
1624 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1627 if (codec == NULL)
1628 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001629
1630 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1631 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1633 goto errorexit;
1634 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 self->codec = ((MultibyteCodecObject *)codec)->codec;
1637 self->stream = stream;
1638 Py_INCREF(stream);
1639 self->pendingsize = 0;
1640 self->errors = internal_error_callback(errors);
1641 if (self->errors == NULL)
1642 goto errorexit;
1643 if (self->codec->decinit != NULL &&
1644 self->codec->decinit(&self->state, self->codec->config) != 0)
1645 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 Py_DECREF(codec);
1648 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001649
1650errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 Py_XDECREF(self);
1652 Py_XDECREF(codec);
1653 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001654}
1655
1656static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001657mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001660}
1661
1662static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001663mbstreamreader_traverse(MultibyteStreamReaderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001665{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 if (ERROR_ISCUSTOM(self->errors))
1667 Py_VISIT(self->errors);
1668 Py_VISIT(self->stream);
1669 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001670}
1671
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001672static void
1673mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1674{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001675 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 PyObject_GC_UnTrack(self);
1677 ERROR_DECREF(self->errors);
1678 Py_XDECREF(self->stream);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001679 tp->tp_free(self);
1680 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001681}
1682
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001683static PyType_Slot reader_slots[] = {
1684 {Py_tp_dealloc, mbstreamreader_dealloc},
1685 {Py_tp_getattro, PyObject_GenericGetAttr},
1686 {Py_tp_traverse, mbstreamreader_traverse},
1687 {Py_tp_methods, mbstreamreader_methods},
1688 {Py_tp_members, mbstreamreader_members},
1689 {Py_tp_getset, codecctx_getsets},
1690 {Py_tp_init, mbstreamreader_init},
1691 {Py_tp_new, mbstreamreader_new},
1692 {0, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001693};
1694
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001695static PyType_Spec reader_spec = {
1696 .name = MODULE_NAME ".MultibyteStreamReader",
1697 .basicsize = sizeof(MultibyteStreamReaderObject),
1698 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
1699 .slots = reader_slots,
1700};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001701
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001702static int
1703mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001704 PyObject *unistr)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001705{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 PyObject *str, *wr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1709 if (str == NULL)
1710 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001711
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001712 wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 Py_DECREF(str);
1714 if (wr == NULL)
1715 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 Py_DECREF(wr);
1718 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001719}
1720
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001721/*[clinic input]
1722 _multibytecodec.MultibyteStreamWriter.write
1723
1724 strobj: object
1725 /
1726[clinic start generated code]*/
1727
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001728static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001729_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self,
1730 PyObject *strobj)
1731/*[clinic end generated code: output=e13ae841c895251e input=551dc4c018c10a2b]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001732{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 if (mbstreamwriter_iwrite(self, strobj))
1734 return NULL;
1735 else
1736 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001737}
1738
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001739/*[clinic input]
1740 _multibytecodec.MultibyteStreamWriter.writelines
1741
1742 lines: object
1743 /
1744[clinic start generated code]*/
1745
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001746static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001747_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self,
1748 PyObject *lines)
1749/*[clinic end generated code: output=e5c4285ac8e7d522 input=57797fe7008d4e96]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001750{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001751 PyObject *strobj;
1752 int i, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001753
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001754 if (!PySequence_Check(lines)) {
1755 PyErr_SetString(PyExc_TypeError,
1756 "arg must be a sequence object");
1757 return NULL;
1758 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001760 for (i = 0; i < PySequence_Length(lines); i++) {
1761 /* length can be changed even within this loop */
1762 strobj = PySequence_GetItem(lines, i);
1763 if (strobj == NULL)
1764 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001766 r = mbstreamwriter_iwrite(self, strobj);
1767 Py_DECREF(strobj);
1768 if (r == -1)
1769 return NULL;
1770 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001771 /* PySequence_Length() can fail */
1772 if (PyErr_Occurred())
1773 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001774
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001776}
1777
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001778/*[clinic input]
1779 _multibytecodec.MultibyteStreamWriter.reset
1780[clinic start generated code]*/
1781
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001782static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001783_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1784/*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001785{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001786 PyObject *pwrt;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001787
Victor Stinner579db162015-07-16 22:17:31 +02001788 if (!self->pending)
1789 Py_RETURN_NONE;
1790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001791 pwrt = multibytecodec_encode(self->codec, &self->state,
Victor Stinnerd9491262013-04-14 02:06:32 +02001792 self->pending, NULL, self->errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 MBENC_FLUSH | MBENC_RESET);
1794 /* some pending buffer can be truncated when UnicodeEncodeError is
1795 * raised on 'strict' mode. but, 'reset' method is designed to
1796 * reset the pending buffer or states so failed string sequence
1797 * ought to be missed */
Victor Stinnerd9491262013-04-14 02:06:32 +02001798 Py_CLEAR(self->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 if (pwrt == NULL)
1800 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001802 assert(PyBytes_Check(pwrt));
1803 if (PyBytes_Size(pwrt) > 0) {
1804 PyObject *wr;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001805
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001806 wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, pwrt);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807 if (wr == NULL) {
1808 Py_DECREF(pwrt);
1809 return NULL;
1810 }
1811 }
1812 Py_DECREF(pwrt);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001813
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001814 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001815}
1816
1817static PyObject *
1818mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1819{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 MultibyteStreamWriterObject *self;
1821 PyObject *stream, *codec = NULL;
1822 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001823
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001824 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1825 streamkwarglist, &stream, &errors))
1826 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1829 if (self == NULL)
1830 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1833 if (codec == NULL)
1834 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001835
1836 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1837 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1839 goto errorexit;
1840 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001841
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001842 self->codec = ((MultibyteCodecObject *)codec)->codec;
1843 self->stream = stream;
1844 Py_INCREF(stream);
Victor Stinnerd9491262013-04-14 02:06:32 +02001845 self->pending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001846 self->errors = internal_error_callback(errors);
1847 if (self->errors == NULL)
1848 goto errorexit;
1849 if (self->codec->encinit != NULL &&
1850 self->codec->encinit(&self->state, self->codec->config) != 0)
1851 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001852
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 Py_DECREF(codec);
1854 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001855
1856errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 Py_XDECREF(self);
1858 Py_XDECREF(codec);
1859 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001860}
1861
1862static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001863mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1864{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001865 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001866}
1867
1868static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001869mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001871{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001872 if (ERROR_ISCUSTOM(self->errors))
1873 Py_VISIT(self->errors);
1874 Py_VISIT(self->stream);
1875 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001876}
1877
1878static void
1879mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1880{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001881 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001882 PyObject_GC_UnTrack(self);
1883 ERROR_DECREF(self->errors);
1884 Py_XDECREF(self->stream);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001885 tp->tp_free(self);
1886 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001887}
1888
1889static struct PyMethodDef mbstreamwriter_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001890 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1891 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1892 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1893 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001894};
1895
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001896static PyMemberDef mbstreamwriter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001897 {"stream", T_OBJECT,
1898 offsetof(MultibyteStreamWriterObject, stream),
1899 READONLY, NULL},
1900 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001901};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001902
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001903static PyType_Slot writer_slots[] = {
1904 {Py_tp_dealloc, mbstreamwriter_dealloc},
1905 {Py_tp_getattro, PyObject_GenericGetAttr},
1906 {Py_tp_traverse, mbstreamwriter_traverse},
1907 {Py_tp_methods, mbstreamwriter_methods},
1908 {Py_tp_members, mbstreamwriter_members},
1909 {Py_tp_getset, codecctx_getsets},
1910 {Py_tp_init, mbstreamwriter_init},
1911 {Py_tp_new, mbstreamwriter_new},
1912 {0, NULL},
1913};
1914
1915static PyType_Spec writer_spec = {
1916 .name = MODULE_NAME ".MultibyteStreamWriter",
1917 .basicsize = sizeof(MultibyteStreamWriterObject),
1918 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
1919 .slots = writer_slots,
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001920};
1921
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001922
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001923/*[clinic input]
1924_multibytecodec.__create_codec
1925
1926 arg: object
1927 /
1928[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001929
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001930static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001931_multibytecodec___create_codec(PyObject *module, PyObject *arg)
1932/*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001933{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001934 MultibyteCodecObject *self;
1935 MultibyteCodec *codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001936
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1938 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1939 return NULL;
1940 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1943 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1944 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001945
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001946 _multibytecodec_state *state = _multibytecodec_get_state(module);
1947 self = PyObject_New(MultibyteCodecObject, state->multibytecodec_type);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 if (self == NULL)
1949 return NULL;
1950 self->codec = codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001953}
1954
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001955static int
1956_multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
1957{
1958 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1959 Py_VISIT(state->multibytecodec_type);
1960 Py_VISIT(state->encoder_type);
1961 Py_VISIT(state->decoder_type);
1962 Py_VISIT(state->reader_type);
1963 Py_VISIT(state->writer_type);
1964 return 0;
1965}
1966
1967static int
1968_multibytecodec_clear(PyObject *mod)
1969{
1970 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1971 Py_CLEAR(state->multibytecodec_type);
1972 Py_CLEAR(state->encoder_type);
1973 Py_CLEAR(state->decoder_type);
1974 Py_CLEAR(state->reader_type);
1975 Py_CLEAR(state->writer_type);
1976 return 0;
1977}
1978
1979static void
1980_multibytecodec_free(void *mod)
1981{
1982 _multibytecodec_clear((PyObject *)mod);
1983}
1984
1985#define CREATE_TYPE(module, type, spec) \
1986 do { \
1987 type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
1988 if (!type) { \
1989 return -1; \
1990 } \
1991 } while (0)
1992
1993#define ADD_TYPE(module, type) \
1994 do { \
1995 if (PyModule_AddType(module, type) < 0) { \
1996 return -1; \
1997 } \
1998 } while (0)
1999
2000static int
2001_multibytecodec_exec(PyObject *mod)
2002{
2003 _multibytecodec_state *state = _multibytecodec_get_state(mod);
2004 CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2005 CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2006 CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2007 CREATE_TYPE(mod, state->reader_type, &reader_spec);
2008 CREATE_TYPE(mod, state->writer_type, &writer_spec);
2009
2010 ADD_TYPE(mod, state->encoder_type);
2011 ADD_TYPE(mod, state->decoder_type);
2012 ADD_TYPE(mod, state->reader_type);
2013 ADD_TYPE(mod, state->writer_type);
2014 return 0;
2015}
2016
2017#undef CREATE_TYPE
2018#undef ADD_TYPE
2019
2020static struct PyMethodDef _multibytecodec_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04002021 _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002023};
2024
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002025static PyModuleDef_Slot _multibytecodec_slots[] = {
2026 {Py_mod_exec, _multibytecodec_exec},
2027 {0, NULL}
2028};
Martin v. Löwis1a214512008-06-11 05:26:20 +00002029
2030static struct PyModuleDef _multibytecodecmodule = {
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002031 .m_base = PyModuleDef_HEAD_INIT,
2032 .m_name = "_multibytecodec",
2033 .m_size = sizeof(_multibytecodec_state),
2034 .m_methods = _multibytecodec_methods,
2035 .m_slots = _multibytecodec_slots,
2036 .m_traverse = _multibytecodec_traverse,
2037 .m_clear = _multibytecodec_clear,
2038 .m_free = _multibytecodec_free,
Martin v. Löwis1a214512008-06-11 05:26:20 +00002039};
2040
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002041PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002042PyInit__multibytecodec(void)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002043{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002044 return PyModuleDef_Init(&_multibytecodecmodule);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002045}