blob: ba558d0dbf2f1dc900453600d76e5c91063a98c3 [file] [log] [blame]
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001/*
2 * multibytecodec.c: Common Multibyte Codec Implementation
3 *
4 * Written by Hye-Shik Chang <perky@FreeBSD.org>
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00005 */
6
Hye-Shik Chang4b96c132006-03-04 16:08:19 +00007#define PY_SSIZE_T_CLEAN
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00008#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +02009#include "structmember.h" // PyMemberDef
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000010#include "multibytecodec.h"
Brett Cannonf2de1fc2014-08-22 11:45:03 -040011#include "clinic/multibytecodec.c.h"
12
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010013#define MODULE_NAME "_multibytecodec"
14
15typedef struct {
16 PyTypeObject *encoder_type;
17 PyTypeObject *decoder_type;
18 PyTypeObject *reader_type;
19 PyTypeObject *writer_type;
20 PyTypeObject *multibytecodec_type;
21} _multibytecodec_state;
22
23static _multibytecodec_state *
24_multibytecodec_get_state(PyObject *module)
25{
26 _multibytecodec_state *state = PyModule_GetState(module);
27 assert(state != NULL);
28 return state;
29}
30
31static struct PyModuleDef _multibytecodecmodule;
32static _multibytecodec_state *
33_multibyte_codec_find_state_by_type(PyTypeObject *type)
34{
35 PyObject *module = _PyType_GetModuleByDef(type, &_multibytecodecmodule);
36 assert(module != NULL);
37 return _multibytecodec_get_state(module);
38}
39
40#define clinic_get_state() _multibyte_codec_find_state_by_type(type)
Brett Cannonf2de1fc2014-08-22 11:45:03 -040041/*[clinic input]
Brett Cannonf2de1fc2014-08-22 11:45:03 -040042module _multibytecodec
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010043class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
44class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
45class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
46class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
47class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
Brett Cannonf2de1fc2014-08-22 11:45:03 -040048[clinic start generated code]*/
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +010049/*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
50#undef clinic_get_state
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000051
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000052typedef struct {
Victor Stinnerd9491262013-04-14 02:06:32 +020053 PyObject *inobj;
54 Py_ssize_t inpos, inlen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000055 unsigned char *outbuf, *outbuf_end;
56 PyObject *excobj, *outobj;
57} MultibyteEncodeBuffer;
58
59typedef struct {
60 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
Victor Stinnera0dd0212013-04-11 22:09:04 +020061 PyObject *excobj;
62 _PyUnicodeWriter writer;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000063} MultibyteDecodeBuffer;
64
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000065static char *incnewkwarglist[] = {"errors", NULL};
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +000066static char *streamkwarglist[] = {"stream", "errors", NULL};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000067
68static PyObject *multibytecodec_encode(MultibyteCodec *,
Victor Stinnerd9491262013-04-14 02:06:32 +020069 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000070 PyObject *, int);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000071
/* Extra flag bit, one position above MBENC_MAX.
 * The replacement list is parenthesized so the macro expands as a single
 * operand whatever operator sits next to it. */
#define MBENC_RESET     (MBENC_MAX << 1) /* reset after an encoding session */

_Py_IDENTIFIER(write);
75
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000076static PyObject *
Hye-Shik Chang4b96c132006-03-04 16:08:19 +000077make_tuple(PyObject *object, Py_ssize_t len)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000078{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 PyObject *v, *w;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 if (object == NULL)
82 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000084 v = PyTuple_New(2);
85 if (v == NULL) {
86 Py_DECREF(object);
87 return NULL;
88 }
89 PyTuple_SET_ITEM(v, 0, object);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 w = PyLong_FromSsize_t(len);
92 if (w == NULL) {
93 Py_DECREF(v);
94 return NULL;
95 }
96 PyTuple_SET_ITEM(v, 1, w);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 return v;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000099}
100
101static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000102internal_error_callback(const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 if (errors == NULL || strcmp(errors, "strict") == 0)
105 return ERROR_STRICT;
106 else if (strcmp(errors, "ignore") == 0)
107 return ERROR_IGNORE;
108 else if (strcmp(errors, "replace") == 0)
109 return ERROR_REPLACE;
110 else
111 return PyUnicode_FromString(errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000112}
113
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000114static PyObject *
115call_error_callback(PyObject *errors, PyObject *exc)
116{
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200117 PyObject *cb, *r;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 const char *str;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 assert(PyUnicode_Check(errors));
Serhiy Storchaka06515832016-11-20 09:13:07 +0200121 str = PyUnicode_AsUTF8(errors);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 if (str == NULL)
123 return NULL;
124 cb = PyCodec_LookupError(str);
125 if (cb == NULL)
126 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000127
Petr Viktorinffd97532020-02-11 17:46:57 +0100128 r = PyObject_CallOneArg(cb, exc);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_DECREF(cb);
130 return r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000131}
132
133static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200134codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000135{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 const char *errors;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 if (self->errors == ERROR_STRICT)
139 errors = "strict";
140 else if (self->errors == ERROR_IGNORE)
141 errors = "ignore";
142 else if (self->errors == ERROR_REPLACE)
143 errors = "replace";
144 else {
145 Py_INCREF(self->errors);
146 return self->errors;
147 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return PyUnicode_FromString(errors);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000150}
151
152static int
153codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 void *closure)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 PyObject *cb;
157 const char *str;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000158
Zackery Spytz842acaa2018-12-17 07:52:45 -0700159 if (value == NULL) {
160 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
161 return -1;
162 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 if (!PyUnicode_Check(value)) {
164 PyErr_SetString(PyExc_TypeError, "errors must be a string");
165 return -1;
166 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000167
Serhiy Storchaka06515832016-11-20 09:13:07 +0200168 str = PyUnicode_AsUTF8(value);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 if (str == NULL)
170 return -1;
Neal Norwitz6ea45d32007-08-26 04:19:43 +0000171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 cb = internal_error_callback(str);
173 if (cb == NULL)
174 return -1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 ERROR_DECREF(self->errors);
177 self->errors = cb;
178 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000179}
180
181/* This getset handlers list is used by all the stateful codec objects */
182static PyGetSetDef codecctx_getsets[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 {"errors", (getter)codecctx_errors_get,
184 (setter)codecctx_errors_set,
185 PyDoc_STR("how to treat errors")},
186 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000187};
188
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000189static int
Hye-Shik Chang4b96c132006-03-04 16:08:19 +0000190expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000192 Py_ssize_t orgpos, orgsize, incsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 orgpos = (Py_ssize_t)((char *)buf->outbuf -
195 PyBytes_AS_STRING(buf->outobj));
196 orgsize = PyBytes_GET_SIZE(buf->outobj);
197 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000198
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200199 if (orgsize > PY_SSIZE_T_MAX - incsize) {
200 PyErr_NoMemory();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return -1;
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200202 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
205 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
208 buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
209 + PyBytes_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000212}
/* Make sure `buf` has room for `s` more output bytes, growing it with
 * expand_encodebuffer() when it has not; a negative `s` always grows it.
 * Jumps to the enclosing function's `errorexit` label if growing fails. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
        expand_encodebuffer((buf), (s)) == -1) {                        \
        goto errorexit;                                                 \
    }                                                                   \
} while(0)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000218
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000219
220/**
221 * MultibyteCodec object
222 */
223
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000224static int
225multibytecodec_encerror(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 MultibyteCodec_State *state,
227 MultibyteEncodeBuffer *buf,
228 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 PyObject *retobj = NULL, *retstr = NULL, *tobj;
231 Py_ssize_t retstrsize, newpos;
232 Py_ssize_t esize, start, end;
233 const char *reason;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 if (e > 0) {
236 reason = "illegal multibyte sequence";
237 esize = e;
238 }
239 else {
240 switch (e) {
241 case MBERR_TOOSMALL:
242 REQUIRE_ENCODEBUFFER(buf, -1);
243 return 0; /* retry it */
244 case MBERR_TOOFEW:
245 reason = "incomplete multibyte sequence";
Victor Stinnerd9491262013-04-14 02:06:32 +0200246 esize = (Py_ssize_t)buf->inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 break;
248 case MBERR_INTERNAL:
249 PyErr_SetString(PyExc_RuntimeError,
250 "internal codec error");
251 return -1;
252 default:
253 PyErr_SetString(PyExc_RuntimeError,
254 "unknown runtime error");
255 return -1;
256 }
257 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 if (errors == ERROR_REPLACE) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200260 PyObject *replchar;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 Py_ssize_t r;
Victor Stinnerd9491262013-04-14 02:06:32 +0200262 Py_ssize_t inpos;
263 int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300264 const void *data;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000265
Victor Stinnerd9491262013-04-14 02:06:32 +0200266 replchar = PyUnicode_FromOrdinal('?');
267 if (replchar == NULL)
268 goto errorexit;
269 kind = PyUnicode_KIND(replchar);
270 data = PyUnicode_DATA(replchar);
271
272 inpos = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 for (;;) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200274 Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000275
Victor Stinnerd9491262013-04-14 02:06:32 +0200276 r = codec->encode(state, codec->config,
277 kind, data, &inpos, 1,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000278 &buf->outbuf, outleft, 0);
279 if (r == MBERR_TOOSMALL) {
280 REQUIRE_ENCODEBUFFER(buf, -1);
281 continue;
282 }
283 else
284 break;
285 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000286
Victor Stinnerd9491262013-04-14 02:06:32 +0200287 Py_DECREF(replchar);
288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 if (r != 0) {
290 REQUIRE_ENCODEBUFFER(buf, 1);
291 *buf->outbuf++ = '?';
292 }
293 }
294 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200295 buf->inpos += esize;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 return 0;
297 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000298
Victor Stinnerd9491262013-04-14 02:06:32 +0200299 start = (Py_ssize_t)buf->inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 /* use cached exception object if available */
303 if (buf->excobj == NULL) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200304 buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
305 "sOnns",
306 codec->encoding, buf->inobj,
307 start, end, reason);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 if (buf->excobj == NULL)
309 goto errorexit;
310 }
311 else
312 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
313 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
314 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
315 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 if (errors == ERROR_STRICT) {
318 PyCodec_StrictErrors(buf->excobj);
319 goto errorexit;
320 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000321
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000322 retobj = call_error_callback(errors, buf->excobj);
323 if (retobj == NULL)
324 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500327 (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000328 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
329 PyErr_SetString(PyExc_TypeError,
330 "encoding error handler must return "
Benjamin Petersonaff47232012-12-02 10:53:41 -0500331 "(str, int) tuple");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000332 goto errorexit;
333 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000334
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500335 if (PyUnicode_Check(tobj)) {
Victor Stinnerd9491262013-04-14 02:06:32 +0200336 Py_ssize_t inpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000337
Victor Stinnerd9491262013-04-14 02:06:32 +0200338 retstr = multibytecodec_encode(codec, state, tobj,
339 &inpos, ERROR_STRICT,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 MBENC_FLUSH);
341 if (retstr == NULL)
342 goto errorexit;
343 }
Benjamin Peterson47a00f32012-12-02 11:20:28 -0500344 else {
345 Py_INCREF(tobj);
346 retstr = tobj;
347 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000349 assert(PyBytes_Check(retstr));
350 retstrsize = PyBytes_GET_SIZE(retstr);
Serhiy Storchakaa1543cd2015-02-21 01:19:58 +0200351 if (retstrsize > 0) {
352 REQUIRE_ENCODEBUFFER(buf, retstrsize);
353 memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
354 buf->outbuf += retstrsize;
355 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
358 if (newpos < 0 && !PyErr_Occurred())
Victor Stinnerd9491262013-04-14 02:06:32 +0200359 newpos += (Py_ssize_t)buf->inlen;
360 if (newpos < 0 || newpos > buf->inlen) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 PyErr_Clear();
362 PyErr_Format(PyExc_IndexError,
363 "position %zd from error handler out of bounds",
364 newpos);
365 goto errorexit;
366 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200367 buf->inpos = newpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 Py_DECREF(retobj);
370 Py_DECREF(retstr);
371 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000372
373errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 Py_XDECREF(retobj);
375 Py_XDECREF(retstr);
376 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000377}
378
379static int
380multibytecodec_decerror(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 MultibyteCodec_State *state,
382 MultibyteDecodeBuffer *buf,
383 PyObject *errors, Py_ssize_t e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 PyObject *retobj = NULL, *retuni = NULL;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200386 Py_ssize_t newpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 const char *reason;
388 Py_ssize_t esize, start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 if (e > 0) {
391 reason = "illegal multibyte sequence";
392 esize = e;
393 }
394 else {
395 switch (e) {
396 case MBERR_TOOSMALL:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 return 0; /* retry it */
398 case MBERR_TOOFEW:
399 reason = "incomplete multibyte sequence";
400 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
401 break;
402 case MBERR_INTERNAL:
403 PyErr_SetString(PyExc_RuntimeError,
404 "internal codec error");
405 return -1;
Victor Stinnerd1f99422013-07-16 21:41:43 +0200406 case MBERR_EXCEPTION:
407 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 default:
409 PyErr_SetString(PyExc_RuntimeError,
410 "unknown runtime error");
411 return -1;
412 }
413 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 if (errors == ERROR_REPLACE) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200416 if (_PyUnicodeWriter_WriteChar(&buf->writer,
417 Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
418 goto errorexit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 }
420 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
421 buf->inbuf += esize;
422 return 0;
423 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
426 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000428 /* use cached exception object if available */
429 if (buf->excobj == NULL) {
430 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
431 (const char *)buf->inbuf_top,
432 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
433 start, end, reason);
434 if (buf->excobj == NULL)
435 goto errorexit;
436 }
437 else
438 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
439 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
440 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
441 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 if (errors == ERROR_STRICT) {
444 PyCodec_StrictErrors(buf->excobj);
445 goto errorexit;
446 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 retobj = call_error_callback(errors, buf->excobj);
449 if (retobj == NULL)
450 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
453 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
454 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
455 PyErr_SetString(PyExc_TypeError,
456 "decoding error handler must return "
Benjamin Petersonaff47232012-12-02 10:53:41 -0500457 "(str, int) tuple");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458 goto errorexit;
459 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000460
Victor Stinnera0dd0212013-04-11 22:09:04 +0200461 if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
Victor Stinner4eea8492011-11-21 03:01:27 +0100462 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
465 if (newpos < 0 && !PyErr_Occurred())
466 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
467 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
468 PyErr_Clear();
469 PyErr_Format(PyExc_IndexError,
470 "position %zd from error handler out of bounds",
471 newpos);
472 goto errorexit;
473 }
474 buf->inbuf = buf->inbuf_top + newpos;
475 Py_DECREF(retobj);
476 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000477
478errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 Py_XDECREF(retobj);
480 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000481}
482
483static PyObject *
484multibytecodec_encode(MultibyteCodec *codec,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 MultibyteCodec_State *state,
Victor Stinnerd9491262013-04-14 02:06:32 +0200486 PyObject *text, Py_ssize_t *inpos_t,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 PyObject *errors, int flags)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000488{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 MultibyteEncodeBuffer buf;
490 Py_ssize_t finalsize, r = 0;
Victor Stinnerd9491262013-04-14 02:06:32 +0200491 Py_ssize_t datalen;
492 int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300493 const void *data;
Victor Stinnerd9491262013-04-14 02:06:32 +0200494
495 if (PyUnicode_READY(text) < 0)
496 return NULL;
497 datalen = PyUnicode_GET_LENGTH(text);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000498
Victor Stinner6bcbef72011-05-24 22:17:55 +0200499 if (datalen == 0 && !(flags & MBENC_RESET))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 return PyBytes_FromStringAndSize(NULL, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 buf.excobj = NULL;
Brett Cannonb94767f2011-02-22 20:15:44 +0000503 buf.outobj = NULL;
Victor Stinnerd9491262013-04-14 02:06:32 +0200504 buf.inobj = text; /* borrowed reference */
505 buf.inpos = 0;
506 buf.inlen = datalen;
507 kind = PyUnicode_KIND(buf.inobj);
508 data = PyUnicode_DATA(buf.inobj);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
511 PyErr_NoMemory();
512 goto errorexit;
513 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000514
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
516 if (buf.outobj == NULL)
517 goto errorexit;
518 buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
519 buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000520
Victor Stinnerd9491262013-04-14 02:06:32 +0200521 while (buf.inpos < buf.inlen) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 /* we don't reuse inleft and outleft here.
523 * error callbacks can relocate the cursor anywhere on buffer*/
Victor Stinnerd9491262013-04-14 02:06:32 +0200524 Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
525
526 r = codec->encode(state, codec->config,
527 kind, data,
528 &buf.inpos, buf.inlen,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 &buf.outbuf, outleft, flags);
530 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
531 break;
532 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
533 goto errorexit;
534 else if (r == MBERR_TOOFEW)
535 break;
536 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000537
Victor Stinner6bcbef72011-05-24 22:17:55 +0200538 if (codec->encreset != NULL && (flags & MBENC_RESET))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 for (;;) {
540 Py_ssize_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
543 r = codec->encreset(state, codec->config, &buf.outbuf,
544 outleft);
545 if (r == 0)
546 break;
547 else if (multibytecodec_encerror(codec, state,
548 &buf, errors, r))
549 goto errorexit;
550 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000551
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 finalsize = (Py_ssize_t)((char *)buf.outbuf -
553 PyBytes_AS_STRING(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000554
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 if (finalsize != PyBytes_GET_SIZE(buf.outobj))
556 if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
557 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000558
Victor Stinnerd9491262013-04-14 02:06:32 +0200559 if (inpos_t)
560 *inpos_t = buf.inpos;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 Py_XDECREF(buf.excobj);
562 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000563
564errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 Py_XDECREF(buf.excobj);
566 Py_XDECREF(buf.outobj);
567 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000568}
569
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400570/*[clinic input]
571_multibytecodec.MultibyteCodec.encode
572
573 input: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300574 errors: str(accept={str, NoneType}) = None
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400575
576Return an encoded string version of `input'.
577
578'errors' may be given to set a different error handling scheme. Default is
579'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
580values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
581registered with codecs.register_error that can handle UnicodeEncodeErrors.
582[clinic start generated code]*/
583
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000584static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400585_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
586 PyObject *input,
587 const char *errors)
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300588/*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000589{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 MultibyteCodec_State state;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400591 PyObject *errorcb, *r, *ucvt;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000593
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400594 if (PyUnicode_Check(input))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 ucvt = NULL;
596 else {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400597 input = ucvt = PyObject_Str(input);
598 if (input == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 return NULL;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400600 else if (!PyUnicode_Check(input)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 PyErr_SetString(PyExc_TypeError,
602 "couldn't convert the object to unicode.");
603 Py_DECREF(ucvt);
604 return NULL;
605 }
606 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000607
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400608 if (PyUnicode_READY(input) < 0) {
Victor Stinner9a80fab2011-11-21 02:50:14 +0100609 Py_XDECREF(ucvt);
610 return NULL;
611 }
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400612 datalen = PyUnicode_GET_LENGTH(input);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 errorcb = internal_error_callback(errors);
615 if (errorcb == NULL) {
616 Py_XDECREF(ucvt);
617 return NULL;
618 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 if (self->codec->encinit != NULL &&
621 self->codec->encinit(&state, self->codec->config) != 0)
622 goto errorexit;
623 r = multibytecodec_encode(self->codec, &state,
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400624 input, NULL, errorcb,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000625 MBENC_FLUSH | MBENC_RESET);
626 if (r == NULL)
627 goto errorexit;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000629 ERROR_DECREF(errorcb);
630 Py_XDECREF(ucvt);
631 return make_tuple(r, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000632
633errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 ERROR_DECREF(errorcb);
635 Py_XDECREF(ucvt);
636 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000637}
638
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400639/*[clinic input]
640_multibytecodec.MultibyteCodec.decode
641
642 input: Py_buffer
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300643 errors: str(accept={str, NoneType}) = None
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400644
645Decodes 'input'.
646
647'errors' may be given to set a different error handling scheme. Default is
648'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
649values are 'ignore' and 'replace' as well as any other name registered with
650codecs.register_error that is able to handle UnicodeDecodeErrors."
651[clinic start generated code]*/
652
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000653static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400654_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
655 Py_buffer *input,
656 const char *errors)
Serhiy Storchaka279f4462019-09-14 12:24:05 +0300657/*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000658{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000659 MultibyteCodec_State state;
660 MultibyteDecodeBuffer buf;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200661 PyObject *errorcb, *res;
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400662 const char *data;
Victor Stinnera0dd0212013-04-11 22:09:04 +0200663 Py_ssize_t datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000664
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400665 data = input->buf;
666 datalen = input->len;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 errorcb = internal_error_callback(errors);
669 if (errorcb == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 return NULL;
671 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 if (datalen == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 ERROR_DECREF(errorcb);
Victor Stinnerb37b1742011-12-01 03:18:59 +0100675 return make_tuple(PyUnicode_New(0, 0), 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000676 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000677
Victor Stinner8f674cc2013-04-17 23:02:17 +0200678 _PyUnicodeWriter_Init(&buf.writer);
679 buf.writer.min_length = datalen;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 buf.excobj = NULL;
681 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
682 buf.inbuf_end = buf.inbuf_top + datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 if (self->codec->decinit != NULL &&
685 self->codec->decinit(&state, self->codec->config) != 0)
686 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 while (buf.inbuf < buf.inbuf_end) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200689 Py_ssize_t inleft, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000693 r = self->codec->decode(&state, self->codec->config,
Victor Stinnera0dd0212013-04-11 22:09:04 +0200694 &buf.inbuf, inleft, &buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 if (r == 0)
696 break;
697 else if (multibytecodec_decerror(self->codec, &state,
698 &buf, errorcb, r))
699 goto errorexit;
700 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000701
Victor Stinnera0dd0212013-04-11 22:09:04 +0200702 res = _PyUnicodeWriter_Finish(&buf.writer);
703 if (res == NULL)
704 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000705
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 Py_XDECREF(buf.excobj);
707 ERROR_DECREF(errorcb);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200708 return make_tuple(res, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000709
710errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 ERROR_DECREF(errorcb);
712 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +0200713 _PyUnicodeWriter_Dealloc(&buf.writer);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000716}
717
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000718static struct PyMethodDef multibytecodec_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400719 _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
720 _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
721 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000722};
723
Victor Stinner11ef53a2021-01-08 15:43:59 +0100724static int
725multibytecodec_traverse(PyObject *self, visitproc visit, void *arg)
726{
727 Py_VISIT(Py_TYPE(self));
728 return 0;
729}
730
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000731static void
732multibytecodec_dealloc(MultibyteCodecObject *self)
733{
Victor Stinner11ef53a2021-01-08 15:43:59 +0100734 PyObject_GC_UnTrack(self);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100735 PyTypeObject *tp = Py_TYPE(self);
Victor Stinner11ef53a2021-01-08 15:43:59 +0100736 tp->tp_free(self);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100737 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000738}
739
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100740static PyType_Slot multibytecodec_slots[] = {
741 {Py_tp_dealloc, multibytecodec_dealloc},
742 {Py_tp_getattro, PyObject_GenericGetAttr},
743 {Py_tp_methods, multibytecodec_methods},
Victor Stinner11ef53a2021-01-08 15:43:59 +0100744 {Py_tp_traverse, multibytecodec_traverse},
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100745 {0, NULL},
746};
747
748static PyType_Spec multibytecodec_spec = {
749 .name = MODULE_NAME ".MultibyteCodec",
750 .basicsize = sizeof(MultibyteCodecObject),
Erlend Egeberg Aasland9746cda2021-04-30 16:04:57 +0200751 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700752 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +0100753 .slots = multibytecodec_slots,
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000754};
755
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000756
/**
 * Utility functions for stateful codec mechanism
 */
760
/* Cast an object pointer to the stateful decoder / encoder context type
   expected by the helper functions below. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000763
764static PyObject *
765encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 PyObject *unistr, int final)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000767{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000768 PyObject *ucvt, *r = NULL;
Victor Stinnerd9491262013-04-14 02:06:32 +0200769 PyObject *inbuf = NULL;
770 Py_ssize_t inpos, datalen;
771 PyObject *origpending = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 if (PyUnicode_Check(unistr))
774 ucvt = NULL;
775 else {
776 unistr = ucvt = PyObject_Str(unistr);
777 if (unistr == NULL)
778 return NULL;
779 else if (!PyUnicode_Check(unistr)) {
780 PyErr_SetString(PyExc_TypeError,
Benjamin Petersonaff47232012-12-02 10:53:41 -0500781 "couldn't convert the object to str.");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 Py_DECREF(ucvt);
783 return NULL;
784 }
785 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000786
Victor Stinnerd9491262013-04-14 02:06:32 +0200787 if (ctx->pending) {
788 PyObject *inbuf_tmp;
789
790 Py_INCREF(ctx->pending);
791 origpending = ctx->pending;
792
793 Py_INCREF(ctx->pending);
794 inbuf_tmp = ctx->pending;
795 PyUnicode_Append(&inbuf_tmp, unistr);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 if (inbuf_tmp == NULL)
797 goto errorexit;
Victor Stinnerd9491262013-04-14 02:06:32 +0200798 Py_CLEAR(ctx->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 inbuf = inbuf_tmp;
800 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200801 else {
802 origpending = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000803
Victor Stinnerd9491262013-04-14 02:06:32 +0200804 Py_INCREF(unistr);
805 inbuf = unistr;
806 }
807 if (PyUnicode_READY(inbuf) < 0)
808 goto errorexit;
809 inpos = 0;
810 datalen = PyUnicode_GET_LENGTH(inbuf);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 r = multibytecodec_encode(ctx->codec, &ctx->state,
Victor Stinnerd9491262013-04-14 02:06:32 +0200813 inbuf, &inpos,
814 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 if (r == NULL) {
816 /* recover the original pending buffer */
Serhiy Storchaka48842712016-04-06 09:45:48 +0300817 Py_XSETREF(ctx->pending, origpending);
Victor Stinnerd9491262013-04-14 02:06:32 +0200818 origpending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 goto errorexit;
820 }
Victor Stinner322cc742013-04-14 18:11:41 +0200821 Py_XDECREF(origpending);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000822
Victor Stinnerd9491262013-04-14 02:06:32 +0200823 if (inpos < datalen) {
824 if (datalen - inpos > MAXENCPENDING) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 /* normal codecs can't reach here */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 PyErr_SetString(PyExc_UnicodeError,
827 "pending buffer overflow");
828 goto errorexit;
829 }
Victor Stinnerd9491262013-04-14 02:06:32 +0200830 ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
831 if (ctx->pending == NULL) {
832 /* normal codecs can't reach here */
833 goto errorexit;
834 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000836
Victor Stinner322cc742013-04-14 18:11:41 +0200837 Py_DECREF(inbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 Py_XDECREF(ucvt);
839 return r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000840
841errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 Py_XDECREF(r);
843 Py_XDECREF(ucvt);
Victor Stinnerd9491262013-04-14 02:06:32 +0200844 Py_XDECREF(origpending);
Victor Stinner322cc742013-04-14 18:11:41 +0200845 Py_XDECREF(inbuf);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000847}
848
849static int
850decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 MultibyteDecodeBuffer *buf)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000852{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 Py_ssize_t npendings;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
856 if (npendings + ctx->pendingsize > MAXDECPENDING ||
857 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
858 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
859 return -1;
860 }
861 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
862 ctx->pendingsize += npendings;
863 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000864}
865
866static int
867decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 Py_ssize_t size)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
871 buf->inbuf_end = buf->inbuf_top + size;
Victor Stinner8f674cc2013-04-17 23:02:17 +0200872 buf->writer.min_length += size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000874}
875
876static int
877decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 MultibyteDecodeBuffer *buf)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000879{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 while (buf->inbuf < buf->inbuf_end) {
Victor Stinnera0dd0212013-04-11 22:09:04 +0200881 Py_ssize_t inleft;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 Py_ssize_t r;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000883
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000885
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000886 r = ctx->codec->decode(&ctx->state, ctx->codec->config,
Victor Stinnera0dd0212013-04-11 22:09:04 +0200887 &buf->inbuf, inleft, &buf->writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 if (r == 0 || r == MBERR_TOOFEW)
889 break;
890 else if (multibytecodec_decerror(ctx->codec, &ctx->state,
891 buf, ctx->errors, r))
892 return -1;
893 }
894 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000895}
896
897
/*[clinic input]
_multibytecodec.MultibyteIncrementalEncoder.encode

    input: object
    final: bool(accept={int}) = False
[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000904
905static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -0400906_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
907 PyObject *input,
908 int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200909/*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000910{
Brett Cannonf2de1fc2014-08-22 11:45:03 -0400911 return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000912}
913
/*[clinic input]
_multibytecodec.MultibyteIncrementalEncoder.getstate
[clinic start generated code]*/
917
918static PyObject *
919_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
920/*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
921{
922 /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
923 for UTF-8 encoded buffer (each character can use up to 4
924 bytes), and required bytes for MultibyteCodec_State.c. A byte
925 array is used to avoid different compilers generating different
926 values for the same state, e.g. as a result of struct padding.
927 */
928 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
929 Py_ssize_t statesize;
930 const char *pendingbuffer = NULL;
931 Py_ssize_t pendingsize;
932
933 if (self->pending != NULL) {
934 pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
935 if (pendingbuffer == NULL) {
936 return NULL;
937 }
938 if (pendingsize > MAXENCPENDING*4) {
939 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
940 return NULL;
941 }
Victor Stinnercdbcb772018-11-22 10:25:46 +0100942 statebytes[0] = (unsigned char)pendingsize;
943 memcpy(statebytes + 1, pendingbuffer, pendingsize);
Christopher Thorneac22f6a2018-11-01 10:48:49 +0000944 statesize = 1 + pendingsize;
945 } else {
946 statebytes[0] = 0;
947 statesize = 1;
948 }
949 memcpy(statebytes+statesize, self->state.c,
950 sizeof(self->state.c));
951 statesize += sizeof(self->state.c);
952
953 return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
954 1 /* little-endian */ ,
955 0 /* unsigned */ );
956}
957
/*[clinic input]
_multibytecodec.MultibyteIncrementalEncoder.setstate
    state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
    /
[clinic start generated code]*/
963
964static PyObject *
965_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
966 PyLongObject *statelong)
967/*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
968{
969 PyObject *pending = NULL;
970 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
971
972 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
973 1 /* little-endian */ ,
974 0 /* unsigned */ ) < 0) {
975 goto errorexit;
976 }
977
978 if (statebytes[0] > MAXENCPENDING*4) {
979 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
980 return NULL;
981 }
982
983 pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
984 statebytes[0], "strict");
985 if (pending == NULL) {
986 goto errorexit;
987 }
988
989 Py_CLEAR(self->pending);
990 self->pending = pending;
991 memcpy(self->state.c, statebytes+1+statebytes[0],
992 sizeof(self->state.c));
993
994 Py_RETURN_NONE;
995
996errorexit:
997 Py_XDECREF(pending);
998 return NULL;
999}
1000
/*[clinic input]
_multibytecodec.MultibyteIncrementalEncoder.reset
[clinic start generated code]*/
1004
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001005static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001006_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1007/*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001008{
Victor Stinnere15dce32011-05-30 22:56:00 +02001009 /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1010 unsigned char buffer[4], *outbuf;
1011 Py_ssize_t r;
1012 if (self->codec->encreset != NULL) {
1013 outbuf = buffer;
1014 r = self->codec->encreset(&self->state, self->codec->config,
1015 &outbuf, sizeof(buffer));
1016 if (r != 0)
1017 return NULL;
1018 }
Victor Stinnerd9491262013-04-14 02:06:32 +02001019 Py_CLEAR(self->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001021}
1022
1023static struct PyMethodDef mbiencoder_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001024 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001025 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1026 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001027 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1028 {NULL, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001029};
1030
1031static PyObject *
1032mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 MultibyteIncrementalEncoderObject *self;
1035 PyObject *codec = NULL;
1036 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1039 incnewkwarglist, &errors))
1040 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1043 if (self == NULL)
1044 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1047 if (codec == NULL)
1048 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001049
1050 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1051 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1053 goto errorexit;
1054 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 self->codec = ((MultibyteCodecObject *)codec)->codec;
Victor Stinnerd9491262013-04-14 02:06:32 +02001057 self->pending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 self->errors = internal_error_callback(errors);
1059 if (self->errors == NULL)
1060 goto errorexit;
1061 if (self->codec->encinit != NULL &&
1062 self->codec->encinit(&self->state, self->codec->config) != 0)
1063 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 Py_DECREF(codec);
1066 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001067
1068errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 Py_XDECREF(self);
1070 Py_XDECREF(codec);
1071 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001072}
1073
1074static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001075mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1076{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001078}
1079
1080static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001081mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001083{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 if (ERROR_ISCUSTOM(self->errors))
1085 Py_VISIT(self->errors);
1086 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001087}
1088
1089static void
1090mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1091{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001092 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 PyObject_GC_UnTrack(self);
1094 ERROR_DECREF(self->errors);
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001095 Py_CLEAR(self->pending);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001096 tp->tp_free(self);
1097 Py_DECREF(tp);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001098}
1099
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001100static PyType_Slot encoder_slots[] = {
1101 {Py_tp_dealloc, mbiencoder_dealloc},
1102 {Py_tp_getattro, PyObject_GenericGetAttr},
1103 {Py_tp_traverse, mbiencoder_traverse},
1104 {Py_tp_methods, mbiencoder_methods},
1105 {Py_tp_getset, codecctx_getsets},
1106 {Py_tp_init, mbiencoder_init},
1107 {Py_tp_new, mbiencoder_new},
1108 {0, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001109};
1110
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001111static PyType_Spec encoder_spec = {
1112 .name = MODULE_NAME ".MultibyteIncrementalEncoder",
1113 .basicsize = sizeof(MultibyteIncrementalEncoderObject),
Miss Islington (bot)7297d742021-06-17 03:19:44 -07001114 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1115 Py_TPFLAGS_IMMUTABLETYPE),
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001116 .slots = encoder_slots,
1117};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001118
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001119
/*[clinic input]
_multibytecodec.MultibyteIncrementalDecoder.decode

    input: Py_buffer
    final: bool(accept={int}) = False
[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001126
1127static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001128_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1129 Py_buffer *input,
1130 int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001131/*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001132{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 MultibyteDecodeBuffer buf;
1134 char *data, *wdata = NULL;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001135 Py_ssize_t wsize, size, origpending;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001136 PyObject *res;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001137
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001138 data = input->buf;
1139 size = input->len;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001140
Victor Stinner8f674cc2013-04-17 23:02:17 +02001141 _PyUnicodeWriter_Init(&buf.writer);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001142 buf.excobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 origpending = self->pendingsize;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 if (self->pendingsize == 0) {
1146 wsize = size;
1147 wdata = data;
1148 }
1149 else {
1150 if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1151 PyErr_NoMemory();
1152 goto errorexit;
1153 }
1154 wsize = size + self->pendingsize;
1155 wdata = PyMem_Malloc(wsize);
Victor Stinner33283ba2013-07-15 17:47:39 +02001156 if (wdata == NULL) {
1157 PyErr_NoMemory();
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 goto errorexit;
Victor Stinner33283ba2013-07-15 17:47:39 +02001159 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 memcpy(wdata, self->pending, self->pendingsize);
1161 memcpy(wdata + self->pendingsize, data, size);
1162 self->pendingsize = 0;
1163 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1166 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1169 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 if (final && buf.inbuf < buf.inbuf_end) {
1172 if (multibytecodec_decerror(self->codec, &self->state,
1173 &buf, self->errors, MBERR_TOOFEW)) {
1174 /* recover the original pending buffer */
1175 memcpy(self->pending, wdata, origpending);
1176 self->pendingsize = origpending;
1177 goto errorexit;
1178 }
1179 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1182 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1183 goto errorexit;
1184 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001185
Victor Stinnera0dd0212013-04-11 22:09:04 +02001186 res = _PyUnicodeWriter_Finish(&buf.writer);
1187 if (res == NULL)
1188 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 if (wdata != data)
Victor Stinner00d7abd2020-12-01 09:56:42 +01001191 PyMem_Free(wdata);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001193 return res;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001194
1195errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 if (wdata != NULL && wdata != data)
Victor Stinner00d7abd2020-12-01 09:56:42 +01001197 PyMem_Free(wdata);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001199 _PyUnicodeWriter_Dealloc(&buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001201}
1202
/*[clinic input]
_multibytecodec.MultibyteIncrementalDecoder.getstate
[clinic start generated code]*/
1206
1207static PyObject *
1208_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1209/*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1210{
1211 PyObject *buffer;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001212 PyObject *statelong;
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001213
1214 buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1215 self->pendingsize);
1216 if (buffer == NULL) {
1217 return NULL;
1218 }
1219
Christopher Thorne488c0a62018-11-02 03:29:40 +00001220 statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1221 sizeof(self->state.c),
1222 1 /* little-endian */ ,
1223 0 /* unsigned */ );
1224 if (statelong == NULL) {
1225 Py_DECREF(buffer);
1226 return NULL;
1227 }
1228
1229 return Py_BuildValue("NN", buffer, statelong);
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001230}
1231
/*[clinic input]
_multibytecodec.MultibyteIncrementalDecoder.setstate
    state: object(subclass_of='&PyTuple_Type')
    /
[clinic start generated code]*/
1237
1238static PyObject *
1239_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1240 PyObject *state)
1241/*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1242{
1243 PyObject *buffer;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001244 PyLongObject *statelong;
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001245 Py_ssize_t buffersize;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03001246 const char *bufferstr;
Christopher Thorne488c0a62018-11-02 03:29:40 +00001247 unsigned char statebytes[8];
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001248
Christopher Thorne488c0a62018-11-02 03:29:40 +00001249 if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1250 &buffer, &PyLong_Type, &statelong))
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001251 {
1252 return NULL;
1253 }
1254
Christopher Thorne488c0a62018-11-02 03:29:40 +00001255 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1256 1 /* little-endian */ ,
1257 0 /* unsigned */ ) < 0) {
1258 return NULL;
1259 }
1260
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001261 buffersize = PyBytes_Size(buffer);
1262 if (buffersize == -1) {
1263 return NULL;
1264 }
1265
1266 if (buffersize > MAXDECPENDING) {
1267 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1268 return NULL;
1269 }
1270
1271 bufferstr = PyBytes_AsString(buffer);
1272 if (bufferstr == NULL) {
1273 return NULL;
1274 }
1275 self->pendingsize = buffersize;
1276 memcpy(self->pending, bufferstr, self->pendingsize);
Christopher Thorne488c0a62018-11-02 03:29:40 +00001277 memcpy(self->state.c, statebytes, sizeof(statebytes));
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001278
1279 Py_RETURN_NONE;
1280}
1281
/*[clinic input]
_multibytecodec.MultibyteIncrementalDecoder.reset
[clinic start generated code]*/
1285
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001286static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001287_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1288/*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001289{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 if (self->codec->decreset != NULL &&
1291 self->codec->decreset(&self->state, self->codec->config) != 0)
1292 return NULL;
1293 self->pendingsize = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001294
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001296}
1297
1298static struct PyMethodDef mbidecoder_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001299 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
Christopher Thorneac22f6a2018-11-01 10:48:49 +00001300 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1301 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001302 _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1303 {NULL, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001304};
1305
1306static PyObject *
1307mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1308{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 MultibyteIncrementalDecoderObject *self;
1310 PyObject *codec = NULL;
1311 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1314 incnewkwarglist, &errors))
1315 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001316
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1318 if (self == NULL)
1319 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1322 if (codec == NULL)
1323 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001324
1325 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1326 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1328 goto errorexit;
1329 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 self->codec = ((MultibyteCodecObject *)codec)->codec;
1332 self->pendingsize = 0;
1333 self->errors = internal_error_callback(errors);
1334 if (self->errors == NULL)
1335 goto errorexit;
1336 if (self->codec->decinit != NULL &&
1337 self->codec->decinit(&self->state, self->codec->config) != 0)
1338 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001339
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 Py_DECREF(codec);
1341 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001342
1343errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 Py_XDECREF(self);
1345 Py_XDECREF(codec);
1346 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001347}
1348
1349static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001350mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1351{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001353}
1354
1355static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001356mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 if (ERROR_ISCUSTOM(self->errors))
1360 Py_VISIT(self->errors);
1361 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001362}
1363
1364static void
1365mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1366{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001367 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 PyObject_GC_UnTrack(self);
1369 ERROR_DECREF(self->errors);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001370 tp->tp_free(self);
1371 Py_DECREF(tp);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001372}
1373
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001374static PyType_Slot decoder_slots[] = {
1375 {Py_tp_dealloc, mbidecoder_dealloc},
1376 {Py_tp_getattro, PyObject_GenericGetAttr},
1377 {Py_tp_traverse, mbidecoder_traverse},
1378 {Py_tp_methods, mbidecoder_methods},
1379 {Py_tp_getset, codecctx_getsets},
1380 {Py_tp_init, mbidecoder_init},
1381 {Py_tp_new, mbidecoder_new},
1382 {0, NULL},
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001383};
1384
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001385static PyType_Spec decoder_spec = {
1386 .name = MODULE_NAME ".MultibyteIncrementalDecoder",
1387 .basicsize = sizeof(MultibyteIncrementalDecoderObject),
Miss Islington (bot)7297d742021-06-17 03:19:44 -07001388 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1389 Py_TPFLAGS_IMMUTABLETYPE),
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001390 .slots = decoder_slots,
1391};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001392
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001393static PyObject *
1394mbstreamreader_iread(MultibyteStreamReaderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 const char *method, Py_ssize_t sizehint)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001396{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 MultibyteDecodeBuffer buf;
Victor Stinnera0dd0212013-04-11 22:09:04 +02001398 PyObject *cres, *res;
1399 Py_ssize_t rsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (sizehint == 0)
Victor Stinnerb37b1742011-12-01 03:18:59 +01001402 return PyUnicode_New(0, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001403
Victor Stinner8f674cc2013-04-17 23:02:17 +02001404 _PyUnicodeWriter_Init(&buf.writer);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001405 buf.excobj = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 for (;;) {
1409 int endoffile;
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 if (sizehint < 0)
1412 cres = PyObject_CallMethod(self->stream,
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001413 method, NULL);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 else
1415 cres = PyObject_CallMethod(self->stream,
Serhiy Storchaka20b39b22014-09-28 11:27:24 +03001416 method, "i", sizehint);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 if (cres == NULL)
1418 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 if (!PyBytes_Check(cres)) {
1421 PyErr_Format(PyExc_TypeError,
1422 "stream function returned a "
1423 "non-bytes object (%.100s)",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001424 Py_TYPE(cres)->tp_name);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 goto errorexit;
1426 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 endoffile = (PyBytes_GET_SIZE(cres) == 0);
Guido van Rossumcd16bf62007-06-13 18:07:49 +00001429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 if (self->pendingsize > 0) {
1431 PyObject *ctr;
1432 char *ctrdata;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001433
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1435 PyErr_NoMemory();
1436 goto errorexit;
Victor Stinner064bbdc2013-07-08 22:28:27 +02001437 }
1438 rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1439 ctr = PyBytes_FromStringAndSize(NULL, rsize);
1440 if (ctr == NULL)
1441 goto errorexit;
1442 ctrdata = PyBytes_AS_STRING(ctr);
1443 memcpy(ctrdata, self->pending, self->pendingsize);
1444 memcpy(ctrdata + self->pendingsize,
1445 PyBytes_AS_STRING(cres),
1446 PyBytes_GET_SIZE(cres));
1447 Py_DECREF(cres);
1448 cres = ctr;
1449 self->pendingsize = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001451
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 rsize = PyBytes_GET_SIZE(cres);
1453 if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1454 rsize) != 0)
1455 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 if (rsize > 0 && decoder_feed_buffer(
1458 (MultibyteStatefulDecoderContext *)self, &buf))
1459 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 if (endoffile || sizehint < 0) {
1462 if (buf.inbuf < buf.inbuf_end &&
1463 multibytecodec_decerror(self->codec, &self->state,
1464 &buf, self->errors, MBERR_TOOFEW))
1465 goto errorexit;
1466 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1469 if (decoder_append_pending(STATEFUL_DCTX(self),
1470 &buf) != 0)
1471 goto errorexit;
1472 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 Py_DECREF(cres);
1475 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001476
Victor Stinnera0dd0212013-04-11 22:09:04 +02001477 if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 break;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001479
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 sizehint = 1; /* read 1 more byte and retry */
1481 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001482
Victor Stinnera0dd0212013-04-11 22:09:04 +02001483 res = _PyUnicodeWriter_Finish(&buf.writer);
1484 if (res == NULL)
1485 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001486
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 Py_XDECREF(cres);
1488 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001489 return res;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001490
1491errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 Py_XDECREF(cres);
1493 Py_XDECREF(buf.excobj);
Victor Stinnera0dd0212013-04-11 22:09:04 +02001494 _PyUnicodeWriter_Dealloc(&buf.writer);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001496}
1497
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001498/*[clinic input]
1499 _multibytecodec.MultibyteStreamReader.read
1500
1501 sizeobj: object = None
1502 /
1503[clinic start generated code]*/
1504
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001505static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001506_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1507 PyObject *sizeobj)
1508/*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001509{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001511
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001512 if (sizeobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 size = -1;
1514 else if (PyLong_Check(sizeobj))
1515 size = PyLong_AsSsize_t(sizeobj);
1516 else {
1517 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1518 return NULL;
1519 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001520
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 if (size == -1 && PyErr_Occurred())
1522 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001524 return mbstreamreader_iread(self, "read", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001525}
1526
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001527/*[clinic input]
1528 _multibytecodec.MultibyteStreamReader.readline
1529
1530 sizeobj: object = None
1531 /
1532[clinic start generated code]*/
1533
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001534static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001535_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1536 PyObject *sizeobj)
1537/*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 Py_ssize_t size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001540
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001541 if (sizeobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001542 size = -1;
1543 else if (PyLong_Check(sizeobj))
1544 size = PyLong_AsSsize_t(sizeobj);
1545 else {
1546 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1547 return NULL;
1548 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001549
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001550 if (size == -1 && PyErr_Occurred())
1551 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001553 return mbstreamreader_iread(self, "readline", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001554}
1555
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001556/*[clinic input]
1557 _multibytecodec.MultibyteStreamReader.readlines
1558
1559 sizehintobj: object = None
1560 /
1561[clinic start generated code]*/
1562
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001563static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001564_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1565 PyObject *sizehintobj)
1566/*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001567{
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001568 PyObject *r, *sr;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 Py_ssize_t sizehint;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001570
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001571 if (sizehintobj == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 sizehint = -1;
1573 else if (PyLong_Check(sizehintobj))
1574 sizehint = PyLong_AsSsize_t(sizehintobj);
1575 else {
1576 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1577 return NULL;
1578 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 if (sizehint == -1 && PyErr_Occurred())
1581 return NULL;
Guido van Rossumddefaf32007-01-14 03:31:43 +00001582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 r = mbstreamreader_iread(self, "read", sizehint);
1584 if (r == NULL)
1585 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 sr = PyUnicode_Splitlines(r, 1);
1588 Py_DECREF(r);
1589 return sr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001590}
1591
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001592/*[clinic input]
1593 _multibytecodec.MultibyteStreamReader.reset
1594[clinic start generated code]*/
1595
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001596static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001597_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1598/*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001599{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 if (self->codec->decreset != NULL &&
1601 self->codec->decreset(&self->state, self->codec->config) != 0)
1602 return NULL;
1603 self->pendingsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001606}
1607
1608static struct PyMethodDef mbstreamreader_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001609 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1610 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1611 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1612 _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001613 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001614};
1615
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001616static PyMemberDef mbstreamreader_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 {"stream", T_OBJECT,
1618 offsetof(MultibyteStreamReaderObject, stream),
1619 READONLY, NULL},
1620 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001621};
1622
1623static PyObject *
1624mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1625{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 MultibyteStreamReaderObject *self;
1627 PyObject *stream, *codec = NULL;
1628 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001630 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1631 streamkwarglist, &stream, &errors))
1632 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1635 if (self == NULL)
1636 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1639 if (codec == NULL)
1640 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001641
1642 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1643 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1645 goto errorexit;
1646 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 self->codec = ((MultibyteCodecObject *)codec)->codec;
1649 self->stream = stream;
1650 Py_INCREF(stream);
1651 self->pendingsize = 0;
1652 self->errors = internal_error_callback(errors);
1653 if (self->errors == NULL)
1654 goto errorexit;
1655 if (self->codec->decinit != NULL &&
1656 self->codec->decinit(&self->state, self->codec->config) != 0)
1657 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 Py_DECREF(codec);
1660 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001661
1662errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 Py_XDECREF(self);
1664 Py_XDECREF(codec);
1665 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001666}
1667
1668static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001669mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1670{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001672}
1673
1674static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001675mbstreamreader_traverse(MultibyteStreamReaderObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001677{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 if (ERROR_ISCUSTOM(self->errors))
1679 Py_VISIT(self->errors);
1680 Py_VISIT(self->stream);
1681 return 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001682}
1683
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001684static void
1685mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1686{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001687 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 PyObject_GC_UnTrack(self);
1689 ERROR_DECREF(self->errors);
1690 Py_XDECREF(self->stream);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001691 tp->tp_free(self);
1692 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001693}
1694
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001695static PyType_Slot reader_slots[] = {
1696 {Py_tp_dealloc, mbstreamreader_dealloc},
1697 {Py_tp_getattro, PyObject_GenericGetAttr},
1698 {Py_tp_traverse, mbstreamreader_traverse},
1699 {Py_tp_methods, mbstreamreader_methods},
1700 {Py_tp_members, mbstreamreader_members},
1701 {Py_tp_getset, codecctx_getsets},
1702 {Py_tp_init, mbstreamreader_init},
1703 {Py_tp_new, mbstreamreader_new},
1704 {0, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001705};
1706
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001707static PyType_Spec reader_spec = {
1708 .name = MODULE_NAME ".MultibyteStreamReader",
1709 .basicsize = sizeof(MultibyteStreamReaderObject),
Miss Islington (bot)7297d742021-06-17 03:19:44 -07001710 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1711 Py_TPFLAGS_IMMUTABLETYPE),
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001712 .slots = reader_slots,
1713};
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001714
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001715static int
1716mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001717 PyObject *unistr)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001718{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001719 PyObject *str, *wr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001721 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1722 if (str == NULL)
1723 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001724
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001725 wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, str);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 Py_DECREF(str);
1727 if (wr == NULL)
1728 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 Py_DECREF(wr);
1731 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001732}
1733
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001734/*[clinic input]
1735 _multibytecodec.MultibyteStreamWriter.write
1736
1737 strobj: object
1738 /
1739[clinic start generated code]*/
1740
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001741static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001742_multibytecodec_MultibyteStreamWriter_write(MultibyteStreamWriterObject *self,
1743 PyObject *strobj)
1744/*[clinic end generated code: output=e13ae841c895251e input=551dc4c018c10a2b]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001745{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 if (mbstreamwriter_iwrite(self, strobj))
1747 return NULL;
1748 else
1749 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001750}
1751
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001752/*[clinic input]
1753 _multibytecodec.MultibyteStreamWriter.writelines
1754
1755 lines: object
1756 /
1757[clinic start generated code]*/
1758
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001759static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04001760_multibytecodec_MultibyteStreamWriter_writelines(MultibyteStreamWriterObject *self,
1761 PyObject *lines)
1762/*[clinic end generated code: output=e5c4285ac8e7d522 input=57797fe7008d4e96]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001763{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 PyObject *strobj;
1765 int i, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 if (!PySequence_Check(lines)) {
1768 PyErr_SetString(PyExc_TypeError,
1769 "arg must be a sequence object");
1770 return NULL;
1771 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001773 for (i = 0; i < PySequence_Length(lines); i++) {
1774 /* length can be changed even within this loop */
1775 strobj = PySequence_GetItem(lines, i);
1776 if (strobj == NULL)
1777 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001779 r = mbstreamwriter_iwrite(self, strobj);
1780 Py_DECREF(strobj);
1781 if (r == -1)
1782 return NULL;
1783 }
Serhiy Storchakabf623ae2017-04-19 20:03:52 +03001784 /* PySequence_Length() can fail */
1785 if (PyErr_Occurred())
1786 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788 Py_RETURN_NONE;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001789}
1790
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001791/*[clinic input]
1792 _multibytecodec.MultibyteStreamWriter.reset
1793[clinic start generated code]*/
1794
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001795static PyObject *
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001796_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self)
1797/*[clinic end generated code: output=8f54a4d9b03db5ff input=b56dbcbaf35cc10c]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001798{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 PyObject *pwrt;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001800
Victor Stinner579db162015-07-16 22:17:31 +02001801 if (!self->pending)
1802 Py_RETURN_NONE;
1803
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001804 pwrt = multibytecodec_encode(self->codec, &self->state,
Victor Stinnerd9491262013-04-14 02:06:32 +02001805 self->pending, NULL, self->errors,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 MBENC_FLUSH | MBENC_RESET);
1807 /* some pending buffer can be truncated when UnicodeEncodeError is
1808 * raised on 'strict' mode. but, 'reset' method is designed to
1809 * reset the pending buffer or states so failed string sequence
1810 * ought to be missed */
Victor Stinnerd9491262013-04-14 02:06:32 +02001811 Py_CLEAR(self->pending);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 if (pwrt == NULL)
1813 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001814
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 assert(PyBytes_Check(pwrt));
1816 if (PyBytes_Size(pwrt) > 0) {
1817 PyObject *wr;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001818
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001819 wr = _PyObject_CallMethodIdOneArg(self->stream, &PyId_write, pwrt);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001820 if (wr == NULL) {
1821 Py_DECREF(pwrt);
1822 return NULL;
1823 }
1824 }
1825 Py_DECREF(pwrt);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 Py_RETURN_NONE;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001828}
1829
1830static PyObject *
1831mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1832{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001833 MultibyteStreamWriterObject *self;
1834 PyObject *stream, *codec = NULL;
1835 char *errors = NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001837 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1838 streamkwarglist, &stream, &errors))
1839 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1842 if (self == NULL)
1843 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1846 if (codec == NULL)
1847 goto errorexit;
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001848
1849 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1850 if (!MultibyteCodec_Check(state, codec)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1852 goto errorexit;
1853 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001855 self->codec = ((MultibyteCodecObject *)codec)->codec;
1856 self->stream = stream;
1857 Py_INCREF(stream);
Victor Stinnerd9491262013-04-14 02:06:32 +02001858 self->pending = NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001859 self->errors = internal_error_callback(errors);
1860 if (self->errors == NULL)
1861 goto errorexit;
1862 if (self->codec->encinit != NULL &&
1863 self->codec->encinit(&self->state, self->codec->config) != 0)
1864 goto errorexit;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001865
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001866 Py_DECREF(codec);
1867 return (PyObject *)self;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001868
1869errorexit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001870 Py_XDECREF(self);
1871 Py_XDECREF(codec);
1872 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001873}
1874
1875static int
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001876mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1877{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001878 return 0;
Hye-Shik Chang13247bf2006-04-21 16:21:44 +00001879}
1880
1881static int
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001882mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001883 visitproc visit, void *arg)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001884{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 if (ERROR_ISCUSTOM(self->errors))
1886 Py_VISIT(self->errors);
1887 Py_VISIT(self->stream);
1888 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001889}
1890
1891static void
1892mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1893{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001894 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001895 PyObject_GC_UnTrack(self);
1896 ERROR_DECREF(self->errors);
1897 Py_XDECREF(self->stream);
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001898 tp->tp_free(self);
1899 Py_DECREF(tp);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001900}
1901
1902static struct PyMethodDef mbstreamwriter_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001903 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1904 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1905 _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1906 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001907};
1908
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001909static PyMemberDef mbstreamwriter_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001910 {"stream", T_OBJECT,
1911 offsetof(MultibyteStreamWriterObject, stream),
1912 READONLY, NULL},
1913 {NULL,}
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001914};
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001915
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001916static PyType_Slot writer_slots[] = {
1917 {Py_tp_dealloc, mbstreamwriter_dealloc},
1918 {Py_tp_getattro, PyObject_GenericGetAttr},
1919 {Py_tp_traverse, mbstreamwriter_traverse},
1920 {Py_tp_methods, mbstreamwriter_methods},
1921 {Py_tp_members, mbstreamwriter_members},
1922 {Py_tp_getset, codecctx_getsets},
1923 {Py_tp_init, mbstreamwriter_init},
1924 {Py_tp_new, mbstreamwriter_new},
1925 {0, NULL},
1926};
1927
1928static PyType_Spec writer_spec = {
1929 .name = MODULE_NAME ".MultibyteStreamWriter",
1930 .basicsize = sizeof(MultibyteStreamWriterObject),
Miss Islington (bot)7297d742021-06-17 03:19:44 -07001931 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1932 Py_TPFLAGS_IMMUTABLETYPE),
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001933 .slots = writer_slots,
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001934};
1935
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001936
Brett Cannonf2de1fc2014-08-22 11:45:03 -04001937/*[clinic input]
1938_multibytecodec.__create_codec
1939
1940 arg: object
1941 /
1942[clinic start generated code]*/
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001943
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001944static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03001945_multibytecodec___create_codec(PyObject *module, PyObject *arg)
1946/*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001947{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001948 MultibyteCodecObject *self;
1949 MultibyteCodec *codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001951 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1952 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1953 return NULL;
1954 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1957 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1958 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001959
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001960 _multibytecodec_state *state = _multibytecodec_get_state(module);
Victor Stinner11ef53a2021-01-08 15:43:59 +01001961 self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 if (self == NULL)
1963 return NULL;
1964 self->codec = codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001965
Victor Stinner11ef53a2021-01-08 15:43:59 +01001966 PyObject_GC_Track(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001968}
1969
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01001970static int
1971_multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
1972{
1973 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1974 Py_VISIT(state->multibytecodec_type);
1975 Py_VISIT(state->encoder_type);
1976 Py_VISIT(state->decoder_type);
1977 Py_VISIT(state->reader_type);
1978 Py_VISIT(state->writer_type);
1979 return 0;
1980}
1981
1982static int
1983_multibytecodec_clear(PyObject *mod)
1984{
1985 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1986 Py_CLEAR(state->multibytecodec_type);
1987 Py_CLEAR(state->encoder_type);
1988 Py_CLEAR(state->decoder_type);
1989 Py_CLEAR(state->reader_type);
1990 Py_CLEAR(state->writer_type);
1991 return 0;
1992}
1993
1994static void
1995_multibytecodec_free(void *mod)
1996{
1997 _multibytecodec_clear((PyObject *)mod);
1998}
1999
/* Create a heap type from `spec`, associated with `module`, and store it
 * in the lvalue `type`.  On failure this makes the *enclosing function*
 * return -1, so it may only be used inside functions returning int. */
#define CREATE_TYPE(module, type, spec)                                      \
    do {                                                                     \
        (type) = (PyTypeObject *)PyType_FromModuleAndSpec((module), (spec),  \
                                                          NULL);             \
        if ((type) == NULL) {                                                \
            return -1;                                                       \
        }                                                                    \
    } while (0)
2007
/* Add `type` to `module` with PyModule_AddType().  On failure this makes
 * the *enclosing function* return -1, so it may only be used inside
 * functions returning int. */
#define ADD_TYPE(module, type)                          \
    do {                                                \
        if (PyModule_AddType((module), (type)) < 0) {   \
            return -1;                                  \
        }                                               \
    } while (0)
2014
2015static int
2016_multibytecodec_exec(PyObject *mod)
2017{
2018 _multibytecodec_state *state = _multibytecodec_get_state(mod);
2019 CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2020 CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2021 CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2022 CREATE_TYPE(mod, state->reader_type, &reader_spec);
2023 CREATE_TYPE(mod, state->writer_type, &writer_spec);
2024
2025 ADD_TYPE(mod, state->encoder_type);
2026 ADD_TYPE(mod, state->decoder_type);
2027 ADD_TYPE(mod, state->reader_type);
2028 ADD_TYPE(mod, state->writer_type);
2029 return 0;
2030}
2031
2032#undef CREATE_TYPE
2033#undef ADD_TYPE
2034
2035static struct PyMethodDef _multibytecodec_methods[] = {
Brett Cannonf2de1fc2014-08-22 11:45:03 -04002036 _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002038};
2039
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002040static PyModuleDef_Slot _multibytecodec_slots[] = {
2041 {Py_mod_exec, _multibytecodec_exec},
2042 {0, NULL}
2043};
Martin v. Löwis1a214512008-06-11 05:26:20 +00002044
2045static struct PyModuleDef _multibytecodecmodule = {
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002046 .m_base = PyModuleDef_HEAD_INIT,
2047 .m_name = "_multibytecodec",
2048 .m_size = sizeof(_multibytecodec_state),
2049 .m_methods = _multibytecodec_methods,
2050 .m_slots = _multibytecodec_slots,
2051 .m_traverse = _multibytecodec_traverse,
2052 .m_clear = _multibytecodec_clear,
2053 .m_free = _multibytecodec_free,
Martin v. Löwis1a214512008-06-11 05:26:20 +00002054};
2055
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002056PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002057PyInit__multibytecodec(void)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002058{
Erlend Egeberg Aasland0b858cd2021-01-04 22:33:45 +01002059 return PyModuleDef_Init(&_multibytecodecmodule);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00002060}