blob: 58b16a3ab7fa2558ce1cdb5dc757988c3206ce55 [file] [log] [blame]
/*
 * multibytecodec.c: Common Multibyte Codec Implementation
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 * $CJKCodecs: multibytecodec.c,v 1.12 2004/06/27 19:24:13 perky Exp $
 */
7
8#include "Python.h"
9#include "multibytecodec.h"
10
11
12typedef struct {
13 const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end;
14 unsigned char *outbuf, *outbuf_end;
15 PyObject *excobj, *outobj;
16} MultibyteEncodeBuffer;
17
18typedef struct {
19 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
20 Py_UNICODE *outbuf, *outbuf_end;
21 PyObject *excobj, *outobj;
22} MultibyteDecodeBuffer;
23
24PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
25"I.encode(unicode[, errors]) -> (string, length consumed)\n\
26\n\
27Return an encoded string version of `unicode'. errors may be given to\n\
28set a different error handling scheme. Default is 'strict' meaning that\n\
29encoding errors raise a UnicodeEncodeError. Other possible values are\n\
30'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
31registered with codecs.register_error that can handle UnicodeEncodeErrors.");
32
33PyDoc_STRVAR(MultibyteCodec_Decode__doc__,
34"I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\
35\n\
36Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\
37to set a different error handling scheme. Default is 'strict' meaning\n\
38that encoding errors raise a UnicodeDecodeError. Other possible values\n\
39are 'ignore' and 'replace' as well as any other name registerd with\n\
40codecs.register_error that is able to handle UnicodeDecodeErrors.");
41
42PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__,
43"I.StreamReader(stream[, errors]) -> StreamReader instance");
44
45PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__,
46"I.StreamWriter(stream[, errors]) -> StreamWriter instance");
47
48static char *codeckwarglist[] = {"input", "errors", NULL};
49static char *streamkwarglist[] = {"stream", "errors", NULL};
50
51static PyObject *multibytecodec_encode(MultibyteCodec *,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000052 MultibyteCodec_State *, const Py_UNICODE **, size_t,
53 PyObject *, int);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000054static PyObject *mbstreamreader_create(MultibyteCodec *,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000055 PyObject *, const char *);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000056static PyObject *mbstreamwriter_create(MultibyteCodec *,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000057 PyObject *, const char *);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000058
/* reset after an encoding session.
 * The replacement list is parenthesized so the shift cannot re-associate
 * with operators surrounding the macro at its point of use. */
#define MBENC_RESET     (MBENC_MAX << 1)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000060
61static PyObject *
62make_tuple(PyObject *unicode, int len)
63{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000064 PyObject *v, *w;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000065
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000066 if (unicode == NULL)
67 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000068
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000069 v = PyTuple_New(2);
70 if (v == NULL) {
71 Py_DECREF(unicode);
72 return NULL;
73 }
74 PyTuple_SET_ITEM(v, 0, unicode);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000075
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000076 w = PyInt_FromLong(len);
77 if (w == NULL) {
78 Py_DECREF(v);
79 return NULL;
80 }
81 PyTuple_SET_ITEM(v, 1, w);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000082
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000083 return v;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000084}
85
86static PyObject *
87get_errorcallback(const char *errors)
88{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000089 if (errors == NULL || strcmp(errors, "strict") == 0)
90 return ERROR_STRICT;
91 else if (strcmp(errors, "ignore") == 0)
92 return ERROR_IGNORE;
93 else if (strcmp(errors, "replace") == 0)
94 return ERROR_REPLACE;
95 else {
96 return PyCodec_LookupError(errors);
97 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000098}
99
100static int
101expand_encodebuffer(MultibyteEncodeBuffer *buf, int esize)
102{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000103 int orgpos, orgsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000104
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000105 orgpos = (int)((char*)buf->outbuf - PyString_AS_STRING(buf->outobj));
106 orgsize = PyString_GET_SIZE(buf->outobj);
107 if (_PyString_Resize(&buf->outobj, orgsize + (
108 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
109 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000110
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000111 buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;
112 buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj)
113 + PyString_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000114
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000115 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000116}
/*
 * Make sure `buf` has room for `s` more output bytes, growing it with
 * expand_encodebuffer() when it does not.  A value of `s` below 1 always
 * triggers a growth.  On failure control jumps to the `errorexit` label of
 * the enclosing function.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); each use must end with a semicolon.
 */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                           \
    if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)         \
        if (expand_encodebuffer((buf), (s)) == -1)                  \
            goto errorexit;                                         \
} while (0)
122
123static int
124expand_decodebuffer(MultibyteDecodeBuffer *buf, int esize)
125{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000126 int orgpos, orgsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000127
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000128 orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
129 orgsize = PyUnicode_GET_SIZE(buf->outobj);
130 if (PyUnicode_Resize(&buf->outobj, orgsize + (
131 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
132 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000133
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000134 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
135 buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
136 + PyUnicode_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000137
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000138 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000139}
/*
 * Make sure `buf` has room for `s` more output units, growing it with
 * expand_decodebuffer() when it does not.  A value of `s` below 1 always
 * triggers a growth.  On failure control jumps to the `errorexit` label of
 * the enclosing function.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * (safe in an unbraced if/else); each use must end with a semicolon.
 */
#define REQUIRE_DECODEBUFFER(buf, s) do {                           \
    if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end)         \
        if (expand_decodebuffer((buf), (s)) == -1)                  \
            goto errorexit;                                         \
} while (0)
145
146static int
147multibytecodec_encerror(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000148 MultibyteCodec_State *state,
149 MultibyteEncodeBuffer *buf,
150 PyObject *errors, int e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000151{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000152 PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj;
153 int retstrsize, newpos;
154 const char *reason;
155 size_t esize;
156 int start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000157
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000158 if (e > 0) {
159 reason = "illegal multibyte sequence";
160 esize = e;
161 }
162 else {
163 switch (e) {
164 case MBERR_TOOSMALL:
165 REQUIRE_ENCODEBUFFER(buf, -1);
166 return 0; /* retry it */
167 case MBERR_TOOFEW:
168 reason = "incomplete multibyte sequence";
169 esize = (size_t)(buf->inbuf_end - buf->inbuf);
170 break;
171 case MBERR_INTERNAL:
172 PyErr_SetString(PyExc_RuntimeError,
173 "internal codec error");
174 return -1;
175 default:
176 PyErr_SetString(PyExc_RuntimeError,
177 "unknown runtime error");
178 return -1;
179 }
180 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000181
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000182 if (errors == ERROR_REPLACE) {
183 const Py_UNICODE replchar = '?', *inbuf = &replchar;
184 int r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000185
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000186 for (;;) {
187 size_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000188
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000189 outleft = (size_t)(buf->outbuf_end - buf->outbuf);
190 r = codec->encode(state, codec->config, &inbuf, 1,
191 &buf->outbuf, outleft, 0);
192 if (r == MBERR_TOOSMALL) {
193 REQUIRE_ENCODEBUFFER(buf, -1);
194 continue;
195 }
196 else
197 break;
198 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000199
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000200 if (r != 0) {
201 REQUIRE_ENCODEBUFFER(buf, 1);
202 *buf->outbuf++ = '?';
203 }
204 }
205 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
206 buf->inbuf += esize;
207 return 0;
208 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000209
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000210 start = (int)(buf->inbuf - buf->inbuf_top);
211 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000212
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000213 /* use cached exception object if available */
214 if (buf->excobj == NULL) {
215 buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
216 buf->inbuf_top,
217 (int)(buf->inbuf_end - buf->inbuf_top),
218 start, end, reason);
219 if (buf->excobj == NULL)
220 goto errorexit;
221 }
222 else
223 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
224 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
225 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
226 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000227
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000228 if (errors == ERROR_STRICT) {
229 PyCodec_StrictErrors(buf->excobj);
230 goto errorexit;
231 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000232
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000233 argsobj = PyTuple_New(1);
234 if (argsobj == NULL)
235 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000236
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000237 PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
238 Py_INCREF(buf->excobj);
239 retobj = PyObject_CallObject(errors, argsobj);
240 Py_DECREF(argsobj);
241 if (retobj == NULL)
242 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000243
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000244 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
245 !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
246 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
247 PyErr_SetString(PyExc_ValueError,
248 "encoding error handler must return "
249 "(unicode, int) tuple");
250 goto errorexit;
251 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000252
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000253 {
254 const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000255
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000256 retstr = multibytecodec_encode(codec, state, &uraw,
257 PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
258 MBENC_FLUSH);
259 if (retstr == NULL)
260 goto errorexit;
261 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000262
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000263 retstrsize = PyString_GET_SIZE(retstr);
264 REQUIRE_ENCODEBUFFER(buf, retstrsize);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000265
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000266 memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
267 buf->outbuf += retstrsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000268
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000269 newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
270 if (newpos < 0)
271 newpos += (int)(buf->inbuf_end - buf->inbuf_top);
272 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
273 PyErr_Format(PyExc_IndexError,
274 "position %d from error handler out of bounds",
275 newpos);
276 goto errorexit;
277 }
278 buf->inbuf = buf->inbuf_top + newpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000279
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000280 Py_DECREF(retobj);
281 Py_DECREF(retstr);
282 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000283
284errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000285 Py_XDECREF(retobj);
286 Py_XDECREF(retstr);
287 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000288}
289
290static int
291multibytecodec_decerror(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000292 MultibyteCodec_State *state,
293 MultibyteDecodeBuffer *buf,
294 PyObject *errors, int e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000295{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000296 PyObject *argsobj, *retobj = NULL, *retuni = NULL;
297 int retunisize, newpos;
298 const char *reason;
299 size_t esize;
300 int start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000301
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000302 if (e > 0) {
303 reason = "illegal multibyte sequence";
304 esize = e;
305 }
306 else {
307 switch (e) {
308 case MBERR_TOOSMALL:
309 REQUIRE_DECODEBUFFER(buf, -1);
310 return 0; /* retry it */
311 case MBERR_TOOFEW:
312 reason = "incomplete multibyte sequence";
313 esize = (size_t)(buf->inbuf_end - buf->inbuf);
314 break;
315 case MBERR_INTERNAL:
316 PyErr_SetString(PyExc_RuntimeError,
317 "internal codec error");
318 return -1;
319 default:
320 PyErr_SetString(PyExc_RuntimeError,
321 "unknown runtime error");
322 return -1;
323 }
324 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000325
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000326 if (errors == ERROR_REPLACE) {
327 REQUIRE_DECODEBUFFER(buf, 1);
328 *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
329 }
330 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
331 buf->inbuf += esize;
332 return 0;
333 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000334
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000335 start = (int)(buf->inbuf - buf->inbuf_top);
336 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000337
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000338 /* use cached exception object if available */
339 if (buf->excobj == NULL) {
340 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
341 buf->inbuf_top,
342 (int)(buf->inbuf_end - buf->inbuf_top),
343 start, end, reason);
344 if (buf->excobj == NULL)
345 goto errorexit;
346 }
347 else
348 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
349 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
350 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
351 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000352
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000353 if (errors == ERROR_STRICT) {
354 PyCodec_StrictErrors(buf->excobj);
355 goto errorexit;
356 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000357
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000358 argsobj = PyTuple_New(1);
359 if (argsobj == NULL)
360 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000361
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000362 PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
363 Py_INCREF(buf->excobj);
364 retobj = PyObject_CallObject(errors, argsobj);
365 Py_DECREF(argsobj);
366 if (retobj == NULL)
367 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000368
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000369 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
370 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
371 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
372 PyErr_SetString(PyExc_ValueError,
373 "decoding error handler must return "
374 "(unicode, int) tuple");
375 goto errorexit;
376 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000377
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000378 retunisize = PyUnicode_GET_SIZE(retuni);
379 if (retunisize > 0) {
380 REQUIRE_DECODEBUFFER(buf, retunisize);
381 memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
382 retunisize * Py_UNICODE_SIZE);
383 buf->outbuf += retunisize;
384 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000385
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000386 newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
387 if (newpos < 0)
388 newpos += (int)(buf->inbuf_end - buf->inbuf_top);
389 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
390 PyErr_Format(PyExc_IndexError,
391 "position %d from error handler out of bounds",
392 newpos);
393 goto errorexit;
394 }
395 buf->inbuf = buf->inbuf_top + newpos;
396 Py_DECREF(retobj);
397 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000398
399errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000400 Py_XDECREF(retobj);
401 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000402}
403
404static PyObject *
405multibytecodec_encode(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000406 MultibyteCodec_State *state,
407 const Py_UNICODE **data, size_t datalen,
408 PyObject *errors, int flags)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000409{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000410 MultibyteEncodeBuffer buf;
411 int finalsize, r = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000412
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000413 if (datalen == 0)
414 return PyString_FromString("");
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000415
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000416 buf.excobj = NULL;
417 buf.inbuf = buf.inbuf_top = *data;
418 buf.inbuf_end = buf.inbuf_top + datalen;
419 buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);
420 if (buf.outobj == NULL)
421 goto errorexit;
422 buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj);
423 buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000424
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000425 while (buf.inbuf < buf.inbuf_end) {
426 size_t inleft, outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000427
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000428 /* we don't reuse inleft and outleft here.
429 * error callbacks can relocate the cursor anywhere on buffer*/
430 inleft = (size_t)(buf.inbuf_end - buf.inbuf);
431 outleft = (size_t)(buf.outbuf_end - buf.outbuf);
432 r = codec->encode(state, codec->config, &buf.inbuf, inleft,
433 &buf.outbuf, outleft, flags);
434 *data = buf.inbuf;
435 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
436 break;
437 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
438 goto errorexit;
439 else if (r == MBERR_TOOFEW)
440 break;
441 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000442
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000443 if (codec->encreset != NULL)
444 for (;;) {
445 size_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000446
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000447 outleft = (size_t)(buf.outbuf_end - buf.outbuf);
448 r = codec->encreset(state, codec->config, &buf.outbuf,
449 outleft);
450 if (r == 0)
451 break;
452 else if (multibytecodec_encerror(codec, state,
453 &buf, errors, r))
454 goto errorexit;
455 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000456
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000457 finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000458
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000459 if (finalsize != PyString_GET_SIZE(buf.outobj))
460 if (_PyString_Resize(&buf.outobj, finalsize) == -1)
461 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000462
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000463 Py_XDECREF(buf.excobj);
464 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000465
466errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000467 Py_XDECREF(buf.excobj);
468 Py_XDECREF(buf.outobj);
469 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000470}
471
472static PyObject *
473MultibyteCodec_Encode(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000474 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000475{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000476 MultibyteCodec_State state;
477 Py_UNICODE *data;
478 PyObject *errorcb, *r, *arg, *ucvt;
479 const char *errors = NULL;
480 int datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000481
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000482 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
483 codeckwarglist, &arg, &errors))
484 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000485
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000486 if (PyUnicode_Check(arg))
487 ucvt = NULL;
488 else {
489 arg = ucvt = PyObject_Unicode(arg);
490 if (arg == NULL)
491 return NULL;
492 else if (!PyUnicode_Check(arg)) {
493 PyErr_SetString(PyExc_TypeError,
494 "couldn't convert the object to unicode.");
495 Py_DECREF(ucvt);
496 return NULL;
497 }
498 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000499
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000500 data = PyUnicode_AS_UNICODE(arg);
501 datalen = PyUnicode_GET_SIZE(arg);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000502
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000503 errorcb = get_errorcallback(errors);
504 if (errorcb == NULL) {
505 Py_XDECREF(ucvt);
506 return NULL;
507 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000508
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000509 if (self->codec->encinit != NULL &&
510 self->codec->encinit(&state, self->codec->config) != 0)
511 goto errorexit;
512 r = multibytecodec_encode(self->codec, &state,
513 (const Py_UNICODE **)&data, datalen, errorcb,
514 MBENC_FLUSH | MBENC_RESET);
515 if (r == NULL)
516 goto errorexit;
517
518 if (errorcb > ERROR_MAX) {
519 Py_DECREF(errorcb);
520 }
521 Py_XDECREF(ucvt);
522 return make_tuple(r, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000523
524errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000525 if (errorcb > ERROR_MAX) {
526 Py_DECREF(errorcb);
527 }
528 Py_XDECREF(ucvt);
529 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000530}
531
532static PyObject *
533MultibyteCodec_Decode(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000534 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000535{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000536 MultibyteCodec_State state;
537 MultibyteDecodeBuffer buf;
538 PyObject *errorcb;
539 const char *data, *errors = NULL;
540 int datalen, finalsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000541
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000542 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode",
543 codeckwarglist, &data, &datalen, &errors))
544 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000545
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000546 errorcb = get_errorcallback(errors);
547 if (errorcb == NULL)
548 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000549
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000550 if (datalen == 0) {
551 if (errorcb > ERROR_MAX) {
552 Py_DECREF(errorcb);
553 }
554 return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
555 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000556
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000557 buf.outobj = buf.excobj = NULL;
558 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
559 buf.inbuf_end = buf.inbuf_top + datalen;
560 buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
561 if (buf.outobj == NULL)
562 goto errorexit;
563 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
564 buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000565
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000566 if (self->codec->decinit != NULL &&
567 self->codec->decinit(&state, self->codec->config) != 0)
568 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000569
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000570 while (buf.inbuf < buf.inbuf_end) {
571 size_t inleft, outleft;
572 int r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000573
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000574 inleft = (size_t)(buf.inbuf_end - buf.inbuf);
575 outleft = (size_t)(buf.outbuf_end - buf.outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000576
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000577 r = self->codec->decode(&state, self->codec->config,
578 &buf.inbuf, inleft, &buf.outbuf, outleft);
579 if (r == 0)
580 break;
581 else if (multibytecodec_decerror(self->codec, &state,
582 &buf, errorcb, r))
583 goto errorexit;
584 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000585
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000586 finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000587
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000588 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
589 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
590 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000591
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000592 Py_XDECREF(buf.excobj);
593 if (errorcb > ERROR_MAX) {
594 Py_DECREF(errorcb);
595 }
596 return make_tuple(buf.outobj, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000597
598errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000599 if (errorcb > ERROR_MAX) {
600 Py_DECREF(errorcb);
601 }
602 Py_XDECREF(buf.excobj);
603 Py_XDECREF(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000604
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000605 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000606}
607
608static PyObject *
609MultibyteCodec_StreamReader(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000610 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000611{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000612 PyObject *stream;
613 char *errors = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000614
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader",
616 streamkwarglist, &stream, &errors))
617 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000618
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000619 return mbstreamreader_create(self->codec, stream, errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000620}
621
622static PyObject *
623MultibyteCodec_StreamWriter(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000624 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000625{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000626 PyObject *stream;
627 char *errors = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000628
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000629 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter",
630 streamkwarglist, &stream, &errors))
631 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000632
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000633 return mbstreamwriter_create(self->codec, stream, errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000634}
635
636static struct PyMethodDef multibytecodec_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000637 {"encode", (PyCFunction)MultibyteCodec_Encode,
638 METH_VARARGS | METH_KEYWORDS,
639 MultibyteCodec_Encode__doc__},
640 {"decode", (PyCFunction)MultibyteCodec_Decode,
641 METH_VARARGS | METH_KEYWORDS,
642 MultibyteCodec_Decode__doc__},
643 {"StreamReader",(PyCFunction)MultibyteCodec_StreamReader,
644 METH_VARARGS | METH_KEYWORDS,
645 MultibyteCodec_StreamReader__doc__},
646 {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter,
647 METH_VARARGS | METH_KEYWORDS,
648 MultibyteCodec_StreamWriter__doc__},
649 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000650};
651
652static void
653multibytecodec_dealloc(MultibyteCodecObject *self)
654{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000655 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000656}
657
658
659
660static PyTypeObject MultibyteCodec_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000661 PyObject_HEAD_INIT(NULL)
662 0, /* ob_size */
663 "MultibyteCodec", /* tp_name */
664 sizeof(MultibyteCodecObject), /* tp_basicsize */
665 0, /* tp_itemsize */
666 /* methods */
667 (destructor)multibytecodec_dealloc, /* tp_dealloc */
668 0, /* tp_print */
669 0, /* tp_getattr */
670 0, /* tp_setattr */
671 0, /* tp_compare */
672 0, /* tp_repr */
673 0, /* tp_as_number */
674 0, /* tp_as_sequence */
675 0, /* tp_as_mapping */
676 0, /* tp_hash */
677 0, /* tp_call */
678 0, /* tp_str */
679 PyObject_GenericGetAttr, /* tp_getattro */
680 0, /* tp_setattro */
681 0, /* tp_as_buffer */
682 Py_TPFLAGS_DEFAULT, /* tp_flags */
683 0, /* tp_doc */
684 0, /* tp_traverse */
685 0, /* tp_clear */
686 0, /* tp_richcompare */
687 0, /* tp_weaklistoffset */
688 0, /* tp_iter */
689 0, /* tp_iterext */
690 multibytecodec_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000691};
692
693static PyObject *
694mbstreamreader_iread(MultibyteStreamReaderObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000695 const char *method, int sizehint)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000696{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000697 MultibyteDecodeBuffer buf;
698 PyObject *cres;
699 int rsize, r, finalsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000700
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000701 if (sizehint == 0)
702 return PyUnicode_FromUnicode(NULL, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000703
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000704 buf.outobj = buf.excobj = NULL;
705 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000706
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000707 for (;;) {
708 if (sizehint < 0)
709 cres = PyObject_CallMethod(self->stream,
710 (char *)method, NULL);
711 else
712 cres = PyObject_CallMethod(self->stream,
713 (char *)method, "i", sizehint);
714 if (cres == NULL)
715 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000716
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000717 if (!PyString_Check(cres)) {
718 PyErr_SetString(PyExc_TypeError,
719 "stream function returned a "
720 "non-string object");
721 goto errorexit;
722 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000723
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000724 if (self->pendingsize > 0) {
725 PyObject *ctr;
726 char *ctrdata;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000727
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000728 rsize = PyString_GET_SIZE(cres) + self->pendingsize;
729 ctr = PyString_FromStringAndSize(NULL, rsize);
730 if (ctr == NULL)
731 goto errorexit;
732 ctrdata = PyString_AS_STRING(ctr);
733 memcpy(ctrdata, self->pending, self->pendingsize);
734 memcpy(ctrdata + self->pendingsize,
735 PyString_AS_STRING(cres),
736 PyString_GET_SIZE(cres));
737 Py_DECREF(cres);
738 cres = ctr;
739 self->pendingsize = 0;
740 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000741
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000742 rsize = PyString_GET_SIZE(cres);
743 buf.inbuf = buf.inbuf_top =
744 (unsigned char *)PyString_AS_STRING(cres);
745 buf.inbuf_end = buf.inbuf_top + rsize;
746 if (buf.outobj == NULL) {
747 buf.outobj = PyUnicode_FromUnicode(NULL, rsize);
748 if (buf.outobj == NULL)
749 goto errorexit;
750 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
751 buf.outbuf_end = buf.outbuf +
752 PyUnicode_GET_SIZE(buf.outobj);
753 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000754
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000755 r = 0;
756 if (rsize > 0)
757 while (buf.inbuf < buf.inbuf_end) {
758 size_t inleft, outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000759
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000760 inleft = (size_t)(buf.inbuf_end - buf.inbuf);
761 outleft = (size_t)(buf.outbuf_end -buf.outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000762
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000763 r = self->codec->decode(&self->state,
764 self->codec->config,
765 &buf.inbuf, inleft,
766 &buf.outbuf, outleft);
767 if (r == 0 || r == MBERR_TOOFEW)
768 break;
769 else if (multibytecodec_decerror(self->codec,
770 &self->state, &buf,
771 self->errors, r))
772 goto errorexit;
773 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000774
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000775 if (rsize == 0 || sizehint < 0) { /* end of file */
776 if (buf.inbuf < buf.inbuf_end &&
777 multibytecodec_decerror(self->codec, &self->state,
778 &buf, self->errors, MBERR_TOOFEW))
779 goto errorexit;
780 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000781
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000782 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
783 size_t npendings;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000784
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000785 /* we can't assume that pendingsize is still 0 here.
786 * because this function can be called recursively
787 * from error callback */
788 npendings = (size_t)(buf.inbuf_end - buf.inbuf);
789 if (npendings + self->pendingsize > MAXDECPENDING) {
790 PyErr_SetString(PyExc_RuntimeError,
791 "pending buffer overflow");
792 goto errorexit;
793 }
794 memcpy(self->pending + self->pendingsize, buf.inbuf,
795 npendings);
796 self->pendingsize += npendings;
797 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000798
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000799 finalsize = (int)(buf.outbuf -
800 PyUnicode_AS_UNICODE(buf.outobj));
801 Py_DECREF(cres);
802 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000803
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000804 if (sizehint < 0 || finalsize != 0 || rsize == 0)
805 break;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000806
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000807 sizehint = 1; /* read 1 more byte and retry */
808 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000809
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000810 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
811 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
812 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000813
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000814 Py_XDECREF(cres);
815 Py_XDECREF(buf.excobj);
816 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000817
818errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000819 Py_XDECREF(cres);
820 Py_XDECREF(buf.excobj);
821 Py_XDECREF(buf.outobj);
822 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000823}
824
825static PyObject *
826mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
827{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000828 PyObject *sizeobj = NULL;
829 long size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000830
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000831 if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
832 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000833
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000834 if (sizeobj == Py_None || sizeobj == NULL)
835 size = -1;
836 else if (PyInt_Check(sizeobj))
837 size = PyInt_AsLong(sizeobj);
838 else {
839 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
840 return NULL;
841 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000842
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000843 return mbstreamreader_iread(self, "read", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000844}
845
846static PyObject *
847mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
848{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000849 PyObject *sizeobj = NULL;
850 long size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000851
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000852 if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
853 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000854
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000855 if (sizeobj == Py_None || sizeobj == NULL)
856 size = -1;
857 else if (PyInt_Check(sizeobj))
858 size = PyInt_AsLong(sizeobj);
859 else {
860 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
861 return NULL;
862 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000863
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000864 return mbstreamreader_iread(self, "readline", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000865}
866
867static PyObject *
868mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
869{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000870 PyObject *sizehintobj = NULL, *r, *sr;
871 long sizehint;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000872
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000873 if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
874 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000875
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000876 if (sizehintobj == Py_None || sizehintobj == NULL)
877 sizehint = -1;
878 else if (PyInt_Check(sizehintobj))
879 sizehint = PyInt_AsLong(sizehintobj);
880 else {
881 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
882 return NULL;
883 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000884
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000885 r = mbstreamreader_iread(self, "read", sizehint);
886 if (r == NULL)
887 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000888
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000889 sr = PyUnicode_Splitlines(r, 1);
890 Py_DECREF(r);
891 return sr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000892}
893
894static PyObject *
895mbstreamreader_reset(MultibyteStreamReaderObject *self)
896{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000897 if (self->codec->decreset != NULL &&
898 self->codec->decreset(&self->state, self->codec->config) != 0)
899 return NULL;
900 self->pendingsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000901
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000902 Py_INCREF(Py_None);
903 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000904}
905
906static struct PyMethodDef mbstreamreader_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000907 {"read", (PyCFunction)mbstreamreader_read,
908 METH_VARARGS, NULL},
909 {"readline", (PyCFunction)mbstreamreader_readline,
910 METH_VARARGS, NULL},
911 {"readlines", (PyCFunction)mbstreamreader_readlines,
912 METH_VARARGS, NULL},
913 {"reset", (PyCFunction)mbstreamreader_reset,
914 METH_NOARGS, NULL},
915 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000916};
917
918static void
919mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
920{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000921 if (self->errors > ERROR_MAX) {
922 Py_DECREF(self->errors);
923 }
924 Py_DECREF(self->stream);
925 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000926}
927
928
929
930static PyTypeObject MultibyteStreamReader_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000931 PyObject_HEAD_INIT(NULL)
932 0, /* ob_size */
933 "MultibyteStreamReader", /* tp_name */
934 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
935 0, /* tp_itemsize */
936 /* methods */
937 (destructor)mbstreamreader_dealloc, /* tp_dealloc */
938 0, /* tp_print */
939 0, /* tp_getattr */
940 0, /* tp_setattr */
941 0, /* tp_compare */
942 0, /* tp_repr */
943 0, /* tp_as_number */
944 0, /* tp_as_sequence */
945 0, /* tp_as_mapping */
946 0, /* tp_hash */
947 0, /* tp_call */
948 0, /* tp_str */
949 PyObject_GenericGetAttr, /* tp_getattro */
950 0, /* tp_setattro */
951 0, /* tp_as_buffer */
952 Py_TPFLAGS_DEFAULT, /* tp_flags */
953 0, /* tp_doc */
954 0, /* tp_traverse */
955 0, /* tp_clear */
956 0, /* tp_richcompare */
957 0, /* tp_weaklistoffset */
958 0, /* tp_iter */
959 0, /* tp_iterext */
960 mbstreamreader_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000961};
962
963static int
964mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000965 PyObject *unistr)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000966{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000967 PyObject *wr, *ucvt, *r = NULL;
968 Py_UNICODE *inbuf, *inbuf_end, *data, *inbuf_tmp = NULL;
969 int datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000970
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000971 if (PyUnicode_Check(unistr))
972 ucvt = NULL;
973 else {
974 unistr = ucvt = PyObject_Unicode(unistr);
975 if (unistr == NULL)
976 return -1;
977 else if (!PyUnicode_Check(unistr)) {
978 PyErr_SetString(PyExc_TypeError,
979 "couldn't convert the object to unicode.");
980 Py_DECREF(ucvt);
981 return -1;
982 }
983 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000984
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000985 data = PyUnicode_AS_UNICODE(unistr);
986 datalen = PyUnicode_GET_SIZE(unistr);
987 if (datalen == 0) {
988 Py_XDECREF(ucvt);
989 return 0;
990 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000991
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000992 if (self->pendingsize > 0) {
993 inbuf_tmp = PyMem_New(Py_UNICODE, datalen + self->pendingsize);
994 if (inbuf_tmp == NULL)
995 goto errorexit;
996 memcpy(inbuf_tmp, self->pending,
997 Py_UNICODE_SIZE * self->pendingsize);
998 memcpy(inbuf_tmp + self->pendingsize,
999 PyUnicode_AS_UNICODE(unistr),
1000 Py_UNICODE_SIZE * datalen);
1001 datalen += self->pendingsize;
1002 self->pendingsize = 0;
1003 inbuf = inbuf_tmp;
1004 }
1005 else
1006 inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001007
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001008 inbuf_end = inbuf + datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001009
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001010 r = multibytecodec_encode(self->codec, &self->state,
1011 (const Py_UNICODE **)&inbuf, datalen, self->errors, 0);
1012 if (r == NULL)
1013 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001014
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001015 if (inbuf < inbuf_end) {
1016 self->pendingsize = (int)(inbuf_end - inbuf);
1017 if (self->pendingsize > MAXENCPENDING) {
1018 self->pendingsize = 0;
1019 PyErr_SetString(PyExc_RuntimeError,
1020 "pending buffer overflow");
1021 goto errorexit;
1022 }
1023 memcpy(self->pending, inbuf,
1024 self->pendingsize * Py_UNICODE_SIZE);
1025 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001026
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001027 wr = PyObject_CallMethod(self->stream, "write", "O", r);
1028 if (wr == NULL)
1029 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001030
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001031 if (inbuf_tmp != NULL)
1032 PyMem_Del(inbuf_tmp);
1033 Py_DECREF(r);
1034 Py_DECREF(wr);
1035 Py_XDECREF(ucvt);
1036 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001037
1038errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001039 if (inbuf_tmp != NULL)
1040 PyMem_Del(inbuf_tmp);
1041 Py_XDECREF(r);
1042 Py_XDECREF(ucvt);
1043 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001044}
1045
1046static PyObject *
1047mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *args)
1048{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001049 PyObject *strobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001050
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001051 if (!PyArg_ParseTuple(args, "O:write", &strobj))
1052 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001053
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001054 if (mbstreamwriter_iwrite(self, strobj))
1055 return NULL;
1056 else {
1057 Py_INCREF(Py_None);
1058 return Py_None;
1059 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001060}
1061
1062static PyObject *
1063mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *args)
1064{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001065 PyObject *lines, *strobj;
1066 int i, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001067
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001068 if (!PyArg_ParseTuple(args, "O:writelines", &lines))
1069 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001070
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001071 if (!PySequence_Check(lines)) {
1072 PyErr_SetString(PyExc_TypeError,
1073 "arg must be a sequence object");
1074 return NULL;
1075 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001076
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001077 for (i = 0; i < PySequence_Length(lines); i++) {
1078 /* length can be changed even within this loop */
1079 strobj = PySequence_GetItem(lines, i);
1080 if (strobj == NULL)
1081 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001082
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001083 r = mbstreamwriter_iwrite(self, strobj);
1084 Py_DECREF(strobj);
1085 if (r == -1)
1086 return NULL;
1087 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001088
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001089 Py_INCREF(Py_None);
1090 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001091}
1092
1093static PyObject *
1094mbstreamwriter_reset(MultibyteStreamWriterObject *self)
1095{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001096 const Py_UNICODE *pending;
1097 PyObject *pwrt;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001098
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001099 pending = self->pending;
1100 pwrt = multibytecodec_encode(self->codec, &self->state,
1101 &pending, self->pendingsize, self->errors,
1102 MBENC_FLUSH | MBENC_RESET);
1103 /* some pending buffer can be truncated when UnicodeEncodeError is
1104 * raised on 'strict' mode. but, 'reset' method is designed to
1105 * reset the pending buffer or states so failed string sequence
1106 * ought to be missed */
1107 self->pendingsize = 0;
1108 if (pwrt == NULL)
1109 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001110
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001111 if (PyString_Size(pwrt) > 0) {
1112 PyObject *wr;
1113 wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
1114 if (wr == NULL) {
1115 Py_DECREF(pwrt);
1116 return NULL;
1117 }
1118 }
1119 Py_DECREF(pwrt);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001120
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001121 Py_INCREF(Py_None);
1122 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001123}
1124
1125static void
1126mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1127{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001128 if (self->errors > ERROR_MAX) {
1129 Py_DECREF(self->errors);
1130 }
1131 Py_DECREF(self->stream);
1132 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001133}
1134
1135static struct PyMethodDef mbstreamwriter_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001136 {"write", (PyCFunction)mbstreamwriter_write,
1137 METH_VARARGS, NULL},
1138 {"writelines", (PyCFunction)mbstreamwriter_writelines,
1139 METH_VARARGS, NULL},
1140 {"reset", (PyCFunction)mbstreamwriter_reset,
1141 METH_NOARGS, NULL},
1142 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001143};
1144
1145
1146
1147static PyTypeObject MultibyteStreamWriter_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001148 PyObject_HEAD_INIT(NULL)
1149 0, /* ob_size */
1150 "MultibyteStreamWriter", /* tp_name */
1151 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
1152 0, /* tp_itemsize */
1153 /* methods */
1154 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
1155 0, /* tp_print */
1156 0, /* tp_getattr */
1157 0, /* tp_setattr */
1158 0, /* tp_compare */
1159 0, /* tp_repr */
1160 0, /* tp_as_number */
1161 0, /* tp_as_sequence */
1162 0, /* tp_as_mapping */
1163 0, /* tp_hash */
1164 0, /* tp_call */
1165 0, /* tp_str */
1166 PyObject_GenericGetAttr, /* tp_getattro */
1167 0, /* tp_setattro */
1168 0, /* tp_as_buffer */
1169 Py_TPFLAGS_DEFAULT, /* tp_flags */
1170 0, /* tp_doc */
1171 0, /* tp_traverse */
1172 0, /* tp_clear */
1173 0, /* tp_richcompare */
1174 0, /* tp_weaklistoffset */
1175 0, /* tp_iter */
1176 0, /* tp_iterext */
1177 mbstreamwriter_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001178};
1179
1180static PyObject *
1181__create_codec(PyObject *ignore, PyObject *arg)
1182{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001183 MultibyteCodecObject *self;
1184 MultibyteCodec *codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001185
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001186 if (!PyCObject_Check(arg)) {
1187 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1188 return NULL;
1189 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001190
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001191 codec = PyCObject_AsVoidPtr(arg);
1192 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1193 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001194
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001195 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
1196 if (self == NULL)
1197 return NULL;
1198 self->codec = codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001199
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001200 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001201}
1202
1203static PyObject *
1204mbstreamreader_create(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001205 PyObject *stream, const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001206{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001207 MultibyteStreamReaderObject *self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001208
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001209 self = PyObject_New(MultibyteStreamReaderObject,
1210 &MultibyteStreamReader_Type);
1211 if (self == NULL)
1212 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001213
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001214 self->codec = codec;
1215 self->stream = stream;
1216 Py_INCREF(stream);
1217 self->pendingsize = 0;
1218 self->errors = get_errorcallback(errors);
1219 if (self->errors == NULL)
1220 goto errorexit;
1221 if (self->codec->decinit != NULL &&
1222 self->codec->decinit(&self->state, self->codec->config) != 0)
1223 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001224
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001225 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001226
1227errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001228 Py_XDECREF(self);
1229 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001230}
1231
1232static PyObject *
1233mbstreamwriter_create(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001234 PyObject *stream, const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001235{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001236 MultibyteStreamWriterObject *self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001237
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001238 self = PyObject_New(MultibyteStreamWriterObject,
1239 &MultibyteStreamWriter_Type);
1240 if (self == NULL)
1241 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001242
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001243 self->codec = codec;
1244 self->stream = stream;
1245 Py_INCREF(stream);
1246 self->pendingsize = 0;
1247 self->errors = get_errorcallback(errors);
1248 if (self->errors == NULL)
1249 goto errorexit;
1250 if (self->codec->encinit != NULL &&
1251 self->codec->encinit(&self->state, self->codec->config) != 0)
1252 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001253
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001254 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001255
1256errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001257 Py_XDECREF(self);
1258 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001259}
1260
1261static struct PyMethodDef __methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001262 {"__create_codec", (PyCFunction)__create_codec, METH_O},
1263 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001264};
1265
1266void
1267init_multibytecodec(void)
1268{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001269 Py_InitModule("_multibytecodec", __methods);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001270
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001271 if (PyErr_Occurred())
1272 Py_FatalError("can't initialize the _multibytecodec module");
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001273}