blob: 7d2d15ebfb78e04e8be8d1f0ad0bfcf9e3e99216 [file] [log] [blame]
/*
 * multibytecodec.c: Common Multibyte Codec Implementation
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 * $CJKCodecs: multibytecodec.c,v 1.13 2004/08/19 16:57:19 perky Exp $
 */
7
#include "Python.h"
#include <limits.h>
#include "multibytecodec.h"
10
11
12typedef struct {
13 const Py_UNICODE *inbuf, *inbuf_top, *inbuf_end;
14 unsigned char *outbuf, *outbuf_end;
15 PyObject *excobj, *outobj;
16} MultibyteEncodeBuffer;
17
18typedef struct {
19 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
20 Py_UNICODE *outbuf, *outbuf_end;
21 PyObject *excobj, *outobj;
22} MultibyteDecodeBuffer;
23
24PyDoc_STRVAR(MultibyteCodec_Encode__doc__,
25"I.encode(unicode[, errors]) -> (string, length consumed)\n\
26\n\
27Return an encoded string version of `unicode'. errors may be given to\n\
28set a different error handling scheme. Default is 'strict' meaning that\n\
29encoding errors raise a UnicodeEncodeError. Other possible values are\n\
30'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
31registered with codecs.register_error that can handle UnicodeEncodeErrors.");
32
33PyDoc_STRVAR(MultibyteCodec_Decode__doc__,
34"I.decode(string[, errors]) -> (unicodeobject, length consumed)\n\
35\n\
36Decodes `string' using I, an MultibyteCodec instance. errors may be given\n\
37to set a different error handling scheme. Default is 'strict' meaning\n\
38that encoding errors raise a UnicodeDecodeError. Other possible values\n\
39are 'ignore' and 'replace' as well as any other name registerd with\n\
40codecs.register_error that is able to handle UnicodeDecodeErrors.");
41
42PyDoc_STRVAR(MultibyteCodec_StreamReader__doc__,
43"I.StreamReader(stream[, errors]) -> StreamReader instance");
44
45PyDoc_STRVAR(MultibyteCodec_StreamWriter__doc__,
46"I.StreamWriter(stream[, errors]) -> StreamWriter instance");
47
48static char *codeckwarglist[] = {"input", "errors", NULL};
49static char *streamkwarglist[] = {"stream", "errors", NULL};
50
51static PyObject *multibytecodec_encode(MultibyteCodec *,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000052 MultibyteCodec_State *, const Py_UNICODE **, size_t,
53 PyObject *, int);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000054static PyObject *mbstreamreader_create(MultibyteCodec *,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000055 PyObject *, const char *);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000056static PyObject *mbstreamwriter_create(MultibyteCodec *,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000057 PyObject *, const char *);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000058
/* reset after an encoding session.
 * The expansion is parenthesized so the flag keeps its value next to
 * operators that bind tighter than `<<` (for example `+` or `*`). */
#define MBENC_RESET (MBENC_MAX << 1)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000060
61static PyObject *
62make_tuple(PyObject *unicode, int len)
63{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000064 PyObject *v, *w;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000065
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000066 if (unicode == NULL)
67 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000068
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000069 v = PyTuple_New(2);
70 if (v == NULL) {
71 Py_DECREF(unicode);
72 return NULL;
73 }
74 PyTuple_SET_ITEM(v, 0, unicode);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000075
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000076 w = PyInt_FromLong(len);
77 if (w == NULL) {
78 Py_DECREF(v);
79 return NULL;
80 }
81 PyTuple_SET_ITEM(v, 1, w);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000082
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000083 return v;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000084}
85
86static PyObject *
87get_errorcallback(const char *errors)
88{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +000089 if (errors == NULL || strcmp(errors, "strict") == 0)
90 return ERROR_STRICT;
91 else if (strcmp(errors, "ignore") == 0)
92 return ERROR_IGNORE;
93 else if (strcmp(errors, "replace") == 0)
94 return ERROR_REPLACE;
95 else {
96 return PyCodec_LookupError(errors);
97 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +000098}
99
100static int
101expand_encodebuffer(MultibyteEncodeBuffer *buf, int esize)
102{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000103 int orgpos, orgsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000104
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000105 orgpos = (int)((char*)buf->outbuf - PyString_AS_STRING(buf->outobj));
106 orgsize = PyString_GET_SIZE(buf->outobj);
107 if (_PyString_Resize(&buf->outobj, orgsize + (
108 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
109 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000110
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000111 buf->outbuf = (unsigned char *)PyString_AS_STRING(buf->outobj) +orgpos;
112 buf->outbuf_end = (unsigned char *)PyString_AS_STRING(buf->outobj)
113 + PyString_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000114
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000115 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000116}
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000117#define REQUIRE_ENCODEBUFFER(buf, s) { \
118 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
119 if (expand_encodebuffer(buf, s) == -1) \
120 goto errorexit; \
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000121}
122
123static int
124expand_decodebuffer(MultibyteDecodeBuffer *buf, int esize)
125{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000126 int orgpos, orgsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000127
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000128 orgpos = (int)(buf->outbuf - PyUnicode_AS_UNICODE(buf->outobj));
129 orgsize = PyUnicode_GET_SIZE(buf->outobj);
130 if (PyUnicode_Resize(&buf->outobj, orgsize + (
131 esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize)) == -1)
132 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000133
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000134 buf->outbuf = PyUnicode_AS_UNICODE(buf->outobj) + orgpos;
135 buf->outbuf_end = PyUnicode_AS_UNICODE(buf->outobj)
136 + PyUnicode_GET_SIZE(buf->outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000137
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000138 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000139}
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000140#define REQUIRE_DECODEBUFFER(buf, s) { \
141 if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \
142 if (expand_decodebuffer(buf, s) == -1) \
143 goto errorexit; \
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000144}
145
146static int
147multibytecodec_encerror(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000148 MultibyteCodec_State *state,
149 MultibyteEncodeBuffer *buf,
150 PyObject *errors, int e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000151{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000152 PyObject *retobj = NULL, *retstr = NULL, *argsobj, *tobj;
153 int retstrsize, newpos;
154 const char *reason;
155 size_t esize;
156 int start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000157
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000158 if (e > 0) {
159 reason = "illegal multibyte sequence";
160 esize = e;
161 }
162 else {
163 switch (e) {
164 case MBERR_TOOSMALL:
165 REQUIRE_ENCODEBUFFER(buf, -1);
166 return 0; /* retry it */
167 case MBERR_TOOFEW:
168 reason = "incomplete multibyte sequence";
169 esize = (size_t)(buf->inbuf_end - buf->inbuf);
170 break;
171 case MBERR_INTERNAL:
172 PyErr_SetString(PyExc_RuntimeError,
173 "internal codec error");
174 return -1;
175 default:
176 PyErr_SetString(PyExc_RuntimeError,
177 "unknown runtime error");
178 return -1;
179 }
180 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000181
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000182 if (errors == ERROR_REPLACE) {
183 const Py_UNICODE replchar = '?', *inbuf = &replchar;
184 int r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000185
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000186 for (;;) {
187 size_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000188
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000189 outleft = (size_t)(buf->outbuf_end - buf->outbuf);
190 r = codec->encode(state, codec->config, &inbuf, 1,
191 &buf->outbuf, outleft, 0);
192 if (r == MBERR_TOOSMALL) {
193 REQUIRE_ENCODEBUFFER(buf, -1);
194 continue;
195 }
196 else
197 break;
198 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000199
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000200 if (r != 0) {
201 REQUIRE_ENCODEBUFFER(buf, 1);
202 *buf->outbuf++ = '?';
203 }
204 }
205 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
206 buf->inbuf += esize;
207 return 0;
208 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000209
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000210 start = (int)(buf->inbuf - buf->inbuf_top);
211 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000212
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000213 /* use cached exception object if available */
214 if (buf->excobj == NULL) {
215 buf->excobj = PyUnicodeEncodeError_Create(codec->encoding,
216 buf->inbuf_top,
217 (int)(buf->inbuf_end - buf->inbuf_top),
218 start, end, reason);
219 if (buf->excobj == NULL)
220 goto errorexit;
221 }
222 else
223 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
224 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
225 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
226 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000227
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000228 if (errors == ERROR_STRICT) {
229 PyCodec_StrictErrors(buf->excobj);
230 goto errorexit;
231 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000232
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000233 argsobj = PyTuple_New(1);
234 if (argsobj == NULL)
235 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000236
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000237 PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
238 Py_INCREF(buf->excobj);
239 retobj = PyObject_CallObject(errors, argsobj);
240 Py_DECREF(argsobj);
241 if (retobj == NULL)
242 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000243
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000244 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
245 !PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) ||
246 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
247 PyErr_SetString(PyExc_ValueError,
248 "encoding error handler must return "
249 "(unicode, int) tuple");
250 goto errorexit;
251 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000252
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000253 {
254 const Py_UNICODE *uraw = PyUnicode_AS_UNICODE(tobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000255
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000256 retstr = multibytecodec_encode(codec, state, &uraw,
257 PyUnicode_GET_SIZE(tobj), ERROR_STRICT,
258 MBENC_FLUSH);
259 if (retstr == NULL)
260 goto errorexit;
261 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000262
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000263 retstrsize = PyString_GET_SIZE(retstr);
264 REQUIRE_ENCODEBUFFER(buf, retstrsize);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000265
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000266 memcpy(buf->outbuf, PyString_AS_STRING(retstr), retstrsize);
267 buf->outbuf += retstrsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000268
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000269 newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
270 if (newpos < 0)
271 newpos += (int)(buf->inbuf_end - buf->inbuf_top);
272 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
273 PyErr_Format(PyExc_IndexError,
274 "position %d from error handler out of bounds",
275 newpos);
276 goto errorexit;
277 }
278 buf->inbuf = buf->inbuf_top + newpos;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000279
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000280 Py_DECREF(retobj);
281 Py_DECREF(retstr);
282 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000283
284errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000285 Py_XDECREF(retobj);
286 Py_XDECREF(retstr);
287 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000288}
289
290static int
291multibytecodec_decerror(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000292 MultibyteCodec_State *state,
293 MultibyteDecodeBuffer *buf,
294 PyObject *errors, int e)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000295{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000296 PyObject *argsobj, *retobj = NULL, *retuni = NULL;
297 int retunisize, newpos;
298 const char *reason;
299 size_t esize;
300 int start, end;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000301
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000302 if (e > 0) {
303 reason = "illegal multibyte sequence";
304 esize = e;
305 }
306 else {
307 switch (e) {
308 case MBERR_TOOSMALL:
309 REQUIRE_DECODEBUFFER(buf, -1);
310 return 0; /* retry it */
311 case MBERR_TOOFEW:
312 reason = "incomplete multibyte sequence";
313 esize = (size_t)(buf->inbuf_end - buf->inbuf);
314 break;
315 case MBERR_INTERNAL:
316 PyErr_SetString(PyExc_RuntimeError,
317 "internal codec error");
318 return -1;
319 default:
320 PyErr_SetString(PyExc_RuntimeError,
321 "unknown runtime error");
322 return -1;
323 }
324 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000325
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000326 if (errors == ERROR_REPLACE) {
327 REQUIRE_DECODEBUFFER(buf, 1);
328 *buf->outbuf++ = Py_UNICODE_REPLACEMENT_CHARACTER;
329 }
330 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
331 buf->inbuf += esize;
332 return 0;
333 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000334
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000335 start = (int)(buf->inbuf - buf->inbuf_top);
336 end = start + esize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000337
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000338 /* use cached exception object if available */
339 if (buf->excobj == NULL) {
340 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
Hye-Shik Changf5a149a2004-08-19 17:49:56 +0000341 (const char *)buf->inbuf_top,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000342 (int)(buf->inbuf_end - buf->inbuf_top),
343 start, end, reason);
344 if (buf->excobj == NULL)
345 goto errorexit;
346 }
347 else
348 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
349 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
350 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
351 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000352
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000353 if (errors == ERROR_STRICT) {
354 PyCodec_StrictErrors(buf->excobj);
355 goto errorexit;
356 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000357
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000358 argsobj = PyTuple_New(1);
359 if (argsobj == NULL)
360 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000361
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000362 PyTuple_SET_ITEM(argsobj, 0, buf->excobj);
363 Py_INCREF(buf->excobj);
364 retobj = PyObject_CallObject(errors, argsobj);
365 Py_DECREF(argsobj);
366 if (retobj == NULL)
367 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000368
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000369 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
370 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
371 !PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
372 PyErr_SetString(PyExc_ValueError,
373 "decoding error handler must return "
374 "(unicode, int) tuple");
375 goto errorexit;
376 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000377
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000378 retunisize = PyUnicode_GET_SIZE(retuni);
379 if (retunisize > 0) {
380 REQUIRE_DECODEBUFFER(buf, retunisize);
381 memcpy((char *)buf->outbuf, PyUnicode_AS_DATA(retuni),
382 retunisize * Py_UNICODE_SIZE);
383 buf->outbuf += retunisize;
384 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000385
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000386 newpos = (int)PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
387 if (newpos < 0)
388 newpos += (int)(buf->inbuf_end - buf->inbuf_top);
389 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
390 PyErr_Format(PyExc_IndexError,
391 "position %d from error handler out of bounds",
392 newpos);
393 goto errorexit;
394 }
395 buf->inbuf = buf->inbuf_top + newpos;
396 Py_DECREF(retobj);
397 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000398
399errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000400 Py_XDECREF(retobj);
401 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000402}
403
404static PyObject *
405multibytecodec_encode(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000406 MultibyteCodec_State *state,
407 const Py_UNICODE **data, size_t datalen,
408 PyObject *errors, int flags)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000409{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000410 MultibyteEncodeBuffer buf;
411 int finalsize, r = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000412
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000413 if (datalen == 0)
414 return PyString_FromString("");
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000415
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000416 buf.excobj = NULL;
417 buf.inbuf = buf.inbuf_top = *data;
418 buf.inbuf_end = buf.inbuf_top + datalen;
419 buf.outobj = PyString_FromStringAndSize(NULL, datalen * 2 + 16);
420 if (buf.outobj == NULL)
421 goto errorexit;
422 buf.outbuf = (unsigned char *)PyString_AS_STRING(buf.outobj);
423 buf.outbuf_end = buf.outbuf + PyString_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000424
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000425 while (buf.inbuf < buf.inbuf_end) {
426 size_t inleft, outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000427
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000428 /* we don't reuse inleft and outleft here.
429 * error callbacks can relocate the cursor anywhere on buffer*/
430 inleft = (size_t)(buf.inbuf_end - buf.inbuf);
431 outleft = (size_t)(buf.outbuf_end - buf.outbuf);
432 r = codec->encode(state, codec->config, &buf.inbuf, inleft,
433 &buf.outbuf, outleft, flags);
434 *data = buf.inbuf;
435 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
436 break;
437 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
438 goto errorexit;
439 else if (r == MBERR_TOOFEW)
440 break;
441 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000442
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000443 if (codec->encreset != NULL)
444 for (;;) {
445 size_t outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000446
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000447 outleft = (size_t)(buf.outbuf_end - buf.outbuf);
448 r = codec->encreset(state, codec->config, &buf.outbuf,
449 outleft);
450 if (r == 0)
451 break;
452 else if (multibytecodec_encerror(codec, state,
453 &buf, errors, r))
454 goto errorexit;
455 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000456
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000457 finalsize = (int)((char*)buf.outbuf - PyString_AS_STRING(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000458
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000459 if (finalsize != PyString_GET_SIZE(buf.outobj))
460 if (_PyString_Resize(&buf.outobj, finalsize) == -1)
461 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000462
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000463 Py_XDECREF(buf.excobj);
464 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000465
466errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000467 Py_XDECREF(buf.excobj);
468 Py_XDECREF(buf.outobj);
469 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000470}
471
472static PyObject *
473MultibyteCodec_Encode(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000474 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000475{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000476 MultibyteCodec_State state;
477 Py_UNICODE *data;
478 PyObject *errorcb, *r, *arg, *ucvt;
479 const char *errors = NULL;
480 int datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000481
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000482 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|z:encode",
483 codeckwarglist, &arg, &errors))
484 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000485
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000486 if (PyUnicode_Check(arg))
487 ucvt = NULL;
488 else {
489 arg = ucvt = PyObject_Unicode(arg);
490 if (arg == NULL)
491 return NULL;
492 else if (!PyUnicode_Check(arg)) {
493 PyErr_SetString(PyExc_TypeError,
494 "couldn't convert the object to unicode.");
495 Py_DECREF(ucvt);
496 return NULL;
497 }
498 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000499
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000500 data = PyUnicode_AS_UNICODE(arg);
501 datalen = PyUnicode_GET_SIZE(arg);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000502
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000503 errorcb = get_errorcallback(errors);
504 if (errorcb == NULL) {
505 Py_XDECREF(ucvt);
506 return NULL;
507 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000508
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000509 if (self->codec->encinit != NULL &&
510 self->codec->encinit(&state, self->codec->config) != 0)
511 goto errorexit;
512 r = multibytecodec_encode(self->codec, &state,
513 (const Py_UNICODE **)&data, datalen, errorcb,
514 MBENC_FLUSH | MBENC_RESET);
515 if (r == NULL)
516 goto errorexit;
517
518 if (errorcb > ERROR_MAX) {
519 Py_DECREF(errorcb);
520 }
521 Py_XDECREF(ucvt);
522 return make_tuple(r, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000523
524errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000525 if (errorcb > ERROR_MAX) {
526 Py_DECREF(errorcb);
527 }
528 Py_XDECREF(ucvt);
529 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000530}
531
532static PyObject *
533MultibyteCodec_Decode(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000534 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000535{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000536 MultibyteCodec_State state;
537 MultibyteDecodeBuffer buf;
538 PyObject *errorcb;
539 const char *data, *errors = NULL;
540 int datalen, finalsize;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000541
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000542 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|z:decode",
543 codeckwarglist, &data, &datalen, &errors))
544 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000545
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000546 errorcb = get_errorcallback(errors);
547 if (errorcb == NULL)
548 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000549
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000550 if (datalen == 0) {
551 if (errorcb > ERROR_MAX) {
552 Py_DECREF(errorcb);
553 }
554 return make_tuple(PyUnicode_FromUnicode(NULL, 0), 0);
555 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000556
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000557 buf.outobj = buf.excobj = NULL;
558 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
559 buf.inbuf_end = buf.inbuf_top + datalen;
560 buf.outobj = PyUnicode_FromUnicode(NULL, datalen);
561 if (buf.outobj == NULL)
562 goto errorexit;
563 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
564 buf.outbuf_end = buf.outbuf + PyUnicode_GET_SIZE(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000565
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000566 if (self->codec->decinit != NULL &&
567 self->codec->decinit(&state, self->codec->config) != 0)
568 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000569
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000570 while (buf.inbuf < buf.inbuf_end) {
571 size_t inleft, outleft;
572 int r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000573
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000574 inleft = (size_t)(buf.inbuf_end - buf.inbuf);
575 outleft = (size_t)(buf.outbuf_end - buf.outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000576
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000577 r = self->codec->decode(&state, self->codec->config,
578 &buf.inbuf, inleft, &buf.outbuf, outleft);
579 if (r == 0)
580 break;
581 else if (multibytecodec_decerror(self->codec, &state,
582 &buf, errorcb, r))
583 goto errorexit;
584 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000585
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000586 finalsize = (int)(buf.outbuf - PyUnicode_AS_UNICODE(buf.outobj));
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000587
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000588 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
589 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
590 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000591
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000592 Py_XDECREF(buf.excobj);
593 if (errorcb > ERROR_MAX) {
594 Py_DECREF(errorcb);
595 }
596 return make_tuple(buf.outobj, datalen);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000597
598errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000599 if (errorcb > ERROR_MAX) {
600 Py_DECREF(errorcb);
601 }
602 Py_XDECREF(buf.excobj);
603 Py_XDECREF(buf.outobj);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000604
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000605 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000606}
607
608static PyObject *
609MultibyteCodec_StreamReader(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000610 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000611{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000612 PyObject *stream;
613 char *errors = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000614
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamReader",
616 streamkwarglist, &stream, &errors))
617 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000618
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000619 return mbstreamreader_create(self->codec, stream, errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000620}
621
622static PyObject *
623MultibyteCodec_StreamWriter(MultibyteCodecObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000624 PyObject *args, PyObject *kwargs)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000625{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000626 PyObject *stream;
627 char *errors = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000628
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000629 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s:StreamWriter",
630 streamkwarglist, &stream, &errors))
631 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000632
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000633 return mbstreamwriter_create(self->codec, stream, errors);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000634}
635
636static struct PyMethodDef multibytecodec_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000637 {"encode", (PyCFunction)MultibyteCodec_Encode,
638 METH_VARARGS | METH_KEYWORDS,
639 MultibyteCodec_Encode__doc__},
640 {"decode", (PyCFunction)MultibyteCodec_Decode,
641 METH_VARARGS | METH_KEYWORDS,
642 MultibyteCodec_Decode__doc__},
643 {"StreamReader",(PyCFunction)MultibyteCodec_StreamReader,
644 METH_VARARGS | METH_KEYWORDS,
645 MultibyteCodec_StreamReader__doc__},
646 {"StreamWriter",(PyCFunction)MultibyteCodec_StreamWriter,
647 METH_VARARGS | METH_KEYWORDS,
648 MultibyteCodec_StreamWriter__doc__},
649 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000650};
651
652static void
653multibytecodec_dealloc(MultibyteCodecObject *self)
654{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000655 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000656}
657
658
659
660static PyTypeObject MultibyteCodec_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000661 PyObject_HEAD_INIT(NULL)
662 0, /* ob_size */
663 "MultibyteCodec", /* tp_name */
664 sizeof(MultibyteCodecObject), /* tp_basicsize */
665 0, /* tp_itemsize */
666 /* methods */
667 (destructor)multibytecodec_dealloc, /* tp_dealloc */
668 0, /* tp_print */
669 0, /* tp_getattr */
670 0, /* tp_setattr */
671 0, /* tp_compare */
672 0, /* tp_repr */
673 0, /* tp_as_number */
674 0, /* tp_as_sequence */
675 0, /* tp_as_mapping */
676 0, /* tp_hash */
677 0, /* tp_call */
678 0, /* tp_str */
679 PyObject_GenericGetAttr, /* tp_getattro */
680 0, /* tp_setattro */
681 0, /* tp_as_buffer */
682 Py_TPFLAGS_DEFAULT, /* tp_flags */
683 0, /* tp_doc */
684 0, /* tp_traverse */
685 0, /* tp_clear */
686 0, /* tp_richcompare */
687 0, /* tp_weaklistoffset */
688 0, /* tp_iter */
689 0, /* tp_iterext */
690 multibytecodec_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000691};
692
693static PyObject *
694mbstreamreader_iread(MultibyteStreamReaderObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000695 const char *method, int sizehint)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000696{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000697 MultibyteDecodeBuffer buf;
698 PyObject *cres;
699 int rsize, r, finalsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000700
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000701 if (sizehint == 0)
702 return PyUnicode_FromUnicode(NULL, 0);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000703
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000704 buf.outobj = buf.excobj = NULL;
705 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000706
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000707 for (;;) {
708 if (sizehint < 0)
709 cres = PyObject_CallMethod(self->stream,
710 (char *)method, NULL);
711 else
712 cres = PyObject_CallMethod(self->stream,
713 (char *)method, "i", sizehint);
714 if (cres == NULL)
715 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000716
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000717 if (!PyString_Check(cres)) {
718 PyErr_SetString(PyExc_TypeError,
719 "stream function returned a "
720 "non-string object");
721 goto errorexit;
722 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000723
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000724 if (self->pendingsize > 0) {
725 PyObject *ctr;
726 char *ctrdata;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000727
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000728 rsize = PyString_GET_SIZE(cres) + self->pendingsize;
729 ctr = PyString_FromStringAndSize(NULL, rsize);
730 if (ctr == NULL)
731 goto errorexit;
732 ctrdata = PyString_AS_STRING(ctr);
733 memcpy(ctrdata, self->pending, self->pendingsize);
734 memcpy(ctrdata + self->pendingsize,
735 PyString_AS_STRING(cres),
736 PyString_GET_SIZE(cres));
737 Py_DECREF(cres);
738 cres = ctr;
739 self->pendingsize = 0;
740 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000741
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000742 rsize = PyString_GET_SIZE(cres);
743 buf.inbuf = buf.inbuf_top =
744 (unsigned char *)PyString_AS_STRING(cres);
745 buf.inbuf_end = buf.inbuf_top + rsize;
746 if (buf.outobj == NULL) {
747 buf.outobj = PyUnicode_FromUnicode(NULL, rsize);
748 if (buf.outobj == NULL)
749 goto errorexit;
750 buf.outbuf = PyUnicode_AS_UNICODE(buf.outobj);
751 buf.outbuf_end = buf.outbuf +
752 PyUnicode_GET_SIZE(buf.outobj);
753 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000754
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000755 r = 0;
756 if (rsize > 0)
757 while (buf.inbuf < buf.inbuf_end) {
758 size_t inleft, outleft;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000759
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000760 inleft = (size_t)(buf.inbuf_end - buf.inbuf);
761 outleft = (size_t)(buf.outbuf_end -buf.outbuf);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000762
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000763 r = self->codec->decode(&self->state,
764 self->codec->config,
765 &buf.inbuf, inleft,
766 &buf.outbuf, outleft);
767 if (r == 0 || r == MBERR_TOOFEW)
768 break;
769 else if (multibytecodec_decerror(self->codec,
770 &self->state, &buf,
771 self->errors, r))
772 goto errorexit;
773 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000774
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000775 if (rsize == 0 || sizehint < 0) { /* end of file */
776 if (buf.inbuf < buf.inbuf_end &&
777 multibytecodec_decerror(self->codec, &self->state,
778 &buf, self->errors, MBERR_TOOFEW))
779 goto errorexit;
780 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000781
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000782 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
783 size_t npendings;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000784
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000785 /* we can't assume that pendingsize is still 0 here.
786 * because this function can be called recursively
787 * from error callback */
788 npendings = (size_t)(buf.inbuf_end - buf.inbuf);
789 if (npendings + self->pendingsize > MAXDECPENDING) {
790 PyErr_SetString(PyExc_RuntimeError,
791 "pending buffer overflow");
792 goto errorexit;
793 }
794 memcpy(self->pending + self->pendingsize, buf.inbuf,
795 npendings);
796 self->pendingsize += npendings;
797 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000798
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000799 finalsize = (int)(buf.outbuf -
800 PyUnicode_AS_UNICODE(buf.outobj));
801 Py_DECREF(cres);
802 cres = NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000803
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000804 if (sizehint < 0 || finalsize != 0 || rsize == 0)
805 break;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000806
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000807 sizehint = 1; /* read 1 more byte and retry */
808 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000809
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000810 if (finalsize != PyUnicode_GET_SIZE(buf.outobj))
811 if (PyUnicode_Resize(&buf.outobj, finalsize) == -1)
812 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000813
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000814 Py_XDECREF(cres);
815 Py_XDECREF(buf.excobj);
816 return buf.outobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000817
818errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000819 Py_XDECREF(cres);
820 Py_XDECREF(buf.excobj);
821 Py_XDECREF(buf.outobj);
822 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000823}
824
825static PyObject *
826mbstreamreader_read(MultibyteStreamReaderObject *self, PyObject *args)
827{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000828 PyObject *sizeobj = NULL;
829 long size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000830
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000831 if (!PyArg_ParseTuple(args, "|O:read", &sizeobj))
832 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000833
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000834 if (sizeobj == Py_None || sizeobj == NULL)
835 size = -1;
836 else if (PyInt_Check(sizeobj))
837 size = PyInt_AsLong(sizeobj);
838 else {
839 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
840 return NULL;
841 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000842
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000843 return mbstreamreader_iread(self, "read", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000844}
845
846static PyObject *
847mbstreamreader_readline(MultibyteStreamReaderObject *self, PyObject *args)
848{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000849 PyObject *sizeobj = NULL;
850 long size;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000851
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000852 if (!PyArg_ParseTuple(args, "|O:readline", &sizeobj))
853 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000854
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000855 if (sizeobj == Py_None || sizeobj == NULL)
856 size = -1;
857 else if (PyInt_Check(sizeobj))
858 size = PyInt_AsLong(sizeobj);
859 else {
860 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
861 return NULL;
862 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000863
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000864 return mbstreamreader_iread(self, "readline", size);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000865}
866
867static PyObject *
868mbstreamreader_readlines(MultibyteStreamReaderObject *self, PyObject *args)
869{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000870 PyObject *sizehintobj = NULL, *r, *sr;
871 long sizehint;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000872
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000873 if (!PyArg_ParseTuple(args, "|O:readlines", &sizehintobj))
874 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000875
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000876 if (sizehintobj == Py_None || sizehintobj == NULL)
877 sizehint = -1;
878 else if (PyInt_Check(sizehintobj))
879 sizehint = PyInt_AsLong(sizehintobj);
880 else {
881 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
882 return NULL;
883 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000884
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000885 r = mbstreamreader_iread(self, "read", sizehint);
886 if (r == NULL)
887 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000888
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000889 sr = PyUnicode_Splitlines(r, 1);
890 Py_DECREF(r);
891 return sr;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000892}
893
894static PyObject *
895mbstreamreader_reset(MultibyteStreamReaderObject *self)
896{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000897 if (self->codec->decreset != NULL &&
898 self->codec->decreset(&self->state, self->codec->config) != 0)
899 return NULL;
900 self->pendingsize = 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000901
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000902 Py_INCREF(Py_None);
903 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000904}
905
906static struct PyMethodDef mbstreamreader_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000907 {"read", (PyCFunction)mbstreamreader_read,
908 METH_VARARGS, NULL},
909 {"readline", (PyCFunction)mbstreamreader_readline,
910 METH_VARARGS, NULL},
911 {"readlines", (PyCFunction)mbstreamreader_readlines,
912 METH_VARARGS, NULL},
913 {"reset", (PyCFunction)mbstreamreader_reset,
914 METH_NOARGS, NULL},
915 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000916};
917
918static void
919mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
920{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000921 if (self->errors > ERROR_MAX) {
922 Py_DECREF(self->errors);
923 }
924 Py_DECREF(self->stream);
925 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000926}
927
928
929
930static PyTypeObject MultibyteStreamReader_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000931 PyObject_HEAD_INIT(NULL)
932 0, /* ob_size */
933 "MultibyteStreamReader", /* tp_name */
934 sizeof(MultibyteStreamReaderObject), /* tp_basicsize */
935 0, /* tp_itemsize */
936 /* methods */
937 (destructor)mbstreamreader_dealloc, /* tp_dealloc */
938 0, /* tp_print */
939 0, /* tp_getattr */
940 0, /* tp_setattr */
941 0, /* tp_compare */
942 0, /* tp_repr */
943 0, /* tp_as_number */
944 0, /* tp_as_sequence */
945 0, /* tp_as_mapping */
946 0, /* tp_hash */
947 0, /* tp_call */
948 0, /* tp_str */
949 PyObject_GenericGetAttr, /* tp_getattro */
950 0, /* tp_setattro */
951 0, /* tp_as_buffer */
952 Py_TPFLAGS_DEFAULT, /* tp_flags */
953 0, /* tp_doc */
954 0, /* tp_traverse */
955 0, /* tp_clear */
956 0, /* tp_richcompare */
957 0, /* tp_weaklistoffset */
958 0, /* tp_iter */
959 0, /* tp_iterext */
960 mbstreamreader_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000961};
962
963static int
964mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000965 PyObject *unistr)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000966{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000967 PyObject *wr, *ucvt, *r = NULL;
Hye-Shik Changf5a149a2004-08-19 17:49:56 +0000968 Py_UNICODE *inbuf, *inbuf_end, *inbuf_tmp = NULL;
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000969 int datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000970
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000971 if (PyUnicode_Check(unistr))
972 ucvt = NULL;
973 else {
974 unistr = ucvt = PyObject_Unicode(unistr);
975 if (unistr == NULL)
976 return -1;
977 else if (!PyUnicode_Check(unistr)) {
978 PyErr_SetString(PyExc_TypeError,
979 "couldn't convert the object to unicode.");
980 Py_DECREF(ucvt);
981 return -1;
982 }
983 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000984
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000985 datalen = PyUnicode_GET_SIZE(unistr);
986 if (datalen == 0) {
987 Py_XDECREF(ucvt);
988 return 0;
989 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +0000990
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +0000991 if (self->pendingsize > 0) {
992 inbuf_tmp = PyMem_New(Py_UNICODE, datalen + self->pendingsize);
993 if (inbuf_tmp == NULL)
994 goto errorexit;
995 memcpy(inbuf_tmp, self->pending,
996 Py_UNICODE_SIZE * self->pendingsize);
997 memcpy(inbuf_tmp + self->pendingsize,
998 PyUnicode_AS_UNICODE(unistr),
999 Py_UNICODE_SIZE * datalen);
1000 datalen += self->pendingsize;
1001 self->pendingsize = 0;
1002 inbuf = inbuf_tmp;
1003 }
1004 else
1005 inbuf = (Py_UNICODE *)PyUnicode_AS_UNICODE(unistr);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001006
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001007 inbuf_end = inbuf + datalen;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001008
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001009 r = multibytecodec_encode(self->codec, &self->state,
1010 (const Py_UNICODE **)&inbuf, datalen, self->errors, 0);
1011 if (r == NULL)
1012 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001013
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001014 if (inbuf < inbuf_end) {
1015 self->pendingsize = (int)(inbuf_end - inbuf);
1016 if (self->pendingsize > MAXENCPENDING) {
1017 self->pendingsize = 0;
1018 PyErr_SetString(PyExc_RuntimeError,
1019 "pending buffer overflow");
1020 goto errorexit;
1021 }
1022 memcpy(self->pending, inbuf,
1023 self->pendingsize * Py_UNICODE_SIZE);
1024 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001025
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001026 wr = PyObject_CallMethod(self->stream, "write", "O", r);
1027 if (wr == NULL)
1028 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001029
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001030 if (inbuf_tmp != NULL)
1031 PyMem_Del(inbuf_tmp);
1032 Py_DECREF(r);
1033 Py_DECREF(wr);
1034 Py_XDECREF(ucvt);
1035 return 0;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001036
1037errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001038 if (inbuf_tmp != NULL)
1039 PyMem_Del(inbuf_tmp);
1040 Py_XDECREF(r);
1041 Py_XDECREF(ucvt);
1042 return -1;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001043}
1044
1045static PyObject *
1046mbstreamwriter_write(MultibyteStreamWriterObject *self, PyObject *args)
1047{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001048 PyObject *strobj;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001049
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001050 if (!PyArg_ParseTuple(args, "O:write", &strobj))
1051 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001052
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001053 if (mbstreamwriter_iwrite(self, strobj))
1054 return NULL;
1055 else {
1056 Py_INCREF(Py_None);
1057 return Py_None;
1058 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001059}
1060
1061static PyObject *
1062mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *args)
1063{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001064 PyObject *lines, *strobj;
1065 int i, r;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001066
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001067 if (!PyArg_ParseTuple(args, "O:writelines", &lines))
1068 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001069
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001070 if (!PySequence_Check(lines)) {
1071 PyErr_SetString(PyExc_TypeError,
1072 "arg must be a sequence object");
1073 return NULL;
1074 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001075
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001076 for (i = 0; i < PySequence_Length(lines); i++) {
1077 /* length can be changed even within this loop */
1078 strobj = PySequence_GetItem(lines, i);
1079 if (strobj == NULL)
1080 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001081
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001082 r = mbstreamwriter_iwrite(self, strobj);
1083 Py_DECREF(strobj);
1084 if (r == -1)
1085 return NULL;
1086 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001087
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001088 Py_INCREF(Py_None);
1089 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001090}
1091
1092static PyObject *
1093mbstreamwriter_reset(MultibyteStreamWriterObject *self)
1094{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001095 const Py_UNICODE *pending;
1096 PyObject *pwrt;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001097
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001098 pending = self->pending;
1099 pwrt = multibytecodec_encode(self->codec, &self->state,
1100 &pending, self->pendingsize, self->errors,
1101 MBENC_FLUSH | MBENC_RESET);
1102 /* some pending buffer can be truncated when UnicodeEncodeError is
1103 * raised on 'strict' mode. but, 'reset' method is designed to
1104 * reset the pending buffer or states so failed string sequence
1105 * ought to be missed */
1106 self->pendingsize = 0;
1107 if (pwrt == NULL)
1108 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001109
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001110 if (PyString_Size(pwrt) > 0) {
1111 PyObject *wr;
1112 wr = PyObject_CallMethod(self->stream, "write", "O", pwrt);
1113 if (wr == NULL) {
1114 Py_DECREF(pwrt);
1115 return NULL;
1116 }
1117 }
1118 Py_DECREF(pwrt);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001119
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001120 Py_INCREF(Py_None);
1121 return Py_None;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001122}
1123
1124static void
1125mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1126{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001127 if (self->errors > ERROR_MAX) {
1128 Py_DECREF(self->errors);
1129 }
1130 Py_DECREF(self->stream);
1131 PyObject_Del(self);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001132}
1133
1134static struct PyMethodDef mbstreamwriter_methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001135 {"write", (PyCFunction)mbstreamwriter_write,
1136 METH_VARARGS, NULL},
1137 {"writelines", (PyCFunction)mbstreamwriter_writelines,
1138 METH_VARARGS, NULL},
1139 {"reset", (PyCFunction)mbstreamwriter_reset,
1140 METH_NOARGS, NULL},
1141 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001142};
1143
1144
1145
1146static PyTypeObject MultibyteStreamWriter_Type = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001147 PyObject_HEAD_INIT(NULL)
1148 0, /* ob_size */
1149 "MultibyteStreamWriter", /* tp_name */
1150 sizeof(MultibyteStreamWriterObject), /* tp_basicsize */
1151 0, /* tp_itemsize */
1152 /* methods */
1153 (destructor)mbstreamwriter_dealloc, /* tp_dealloc */
1154 0, /* tp_print */
1155 0, /* tp_getattr */
1156 0, /* tp_setattr */
1157 0, /* tp_compare */
1158 0, /* tp_repr */
1159 0, /* tp_as_number */
1160 0, /* tp_as_sequence */
1161 0, /* tp_as_mapping */
1162 0, /* tp_hash */
1163 0, /* tp_call */
1164 0, /* tp_str */
1165 PyObject_GenericGetAttr, /* tp_getattro */
1166 0, /* tp_setattro */
1167 0, /* tp_as_buffer */
1168 Py_TPFLAGS_DEFAULT, /* tp_flags */
1169 0, /* tp_doc */
1170 0, /* tp_traverse */
1171 0, /* tp_clear */
1172 0, /* tp_richcompare */
1173 0, /* tp_weaklistoffset */
1174 0, /* tp_iter */
1175 0, /* tp_iterext */
1176 mbstreamwriter_methods, /* tp_methods */
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001177};
1178
1179static PyObject *
1180__create_codec(PyObject *ignore, PyObject *arg)
1181{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001182 MultibyteCodecObject *self;
1183 MultibyteCodec *codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001184
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001185 if (!PyCObject_Check(arg)) {
1186 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1187 return NULL;
1188 }
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001189
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001190 codec = PyCObject_AsVoidPtr(arg);
1191 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1192 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001193
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001194 self = PyObject_New(MultibyteCodecObject, &MultibyteCodec_Type);
1195 if (self == NULL)
1196 return NULL;
1197 self->codec = codec;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001198
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001199 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001200}
1201
1202static PyObject *
1203mbstreamreader_create(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001204 PyObject *stream, const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001205{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001206 MultibyteStreamReaderObject *self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001207
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001208 self = PyObject_New(MultibyteStreamReaderObject,
1209 &MultibyteStreamReader_Type);
1210 if (self == NULL)
1211 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001212
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001213 self->codec = codec;
1214 self->stream = stream;
1215 Py_INCREF(stream);
1216 self->pendingsize = 0;
1217 self->errors = get_errorcallback(errors);
1218 if (self->errors == NULL)
1219 goto errorexit;
1220 if (self->codec->decinit != NULL &&
1221 self->codec->decinit(&self->state, self->codec->config) != 0)
1222 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001223
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001224 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001225
1226errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001227 Py_XDECREF(self);
1228 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001229}
1230
1231static PyObject *
1232mbstreamwriter_create(MultibyteCodec *codec,
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001233 PyObject *stream, const char *errors)
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001234{
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001235 MultibyteStreamWriterObject *self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001236
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001237 self = PyObject_New(MultibyteStreamWriterObject,
1238 &MultibyteStreamWriter_Type);
1239 if (self == NULL)
1240 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001241
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001242 self->codec = codec;
1243 self->stream = stream;
1244 Py_INCREF(stream);
1245 self->pendingsize = 0;
1246 self->errors = get_errorcallback(errors);
1247 if (self->errors == NULL)
1248 goto errorexit;
1249 if (self->codec->encinit != NULL &&
1250 self->codec->encinit(&self->state, self->codec->config) != 0)
1251 goto errorexit;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001252
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001253 return (PyObject *)self;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001254
1255errorexit:
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001256 Py_XDECREF(self);
1257 return NULL;
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001258}
1259
1260static struct PyMethodDef __methods[] = {
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001261 {"__create_codec", (PyCFunction)__create_codec, METH_O},
1262 {NULL, NULL},
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001263};
1264
1265void
1266init_multibytecodec(void)
1267{
Neal Norwitz058bde12005-09-21 06:44:25 +00001268 MultibyteCodec_Type.ob_type = &PyType_Type;
1269 MultibyteStreamReader_Type.ob_type = &PyType_Type;
1270 MultibyteStreamWriter_Type.ob_type = &PyType_Type;
1271
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001272 Py_InitModule("_multibytecodec", __methods);
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001273
Hye-Shik Chang2bb146f2004-07-18 03:06:29 +00001274 if (PyErr_Occurred())
1275 Py_FatalError("can't initialize the _multibytecodec module");
Hye-Shik Chang3e2a3062004-01-17 14:29:29 +00001276}