blob: 08827b9da05037c1621f098a710c9a22373084bc [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
/* Class docstring for _io._TextIOBase; referenced by the tp_doc slot of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 {NULL, NULL}
126};
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000132 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000163 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
/* Class docstring for _io.IncrementalNewlineDecoder; referenced by the
   tp_doc slot of PyIncrementalNewlineDecoder_Type. */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode.  It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
    "records the types of newlines encountered.  When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in nldecoder_object.seennl. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
/* Decode `input` through the wrapped decoder (or use it as-is when the
   decoder is None), then record and optionally translate newlines.

   _self  - an nldecoder_object.
   input  - object passed to the wrapped decoder's decode method, or the
            string itself when self->decoder is Py_None.
   final  - non-zero when no more input will follow; a trailing '\r' is then
            not held back.

   Returns a new reference to a unicode object, or NULL with an exception
   set (ValueError if __init__ was never called, TypeError if the decoder
   did not return a string, or whatever the decoder / allocation raised).

   Side effects on self: `pendingcr` is updated and newly seen newline kinds
   are OR-ed into `seennl`. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the text to process. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    output_len = PyUnicode_GET_SIZE(output);
    /* Re-attach the '\r' held back by the previous call, but only once there
       is something to put after it (or the stream is finished). */
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place only when we hold the sole reference;
               otherwise build a shortened copy. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        /* NOTE(review): the scanning loops below test `s > end` / `in > end`
           only after reading a character <= '\r' (or <= '\n'), so they
           appear to rely on the buffer being followed by a terminating
           character of that kind (presumably the trailing NUL of the
           unicode buffer) -- confirm before changing them. */
        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            out_str = PyUnicode_AS_UNICODE(translated);
            /* Read from the original buffer and write into `translated`;
               the write cursor never gets ahead of the read cursor, which
               is what makes the in-place case (translated == output) work. */
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* "\r\n" pairs collapsed into one character: trim the tail. */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
461
462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000463incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000564 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565};
566
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000569 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570};
571
572PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000575 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000593 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000600 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611};
612
613
614/* TextIOWrapper */
615
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
633 "newline.\n"
634 "\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
637 );
638
639typedef PyObject *
640 (*encodefunc_t)(PyObject *, PyObject *);
641
/* Instance layout of TextIOWrapper objects (see textiowrapper_init for how
   the fields are populated). */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;           /* reset to 0 by __init__ */
    Py_ssize_t chunk_size;  /* set to 8192 by __init__ */
    PyObject *buffer;
    PyObject *encoding;     /* encoding name, as a unicode object */
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;       /* the `newline` argument as a unicode object;
                               left NULL when newline is None */
    PyObject *errors;       /* error-handler name, stored as a bytes object */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;     /* newline is None or '' */
    char readtranslate;     /* newline is None */
    char writetranslate;    /* newline is None or non-empty */
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    /* NOTE(review): presumably the slots behind the type's
       tp_weaklistoffset / tp_dictoffset -- the type object is not visible
       here, confirm there. */
    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000693
694
695/* A couple of specialized cases in order to bypass the slow incremental
696 encoding methods for the most popular encodings. */
697
698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700{
701 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
702 PyUnicode_GET_SIZE(text),
703 PyBytes_AS_STRING(self->errors));
704}
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
709 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
710 PyUnicode_GET_SIZE(text),
711 PyBytes_AS_STRING(self->errors), 1);
712}
713
714static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000715utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716{
717 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
718 PyUnicode_GET_SIZE(text),
719 PyBytes_AS_STRING(self->errors), -1);
720}
721
722static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000723utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724{
Antoine Pitroue4501852009-05-14 18:55:55 +0000725 if (!self->encoding_start_of_stream) {
726 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000728 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 }
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
Antoine Pitroue4501852009-05-14 18:55:55 +0000738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000740{
741 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
742 PyUnicode_GET_SIZE(text),
743 PyBytes_AS_STRING(self->errors), 1);
744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
749 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
750 PyUnicode_GET_SIZE(text),
751 PyBytes_AS_STRING(self->errors), -1);
752}
753
754static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000755utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000756{
757 if (!self->encoding_start_of_stream) {
758 /* Skip the BOM and use native byte ordering */
759#if defined(WORDS_BIGENDIAN)
760 return utf32be_encode(self, text);
761#else
762 return utf32le_encode(self, text);
763#endif
764 }
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 PyUnicode_GET_SIZE(text),
767 PyBytes_AS_STRING(self->errors), 0);
768}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
773 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
774 PyUnicode_GET_SIZE(text),
775 PyBytes_AS_STRING(self->errors));
776}
777
778static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000779latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780{
781 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
782 PyUnicode_GET_SIZE(text),
783 PyBytes_AS_STRING(self->errors));
784}
785
786/* Map normalized encoding names onto the specialized encoding funcs */
787
/* One row of the encoding-name -> specialized-encoder lookup table. */
typedef struct {
    const char *name;           /* normalized encoding name */
    encodefunc_t encodefunc;    /* encoder to use for that name */
} encodefuncentry;
792
Antoine Pitrou24f36292009-03-28 22:16:42 +0000793static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000794 {"ascii", (encodefunc_t) ascii_encode},
795 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000796 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797 {"utf-16-be", (encodefunc_t) utf16be_encode},
798 {"utf-16-le", (encodefunc_t) utf16le_encode},
799 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000800 {"utf-32-be", (encodefunc_t) utf32be_encode},
801 {"utf-32-le", (encodefunc_t) utf32le_encode},
802 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803 {NULL, NULL}
804};
805
806
807static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000808textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809{
810 char *kwlist[] = {"buffer", "encoding", "errors",
811 "newline", "line_buffering",
812 NULL};
813 PyObject *buffer, *raw;
814 char *encoding = NULL;
815 char *errors = NULL;
816 char *newline = NULL;
817 int line_buffering = 0;
818 _PyIO_State *state = IO_STATE;
819
820 PyObject *res;
821 int r;
822
823 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000824 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000825 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
826 kwlist, &buffer, &encoding, &errors,
827 &newline, &line_buffering))
828 return -1;
829
830 if (newline && newline[0] != '\0'
831 && !(newline[0] == '\n' && newline[1] == '\0')
832 && !(newline[0] == '\r' && newline[1] == '\0')
833 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
834 PyErr_Format(PyExc_ValueError,
835 "illegal newline value: %s", newline);
836 return -1;
837 }
838
839 Py_CLEAR(self->buffer);
840 Py_CLEAR(self->encoding);
841 Py_CLEAR(self->encoder);
842 Py_CLEAR(self->decoder);
843 Py_CLEAR(self->readnl);
844 Py_CLEAR(self->decoded_chars);
845 Py_CLEAR(self->pending_bytes);
846 Py_CLEAR(self->snapshot);
847 Py_CLEAR(self->errors);
848 Py_CLEAR(self->raw);
849 self->decoded_chars_used = 0;
850 self->pending_bytes_count = 0;
851 self->encodefunc = NULL;
852
853 if (encoding == NULL) {
854 /* Try os.device_encoding(fileno) */
855 PyObject *fileno;
856 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
857 /* Ignore only AttributeError and UnsupportedOperation */
858 if (fileno == NULL) {
859 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
860 PyErr_ExceptionMatches(state->unsupported_operation)) {
861 PyErr_Clear();
862 }
863 else {
864 goto error;
865 }
866 }
867 else {
868 self->encoding = PyObject_CallMethod(state->os_module,
869 "device_encoding",
870 "N", fileno);
871 if (self->encoding == NULL)
872 goto error;
873 else if (!PyUnicode_Check(self->encoding))
874 Py_CLEAR(self->encoding);
875 }
876 }
877 if (encoding == NULL && self->encoding == NULL) {
878 if (state->locale_module == NULL) {
879 state->locale_module = PyImport_ImportModule("locale");
880 if (state->locale_module == NULL)
881 goto catch_ImportError;
882 else
883 goto use_locale;
884 }
885 else {
886 use_locale:
887 self->encoding = PyObject_CallMethod(
888 state->locale_module, "getpreferredencoding", NULL);
889 if (self->encoding == NULL) {
890 catch_ImportError:
891 /*
892 Importing locale can raise a ImportError because of
893 _functools, and locale.getpreferredencoding can raise a
894 ImportError if _locale is not available. These will happen
895 during module building.
896 */
897 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
898 PyErr_Clear();
899 self->encoding = PyUnicode_FromString("ascii");
900 }
901 else
902 goto error;
903 }
904 else if (!PyUnicode_Check(self->encoding))
905 Py_CLEAR(self->encoding);
906 }
907 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000908 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000909 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000910 if (encoding == NULL)
911 goto error;
912 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000913 else if (encoding != NULL) {
914 self->encoding = PyUnicode_FromString(encoding);
915 if (self->encoding == NULL)
916 goto error;
917 }
918 else {
919 PyErr_SetString(PyExc_IOError,
920 "could not determine default encoding");
921 }
922
923 if (errors == NULL)
924 errors = "strict";
925 self->errors = PyBytes_FromString(errors);
926 if (self->errors == NULL)
927 goto error;
928
929 self->chunk_size = 8192;
930 self->readuniversal = (newline == NULL || newline[0] == '\0');
931 self->line_buffering = line_buffering;
932 self->readtranslate = (newline == NULL);
933 if (newline) {
934 self->readnl = PyUnicode_FromString(newline);
935 if (self->readnl == NULL)
936 return -1;
937 }
938 self->writetranslate = (newline == NULL || newline[0] != '\0');
939 if (!self->readuniversal && self->readnl) {
940 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000941 if (self->writenl == NULL)
942 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000943 if (!strcmp(self->writenl, "\n"))
944 self->writenl = NULL;
945 }
946#ifdef MS_WINDOWS
947 else
948 self->writenl = "\r\n";
949#endif
950
951 /* Build the decoder object */
952 res = PyObject_CallMethod(buffer, "readable", NULL);
953 if (res == NULL)
954 goto error;
955 r = PyObject_IsTrue(res);
956 Py_DECREF(res);
957 if (r == -1)
958 goto error;
959 if (r == 1) {
960 self->decoder = PyCodec_IncrementalDecoder(
961 encoding, errors);
962 if (self->decoder == NULL)
963 goto error;
964
965 if (self->readuniversal) {
966 PyObject *incrementalDecoder = PyObject_CallFunction(
967 (PyObject *)&PyIncrementalNewlineDecoder_Type,
968 "Oi", self->decoder, (int)self->readtranslate);
969 if (incrementalDecoder == NULL)
970 goto error;
971 Py_CLEAR(self->decoder);
972 self->decoder = incrementalDecoder;
973 }
974 }
975
976 /* Build the encoder object */
977 res = PyObject_CallMethod(buffer, "writable", NULL);
978 if (res == NULL)
979 goto error;
980 r = PyObject_IsTrue(res);
981 Py_DECREF(res);
982 if (r == -1)
983 goto error;
984 if (r == 1) {
985 PyObject *ci;
986 self->encoder = PyCodec_IncrementalEncoder(
987 encoding, errors);
988 if (self->encoder == NULL)
989 goto error;
990 /* Get the normalized named of the codec */
991 ci = _PyCodec_Lookup(encoding);
992 if (ci == NULL)
993 goto error;
994 res = PyObject_GetAttrString(ci, "name");
995 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +0000996 if (res == NULL) {
997 if (PyErr_ExceptionMatches(PyExc_AttributeError))
998 PyErr_Clear();
999 else
1000 goto error;
1001 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001002 else if (PyUnicode_Check(res)) {
1003 encodefuncentry *e = encodefuncs;
1004 while (e->name != NULL) {
1005 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1006 self->encodefunc = e->encodefunc;
1007 break;
1008 }
1009 e++;
1010 }
1011 }
1012 Py_XDECREF(res);
1013 }
1014
1015 self->buffer = buffer;
1016 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001017
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001018 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1019 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1020 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1021 raw = PyObject_GetAttrString(buffer, "raw");
1022 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001023 if (raw == NULL) {
1024 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1025 PyErr_Clear();
1026 else
1027 goto error;
1028 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001029 else if (Py_TYPE(raw) == &PyFileIO_Type)
1030 self->raw = raw;
1031 else
1032 Py_DECREF(raw);
1033 }
1034
1035 res = PyObject_CallMethod(buffer, "seekable", NULL);
1036 if (res == NULL)
1037 goto error;
1038 self->seekable = self->telling = PyObject_IsTrue(res);
1039 Py_DECREF(res);
1040
Antoine Pitroue4501852009-05-14 18:55:55 +00001041 self->encoding_start_of_stream = 0;
1042 if (self->seekable && self->encoder) {
1043 PyObject *cookieObj;
1044 int cmp;
1045
1046 self->encoding_start_of_stream = 1;
1047
1048 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1049 if (cookieObj == NULL)
1050 goto error;
1051
1052 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1053 Py_DECREF(cookieObj);
1054 if (cmp < 0) {
1055 goto error;
1056 }
1057
1058 if (cmp == 0) {
1059 self->encoding_start_of_stream = 0;
1060 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1061 _PyIO_zero, NULL);
1062 if (res == NULL)
1063 goto error;
1064 Py_DECREF(res);
1065 }
1066 }
1067
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001068 self->ok = 1;
1069 return 0;
1070
1071 error:
1072 return -1;
1073}
1074
1075static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001076_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001077{
1078 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1079 return -1;
1080 self->ok = 0;
1081 Py_CLEAR(self->buffer);
1082 Py_CLEAR(self->encoding);
1083 Py_CLEAR(self->encoder);
1084 Py_CLEAR(self->decoder);
1085 Py_CLEAR(self->readnl);
1086 Py_CLEAR(self->decoded_chars);
1087 Py_CLEAR(self->pending_bytes);
1088 Py_CLEAR(self->snapshot);
1089 Py_CLEAR(self->errors);
1090 Py_CLEAR(self->raw);
1091 return 0;
1092}
1093
1094static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001095textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001096{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001097 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001098 return;
1099 _PyObject_GC_UNTRACK(self);
1100 if (self->weakreflist != NULL)
1101 PyObject_ClearWeakRefs((PyObject *)self);
1102 Py_CLEAR(self->dict);
1103 Py_TYPE(self)->tp_free((PyObject *)self);
1104}
1105
1106static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001107textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001108{
1109 Py_VISIT(self->buffer);
1110 Py_VISIT(self->encoding);
1111 Py_VISIT(self->encoder);
1112 Py_VISIT(self->decoder);
1113 Py_VISIT(self->readnl);
1114 Py_VISIT(self->decoded_chars);
1115 Py_VISIT(self->pending_bytes);
1116 Py_VISIT(self->snapshot);
1117 Py_VISIT(self->errors);
1118 Py_VISIT(self->raw);
1119
1120 Py_VISIT(self->dict);
1121 return 0;
1122}
1123
1124static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001125textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001126{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001127 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001128 return -1;
1129 Py_CLEAR(self->dict);
1130 return 0;
1131}
1132
1133static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001134textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135
1136/* This macro takes some shortcuts to make the common case faster. */
1137#define CHECK_CLOSED(self) \
1138 do { \
1139 int r; \
1140 PyObject *_res; \
1141 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1142 if (self->raw != NULL) \
1143 r = _PyFileIO_closed(self->raw); \
1144 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001145 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001146 if (_res == NULL) \
1147 return NULL; \
1148 r = PyObject_IsTrue(_res); \
1149 Py_DECREF(_res); \
1150 if (r < 0) \
1151 return NULL; \
1152 } \
1153 if (r > 0) { \
1154 PyErr_SetString(PyExc_ValueError, \
1155 "I/O operation on closed file."); \
1156 return NULL; \
1157 } \
1158 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001159 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001160 return NULL; \
1161 } while (0)
1162
/* Make the enclosing function return NULL with a ValueError when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1174
/* Same check as CHECK_INITIALIZED(), for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1186
1187
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001188static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001189textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001190{
1191 PyObject *buffer, *res;
1192 CHECK_INITIALIZED(self);
1193 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1194 if (res == NULL)
1195 return NULL;
1196 Py_DECREF(res);
1197 buffer = self->buffer;
1198 self->buffer = NULL;
1199 self->detached = 1;
1200 self->ok = 0;
1201 return buffer;
1202}
1203
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001204Py_LOCAL_INLINE(const Py_UNICODE *)
1205findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1206{
1207 /* like wcschr, but doesn't stop at NULL characters */
1208 while (size-- > 0) {
1209 if (*s == ch)
1210 return s;
1211 s++;
1212 }
1213 return NULL;
1214}
1215
Antoine Pitrou24f36292009-03-28 22:16:42 +00001216/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001217 underlying buffered object, though. */
1218static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001219_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001220{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001221 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222
1223 if (self->pending_bytes == NULL)
1224 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001225
1226 pending = self->pending_bytes;
1227 Py_INCREF(pending);
1228 self->pending_bytes_count = 0;
1229 Py_CLEAR(self->pending_bytes);
1230
1231 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1232 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001233 if (b == NULL)
1234 return -1;
1235 ret = PyObject_CallMethodObjArgs(self->buffer,
1236 _PyIO_str_write, b, NULL);
1237 Py_DECREF(b);
1238 if (ret == NULL)
1239 return -1;
1240 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001241 return 0;
1242}
1243
1244static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001245textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246{
1247 PyObject *ret;
1248 PyObject *text; /* owned reference */
1249 PyObject *b;
1250 Py_ssize_t textlen;
1251 int haslf = 0;
1252 int needflush = 0;
1253
1254 CHECK_INITIALIZED(self);
1255
1256 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1257 return NULL;
1258 }
1259
1260 CHECK_CLOSED(self);
1261
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001262 if (self->encoder == NULL)
1263 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001264
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265 Py_INCREF(text);
1266
1267 textlen = PyUnicode_GetSize(text);
1268
1269 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1270 if (findchar(PyUnicode_AS_UNICODE(text),
1271 PyUnicode_GET_SIZE(text), '\n'))
1272 haslf = 1;
1273
1274 if (haslf && self->writetranslate && self->writenl != NULL) {
1275 PyObject *newtext = PyObject_CallMethod(
1276 text, "replace", "ss", "\n", self->writenl);
1277 Py_DECREF(text);
1278 if (newtext == NULL)
1279 return NULL;
1280 text = newtext;
1281 }
1282
1283 if (self->line_buffering &&
1284 (haslf ||
1285 findchar(PyUnicode_AS_UNICODE(text),
1286 PyUnicode_GET_SIZE(text), '\r')))
1287 needflush = 1;
1288
1289 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001290 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001291 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001292 self->encoding_start_of_stream = 0;
1293 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001294 else
1295 b = PyObject_CallMethodObjArgs(self->encoder,
1296 _PyIO_str_encode, text, NULL);
1297 Py_DECREF(text);
1298 if (b == NULL)
1299 return NULL;
1300
1301 if (self->pending_bytes == NULL) {
1302 self->pending_bytes = PyList_New(0);
1303 if (self->pending_bytes == NULL) {
1304 Py_DECREF(b);
1305 return NULL;
1306 }
1307 self->pending_bytes_count = 0;
1308 }
1309 if (PyList_Append(self->pending_bytes, b) < 0) {
1310 Py_DECREF(b);
1311 return NULL;
1312 }
1313 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1314 Py_DECREF(b);
1315 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001316 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 return NULL;
1318 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001319
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001320 if (needflush) {
1321 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1322 if (ret == NULL)
1323 return NULL;
1324 Py_DECREF(ret);
1325 }
1326
1327 Py_CLEAR(self->snapshot);
1328
1329 if (self->decoder) {
1330 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1331 if (ret == NULL)
1332 return NULL;
1333 Py_DECREF(ret);
1334 }
1335
1336 return PyLong_FromSsize_t(textlen);
1337}
1338
1339/* Steal a reference to chars and store it in the decoded_char buffer;
1340 */
1341static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001342textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001343{
1344 Py_CLEAR(self->decoded_chars);
1345 self->decoded_chars = chars;
1346 self->decoded_chars_used = 0;
1347}
1348
1349static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001350textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001351{
1352 PyObject *chars;
1353 Py_ssize_t avail;
1354
1355 if (self->decoded_chars == NULL)
1356 return PyUnicode_FromStringAndSize(NULL, 0);
1357
1358 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1359 - self->decoded_chars_used);
1360
1361 assert(avail >= 0);
1362
1363 if (n < 0 || n > avail)
1364 n = avail;
1365
1366 if (self->decoded_chars_used > 0 || n < avail) {
1367 chars = PyUnicode_FromUnicode(
1368 PyUnicode_AS_UNICODE(self->decoded_chars)
1369 + self->decoded_chars_used, n);
1370 if (chars == NULL)
1371 return NULL;
1372 }
1373 else {
1374 chars = self->decoded_chars;
1375 Py_INCREF(chars);
1376 }
1377
1378 self->decoded_chars_used += n;
1379 return chars;
1380}
1381
1382/* Read and decode the next chunk of data from the BufferedReader.
1383 */
1384static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001385textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001386{
1387 PyObject *dec_buffer = NULL;
1388 PyObject *dec_flags = NULL;
1389 PyObject *input_chunk = NULL;
1390 PyObject *decoded_chars, *chunk_size;
1391 int eof;
1392
1393 /* The return value is True unless EOF was reached. The decoded string is
1394 * placed in self._decoded_chars (replacing its previous value). The
1395 * entire input chunk is sent to the decoder, though some of it may remain
1396 * buffered in the decoder, yet to be converted.
1397 */
1398
1399 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001400 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001401 return -1;
1402 }
1403
1404 if (self->telling) {
1405 /* To prepare for tell(), we need to snapshot a point in the file
1406 * where the decoder's input buffer is empty.
1407 */
1408
1409 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1410 _PyIO_str_getstate, NULL);
1411 if (state == NULL)
1412 return -1;
1413 /* Given this, we know there was a valid snapshot point
1414 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1415 */
1416 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1417 Py_DECREF(state);
1418 return -1;
1419 }
1420 Py_INCREF(dec_buffer);
1421 Py_INCREF(dec_flags);
1422 Py_DECREF(state);
1423 }
1424
1425 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1426 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1427 if (chunk_size == NULL)
1428 goto fail;
1429 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1430 _PyIO_str_read1, chunk_size, NULL);
1431 Py_DECREF(chunk_size);
1432 if (input_chunk == NULL)
1433 goto fail;
1434 assert(PyBytes_Check(input_chunk));
1435
1436 eof = (PyBytes_Size(input_chunk) == 0);
1437
1438 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1439 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1440 self->decoder, input_chunk, eof);
1441 }
1442 else {
1443 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1444 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1445 }
1446
1447 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1448 if (decoded_chars == NULL)
1449 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001450 textiowrapper_set_decoded_chars(self, decoded_chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001451 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1452 eof = 0;
1453
1454 if (self->telling) {
1455 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1456 * next input to be decoded is dec_buffer + input_chunk.
1457 */
1458 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1459 if (next_input == NULL)
1460 goto fail;
1461 assert (PyBytes_Check(next_input));
1462 Py_DECREF(dec_buffer);
1463 Py_CLEAR(self->snapshot);
1464 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1465 }
1466 Py_DECREF(input_chunk);
1467
1468 return (eof == 0);
1469
1470 fail:
1471 Py_XDECREF(dec_buffer);
1472 Py_XDECREF(dec_flags);
1473 Py_XDECREF(input_chunk);
1474 return -1;
1475}
1476
1477static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001478textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001479{
1480 Py_ssize_t n = -1;
1481 PyObject *result = NULL, *chunks = NULL;
1482
1483 CHECK_INITIALIZED(self);
1484
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001485 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001486 return NULL;
1487
1488 CHECK_CLOSED(self);
1489
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001490 if (self->decoder == NULL)
1491 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001492
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001493 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001494 return NULL;
1495
1496 if (n < 0) {
1497 /* Read everything */
1498 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1499 PyObject *decoded;
1500 if (bytes == NULL)
1501 goto fail;
1502 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1503 bytes, Py_True, NULL);
1504 Py_DECREF(bytes);
1505 if (decoded == NULL)
1506 goto fail;
1507
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001508 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001509
1510 if (result == NULL) {
1511 Py_DECREF(decoded);
1512 return NULL;
1513 }
1514
1515 PyUnicode_AppendAndDel(&result, decoded);
1516 if (result == NULL)
1517 goto fail;
1518
1519 Py_CLEAR(self->snapshot);
1520 return result;
1521 }
1522 else {
1523 int res = 1;
1524 Py_ssize_t remaining = n;
1525
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001526 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001527 if (result == NULL)
1528 goto fail;
1529 remaining -= PyUnicode_GET_SIZE(result);
1530
1531 /* Keep reading chunks until we have n characters to return */
1532 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001533 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534 if (res < 0)
1535 goto fail;
1536 if (res == 0) /* EOF */
1537 break;
1538 if (chunks == NULL) {
1539 chunks = PyList_New(0);
1540 if (chunks == NULL)
1541 goto fail;
1542 }
1543 if (PyList_Append(chunks, result) < 0)
1544 goto fail;
1545 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001546 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001547 if (result == NULL)
1548 goto fail;
1549 remaining -= PyUnicode_GET_SIZE(result);
1550 }
1551 if (chunks != NULL) {
1552 if (result != NULL && PyList_Append(chunks, result) < 0)
1553 goto fail;
1554 Py_CLEAR(result);
1555 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1556 if (result == NULL)
1557 goto fail;
1558 Py_CLEAR(chunks);
1559 }
1560 return result;
1561 }
1562 fail:
1563 Py_XDECREF(result);
1564 Py_XDECREF(chunks);
1565 return NULL;
1566}
1567
1568
1569/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1570 that is to the NUL character. Otherwise the function will produce
1571 incorrect results. */
1572static Py_UNICODE *
1573find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1574{
1575 Py_UNICODE *s = start;
1576 for (;;) {
1577 while (*s > ch)
1578 s++;
1579 if (*s == ch)
1580 return s;
1581 if (s == end)
1582 return NULL;
1583 s++;
1584 }
1585}
1586
1587Py_ssize_t
1588_PyIO_find_line_ending(
1589 int translated, int universal, PyObject *readnl,
1590 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1591{
1592 Py_ssize_t len = end - start;
1593
1594 if (translated) {
1595 /* Newlines are already translated, only search for \n */
1596 Py_UNICODE *pos = find_control_char(start, end, '\n');
1597 if (pos != NULL)
1598 return pos - start + 1;
1599 else {
1600 *consumed = len;
1601 return -1;
1602 }
1603 }
1604 else if (universal) {
1605 /* Universal newline search. Find any of \r, \r\n, \n
1606 * The decoder ensures that \r\n are not split in two pieces
1607 */
1608 Py_UNICODE *s = start;
1609 for (;;) {
1610 Py_UNICODE ch;
1611 /* Fast path for non-control chars. The loop always ends
1612 since the Py_UNICODE storage is NUL-terminated. */
1613 while (*s > '\r')
1614 s++;
1615 if (s >= end) {
1616 *consumed = len;
1617 return -1;
1618 }
1619 ch = *s++;
1620 if (ch == '\n')
1621 return s - start;
1622 if (ch == '\r') {
1623 if (*s == '\n')
1624 return s - start + 1;
1625 else
1626 return s - start;
1627 }
1628 }
1629 }
1630 else {
1631 /* Non-universal mode. */
1632 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1633 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1634 if (readnl_len == 1) {
1635 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1636 if (pos != NULL)
1637 return pos - start + 1;
1638 *consumed = len;
1639 return -1;
1640 }
1641 else {
1642 Py_UNICODE *s = start;
1643 Py_UNICODE *e = end - readnl_len + 1;
1644 Py_UNICODE *pos;
1645 if (e < s)
1646 e = s;
1647 while (s < e) {
1648 Py_ssize_t i;
1649 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1650 if (pos == NULL || pos >= e)
1651 break;
1652 for (i = 1; i < readnl_len; i++) {
1653 if (pos[i] != nl[i])
1654 break;
1655 }
1656 if (i == readnl_len)
1657 return pos - start + readnl_len;
1658 s = pos + 1;
1659 }
1660 pos = find_control_char(e, end, nl[0]);
1661 if (pos == NULL)
1662 *consumed = len;
1663 else
1664 *consumed = pos - start;
1665 return -1;
1666 }
1667 }
1668}
1669
1670static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001671_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672{
1673 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1674 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1675 int res;
1676
1677 CHECK_CLOSED(self);
1678
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001679 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 return NULL;
1681
1682 chunked = 0;
1683
1684 while (1) {
1685 Py_UNICODE *ptr;
1686 Py_ssize_t line_len;
1687 Py_ssize_t consumed = 0;
1688
1689 /* First, get some data if necessary */
1690 res = 1;
1691 while (!self->decoded_chars ||
1692 !PyUnicode_GET_SIZE(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001693 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 if (res < 0)
1695 goto error;
1696 if (res == 0)
1697 break;
1698 }
1699 if (res == 0) {
1700 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001701 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001702 Py_CLEAR(self->snapshot);
1703 start = endpos = offset_to_buffer = 0;
1704 break;
1705 }
1706
1707 if (remaining == NULL) {
1708 line = self->decoded_chars;
1709 start = self->decoded_chars_used;
1710 offset_to_buffer = 0;
1711 Py_INCREF(line);
1712 }
1713 else {
1714 assert(self->decoded_chars_used == 0);
1715 line = PyUnicode_Concat(remaining, self->decoded_chars);
1716 start = 0;
1717 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1718 Py_CLEAR(remaining);
1719 if (line == NULL)
1720 goto error;
1721 }
1722
1723 ptr = PyUnicode_AS_UNICODE(line);
1724 line_len = PyUnicode_GET_SIZE(line);
1725
1726 endpos = _PyIO_find_line_ending(
1727 self->readtranslate, self->readuniversal, self->readnl,
1728 ptr + start, ptr + line_len, &consumed);
1729 if (endpos >= 0) {
1730 endpos += start;
1731 if (limit >= 0 && (endpos - start) + chunked >= limit)
1732 endpos = start + limit - chunked;
1733 break;
1734 }
1735
1736 /* We can put aside up to `endpos` */
1737 endpos = consumed + start;
1738 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1739 /* Didn't find line ending, but reached length limit */
1740 endpos = start + limit - chunked;
1741 break;
1742 }
1743
1744 if (endpos > start) {
1745 /* No line ending seen yet - put aside current data */
1746 PyObject *s;
1747 if (chunks == NULL) {
1748 chunks = PyList_New(0);
1749 if (chunks == NULL)
1750 goto error;
1751 }
1752 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1753 if (s == NULL)
1754 goto error;
1755 if (PyList_Append(chunks, s) < 0) {
1756 Py_DECREF(s);
1757 goto error;
1758 }
1759 chunked += PyUnicode_GET_SIZE(s);
1760 Py_DECREF(s);
1761 }
1762 /* There may be some remaining bytes we'll have to prepend to the
1763 next chunk of data */
1764 if (endpos < line_len) {
1765 remaining = PyUnicode_FromUnicode(
1766 ptr + endpos, line_len - endpos);
1767 if (remaining == NULL)
1768 goto error;
1769 }
1770 Py_CLEAR(line);
1771 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001772 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001773 }
1774
1775 if (line != NULL) {
1776 /* Our line ends in the current buffer */
1777 self->decoded_chars_used = endpos - offset_to_buffer;
1778 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1779 if (start == 0 && Py_REFCNT(line) == 1) {
1780 if (PyUnicode_Resize(&line, endpos) < 0)
1781 goto error;
1782 }
1783 else {
1784 PyObject *s = PyUnicode_FromUnicode(
1785 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1786 Py_CLEAR(line);
1787 if (s == NULL)
1788 goto error;
1789 line = s;
1790 }
1791 }
1792 }
1793 if (remaining != NULL) {
1794 if (chunks == NULL) {
1795 chunks = PyList_New(0);
1796 if (chunks == NULL)
1797 goto error;
1798 }
1799 if (PyList_Append(chunks, remaining) < 0)
1800 goto error;
1801 Py_CLEAR(remaining);
1802 }
1803 if (chunks != NULL) {
1804 if (line != NULL && PyList_Append(chunks, line) < 0)
1805 goto error;
1806 Py_CLEAR(line);
1807 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1808 if (line == NULL)
1809 goto error;
1810 Py_DECREF(chunks);
1811 }
1812 if (line == NULL)
1813 line = PyUnicode_FromStringAndSize(NULL, 0);
1814
1815 return line;
1816
1817 error:
1818 Py_XDECREF(chunks);
1819 Py_XDECREF(remaining);
1820 Py_XDECREF(line);
1821 return NULL;
1822}
1823
1824static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001825textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001826{
1827 Py_ssize_t limit = -1;
1828
1829 CHECK_INITIALIZED(self);
1830 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1831 return NULL;
1832 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001833 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001834}
1835
1836/* Seek and Tell */
1837
1838typedef struct {
1839 Py_off_t start_pos;
1840 int dec_flags;
1841 int bytes_to_feed;
1842 int chars_to_skip;
1843 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001844} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001845
/*
   To speed up cookie packing/unpacking, the cookie_type fields are stored
   in a temporary byte string which is converted with
   _PyLong_FromByteArray() or _PyLong_AsByteArray() respectively.
   The OFF_* macros below give the offset of each field inside that byte
   string; each offset is the previous field's offset plus its size.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must lay out the fields in reverse order: need_eof first, start_pos
   last. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1883
1884static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001885textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001886{
1887 unsigned char buffer[COOKIE_BUF_LEN];
1888 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1889 if (cookieLong == NULL)
1890 return -1;
1891
1892 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1893 IS_LITTLE_ENDIAN, 0) < 0) {
1894 Py_DECREF(cookieLong);
1895 return -1;
1896 }
1897 Py_DECREF(cookieLong);
1898
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001899 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1900 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1901 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1902 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1903 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001904
1905 return 0;
1906}
1907
1908static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001909textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001910{
1911 unsigned char buffer[COOKIE_BUF_LEN];
1912
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001913 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1914 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1915 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1916 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1917 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001918
1919 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1920}
1921#undef IS_LITTLE_ENDIAN
1922
1923static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001924_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925{
1926 PyObject *res;
1927 /* When seeking to the start of the stream, we call decoder.reset()
1928 rather than decoder.getstate().
1929 This is for a few decoders such as utf-16 for which the state value
1930 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1931 utf-16, that we are expecting a BOM).
1932 */
1933 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1934 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1935 else
1936 res = PyObject_CallMethod(self->decoder, "setstate",
1937 "((yi))", "", cookie->dec_flags);
1938 if (res == NULL)
1939 return -1;
1940 Py_DECREF(res);
1941 return 0;
1942}
1943
Antoine Pitroue4501852009-05-14 18:55:55 +00001944static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001945_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001946{
1947 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001948 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001949 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1950 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1951 self->encoding_start_of_stream = 1;
1952 }
1953 else {
1954 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1955 _PyIO_zero, NULL);
1956 self->encoding_start_of_stream = 0;
1957 }
1958 if (res == NULL)
1959 return -1;
1960 Py_DECREF(res);
1961 return 0;
1962}
1963
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001964static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001965textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001966{
1967 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001968 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001970 PyObject *res;
1971 int cmp;
1972
1973 CHECK_INITIALIZED(self);
1974
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001975 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1976 return NULL;
1977 CHECK_CLOSED(self);
1978
1979 Py_INCREF(cookieObj);
1980
1981 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001982 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001983 goto fail;
1984 }
1985
1986 if (whence == 1) {
1987 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00001988 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989 if (cmp < 0)
1990 goto fail;
1991
1992 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001993 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 goto fail;
1995 }
1996
1997 /* Seeking to the current position should attempt to
1998 * sync the underlying buffer with the current position.
1999 */
2000 Py_DECREF(cookieObj);
2001 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2002 if (cookieObj == NULL)
2003 goto fail;
2004 }
2005 else if (whence == 2) {
2006 /* seek relative to end of file */
2007
Antoine Pitroue4501852009-05-14 18:55:55 +00002008 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009 if (cmp < 0)
2010 goto fail;
2011
2012 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002013 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002014 goto fail;
2015 }
2016
2017 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2018 if (res == NULL)
2019 goto fail;
2020 Py_DECREF(res);
2021
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002022 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002023 Py_CLEAR(self->snapshot);
2024 if (self->decoder) {
2025 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2026 if (res == NULL)
2027 goto fail;
2028 Py_DECREF(res);
2029 }
2030
2031 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2032 Py_XDECREF(cookieObj);
2033 return res;
2034 }
2035 else if (whence != 0) {
2036 PyErr_Format(PyExc_ValueError,
2037 "invalid whence (%d, should be 0, 1 or 2)", whence);
2038 goto fail;
2039 }
2040
Antoine Pitroue4501852009-05-14 18:55:55 +00002041 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002042 if (cmp < 0)
2043 goto fail;
2044
2045 if (cmp == 1) {
2046 PyErr_Format(PyExc_ValueError,
2047 "negative seek position %R", cookieObj);
2048 goto fail;
2049 }
2050
2051 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2052 if (res == NULL)
2053 goto fail;
2054 Py_DECREF(res);
2055
2056 /* The strategy of seek() is to go back to the safe start point
2057 * and replay the effect of read(chars_to_skip) from there.
2058 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002059 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002060 goto fail;
2061
2062 /* Seek back to the safe start point. */
2063 posobj = PyLong_FromOff_t(cookie.start_pos);
2064 if (posobj == NULL)
2065 goto fail;
2066 res = PyObject_CallMethodObjArgs(self->buffer,
2067 _PyIO_str_seek, posobj, NULL);
2068 Py_DECREF(posobj);
2069 if (res == NULL)
2070 goto fail;
2071 Py_DECREF(res);
2072
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002073 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 Py_CLEAR(self->snapshot);
2075
2076 /* Restore the decoder to its state from the safe start point. */
2077 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002078 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 goto fail;
2080 }
2081
2082 if (cookie.chars_to_skip) {
2083 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2084 PyObject *input_chunk = PyObject_CallMethod(
2085 self->buffer, "read", "i", cookie.bytes_to_feed);
2086 PyObject *decoded;
2087
2088 if (input_chunk == NULL)
2089 goto fail;
2090
2091 assert (PyBytes_Check(input_chunk));
2092
2093 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2094 if (self->snapshot == NULL) {
2095 Py_DECREF(input_chunk);
2096 goto fail;
2097 }
2098
2099 decoded = PyObject_CallMethod(self->decoder, "decode",
2100 "Oi", input_chunk, (int)cookie.need_eof);
2101
2102 if (decoded == NULL)
2103 goto fail;
2104
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002105 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002106
2107 /* Skip chars_to_skip of the decoded characters. */
2108 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2109 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2110 goto fail;
2111 }
2112 self->decoded_chars_used = cookie.chars_to_skip;
2113 }
2114 else {
2115 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2116 if (self->snapshot == NULL)
2117 goto fail;
2118 }
2119
Antoine Pitroue4501852009-05-14 18:55:55 +00002120 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2121 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002122 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002123 goto fail;
2124 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002125 return cookieObj;
2126 fail:
2127 Py_XDECREF(cookieObj);
2128 return NULL;
2129
2130}
2131
2132static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002133textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002134{
2135 PyObject *res;
2136 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002137 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002138 PyObject *next_input;
2139 Py_ssize_t chars_to_skip, chars_decoded;
2140 PyObject *saved_state = NULL;
2141 char *input, *input_end;
2142
2143 CHECK_INITIALIZED(self);
2144 CHECK_CLOSED(self);
2145
2146 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002147 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002148 goto fail;
2149 }
2150 if (!self->telling) {
2151 PyErr_SetString(PyExc_IOError,
2152 "telling position disabled by next() call");
2153 goto fail;
2154 }
2155
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002156 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157 return NULL;
2158 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2159 if (res == NULL)
2160 goto fail;
2161 Py_DECREF(res);
2162
2163 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2164 if (posobj == NULL)
2165 goto fail;
2166
2167 if (self->decoder == NULL || self->snapshot == NULL) {
2168 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2169 return posobj;
2170 }
2171
2172#if defined(HAVE_LARGEFILE_SUPPORT)
2173 cookie.start_pos = PyLong_AsLongLong(posobj);
2174#else
2175 cookie.start_pos = PyLong_AsLong(posobj);
2176#endif
2177 if (PyErr_Occurred())
2178 goto fail;
2179
2180 /* Skip backward to the snapshot point (see _read_chunk). */
2181 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2182 goto fail;
2183
2184 assert (PyBytes_Check(next_input));
2185
2186 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2187
2188 /* How many decoded characters have been used up since the snapshot? */
2189 if (self->decoded_chars_used == 0) {
2190 /* We haven't moved from the snapshot point. */
2191 Py_DECREF(posobj);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002192 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002193 }
2194
2195 chars_to_skip = self->decoded_chars_used;
2196
2197 /* Starting from the snapshot position, we will walk the decoder
2198 * forward until it gives us enough decoded characters.
2199 */
2200 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2201 _PyIO_str_getstate, NULL);
2202 if (saved_state == NULL)
2203 goto fail;
2204
2205 /* Note our initial start point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002206 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002207 goto fail;
2208
2209 /* Feed the decoder one byte at a time. As we go, note the
2210 * nearest "safe start point" before the current location
2211 * (a point where the decoder has nothing buffered, so seek()
2212 * can safely start from there and advance to this location).
2213 */
2214 chars_decoded = 0;
2215 input = PyBytes_AS_STRING(next_input);
2216 input_end = input + PyBytes_GET_SIZE(next_input);
2217 while (input < input_end) {
2218 PyObject *state;
2219 char *dec_buffer;
2220 Py_ssize_t dec_buffer_len;
2221 int dec_flags;
2222
2223 PyObject *decoded = PyObject_CallMethod(
2224 self->decoder, "decode", "y#", input, 1);
2225 if (decoded == NULL)
2226 goto fail;
2227 assert (PyUnicode_Check(decoded));
2228 chars_decoded += PyUnicode_GET_SIZE(decoded);
2229 Py_DECREF(decoded);
2230
2231 cookie.bytes_to_feed += 1;
2232
2233 state = PyObject_CallMethodObjArgs(self->decoder,
2234 _PyIO_str_getstate, NULL);
2235 if (state == NULL)
2236 goto fail;
2237 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2238 Py_DECREF(state);
2239 goto fail;
2240 }
2241 Py_DECREF(state);
2242
2243 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2244 /* Decoder buffer is empty, so this is a safe start point. */
2245 cookie.start_pos += cookie.bytes_to_feed;
2246 chars_to_skip -= chars_decoded;
2247 cookie.dec_flags = dec_flags;
2248 cookie.bytes_to_feed = 0;
2249 chars_decoded = 0;
2250 }
2251 if (chars_decoded >= chars_to_skip)
2252 break;
2253 input++;
2254 }
2255 if (input == input_end) {
2256 /* We didn't get enough decoded data; signal EOF to get more. */
2257 PyObject *decoded = PyObject_CallMethod(
2258 self->decoder, "decode", "yi", "", /* final = */ 1);
2259 if (decoded == NULL)
2260 goto fail;
2261 assert (PyUnicode_Check(decoded));
2262 chars_decoded += PyUnicode_GET_SIZE(decoded);
2263 Py_DECREF(decoded);
2264 cookie.need_eof = 1;
2265
2266 if (chars_decoded < chars_to_skip) {
2267 PyErr_SetString(PyExc_IOError,
2268 "can't reconstruct logical file position");
2269 goto fail;
2270 }
2271 }
2272
2273 /* finally */
2274 Py_XDECREF(posobj);
2275 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2276 Py_DECREF(saved_state);
2277 if (res == NULL)
2278 return NULL;
2279 Py_DECREF(res);
2280
2281 /* The returned cookie corresponds to the last safe start point. */
2282 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002283 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002284
2285 fail:
2286 Py_XDECREF(posobj);
2287 if (saved_state) {
2288 PyObject *type, *value, *traceback;
2289 PyErr_Fetch(&type, &value, &traceback);
2290
2291 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2292 Py_DECREF(saved_state);
2293 if (res == NULL)
2294 return NULL;
2295 Py_DECREF(res);
2296
2297 PyErr_Restore(type, value, traceback);
2298 }
2299 return NULL;
2300}
2301
2302static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002303textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002304{
2305 PyObject *pos = Py_None;
2306 PyObject *res;
2307
2308 CHECK_INITIALIZED(self)
2309 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2310 return NULL;
2311 }
2312
2313 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2314 if (res == NULL)
2315 return NULL;
2316 Py_DECREF(res);
2317
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002318 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002319}
2320
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002321static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002322textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002323{
Antoine Pitrou716c4442009-05-23 19:04:03 +00002324 PyObject *nameobj, *res;
2325
2326 CHECK_INITIALIZED(self);
2327
2328 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2329 if (nameobj == NULL) {
2330 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2331 PyErr_Clear();
2332 else
2333 return NULL;
2334 res = PyUnicode_FromFormat("<_io.TextIOWrapper encoding=%R>",
2335 self->encoding);
2336 }
2337 else {
2338 res = PyUnicode_FromFormat("<_io.TextIOWrapper name=%R encoding=%R>",
2339 nameobj, self->encoding);
2340 Py_DECREF(nameobj);
2341 }
2342 return res;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002343}
2344
2345
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002346/* Inquiries */
2347
2348static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002349textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002350{
2351 CHECK_INITIALIZED(self);
2352 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2353}
2354
2355static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002356textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002357{
2358 CHECK_INITIALIZED(self);
2359 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2360}
2361
2362static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002363textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364{
2365 CHECK_INITIALIZED(self);
2366 return PyObject_CallMethod(self->buffer, "readable", NULL);
2367}
2368
2369static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002370textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371{
2372 CHECK_INITIALIZED(self);
2373 return PyObject_CallMethod(self->buffer, "writable", NULL);
2374}
2375
2376static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002377textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002378{
2379 CHECK_INITIALIZED(self);
2380 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2381}
2382
2383static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002384textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002385{
2386 CHECK_INITIALIZED(self);
2387 CHECK_CLOSED(self);
2388 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002389 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002390 return NULL;
2391 return PyObject_CallMethod(self->buffer, "flush", NULL);
2392}
2393
2394static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002395textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396{
2397 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002398 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002400
Antoine Pitrou6be88762010-05-03 16:48:20 +00002401 res = textiowrapper_closed_get(self, NULL);
2402 if (res == NULL)
2403 return NULL;
2404 r = PyObject_IsTrue(res);
2405 Py_DECREF(res);
2406 if (r < 0)
2407 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002408
Antoine Pitrou6be88762010-05-03 16:48:20 +00002409 if (r > 0) {
2410 Py_RETURN_NONE; /* stream already closed */
2411 }
2412 else {
2413 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2414 if (res == NULL) {
2415 return NULL;
2416 }
2417 else
2418 Py_DECREF(res);
2419
2420 return PyObject_CallMethod(self->buffer, "close", NULL);
2421 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002422}
2423
2424static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002425textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002426{
2427 PyObject *line;
2428
2429 CHECK_INITIALIZED(self);
2430
2431 self->telling = 0;
2432 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2433 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002434 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002435 }
2436 else {
2437 line = PyObject_CallMethodObjArgs((PyObject *)self,
2438 _PyIO_str_readline, NULL);
2439 if (line && !PyUnicode_Check(line)) {
2440 PyErr_Format(PyExc_IOError,
2441 "readline() should have returned an str object, "
2442 "not '%.200s'", Py_TYPE(line)->tp_name);
2443 Py_DECREF(line);
2444 return NULL;
2445 }
2446 }
2447
2448 if (line == NULL)
2449 return NULL;
2450
2451 if (PyUnicode_GET_SIZE(line) == 0) {
2452 /* Reached EOF or would have blocked */
2453 Py_DECREF(line);
2454 Py_CLEAR(self->snapshot);
2455 self->telling = self->seekable;
2456 return NULL;
2457 }
2458
2459 return line;
2460}
2461
2462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002463textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464{
2465 CHECK_INITIALIZED(self);
2466 return PyObject_GetAttrString(self->buffer, "name");
2467}
2468
2469static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002470textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471{
2472 CHECK_INITIALIZED(self);
2473 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2474}
2475
2476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002477textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478{
2479 PyObject *res;
2480 CHECK_INITIALIZED(self);
2481 if (self->decoder == NULL)
2482 Py_RETURN_NONE;
2483 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2484 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002485 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2486 PyErr_Clear();
2487 Py_RETURN_NONE;
2488 }
2489 else {
2490 return NULL;
2491 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002492 }
2493 return res;
2494}
2495
2496static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002497textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002498{
2499 CHECK_INITIALIZED(self);
2500 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2501}
2502
2503static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002504textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002505{
2506 CHECK_INITIALIZED(self);
2507 return PyLong_FromSsize_t(self->chunk_size);
2508}
2509
2510static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002511textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002512{
2513 Py_ssize_t n;
2514 CHECK_INITIALIZED_INT(self);
2515 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2516 if (n == -1 && PyErr_Occurred())
2517 return -1;
2518 if (n <= 0) {
2519 PyErr_SetString(PyExc_ValueError,
2520 "a strictly positive integer is required");
2521 return -1;
2522 }
2523 self->chunk_size = n;
2524 return 0;
2525}
2526
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002527static PyMethodDef textiowrapper_methods[] = {
2528 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2529 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2530 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2531 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2532 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2533 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002535 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2536 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2537 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2538 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2539 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002540
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002541 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2542 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2543 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002544 {NULL, NULL}
2545};
2546
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002547static PyMemberDef textiowrapper_members[] = {
2548 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2549 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2550 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551 {NULL}
2552};
2553
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002554static PyGetSetDef textiowrapper_getset[] = {
2555 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2556 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002557/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2558*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002559 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2560 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2561 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2562 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002563 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002564};
2565
2566PyTypeObject PyTextIOWrapper_Type = {
2567 PyVarObject_HEAD_INIT(NULL, 0)
2568 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002569 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002570 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002571 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002572 0, /*tp_print*/
2573 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002574 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002575 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002576 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002577 0, /*tp_as_number*/
2578 0, /*tp_as_sequence*/
2579 0, /*tp_as_mapping*/
2580 0, /*tp_hash */
2581 0, /*tp_call*/
2582 0, /*tp_str*/
2583 0, /*tp_getattro*/
2584 0, /*tp_setattro*/
2585 0, /*tp_as_buffer*/
2586 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2587 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002588 textiowrapper_doc, /* tp_doc */
2589 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2590 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002591 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002592 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002594 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2595 textiowrapper_methods, /* tp_methods */
2596 textiowrapper_members, /* tp_members */
2597 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002598 0, /* tp_base */
2599 0, /* tp_dict */
2600 0, /* tp_descr_get */
2601 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002602 offsetof(textio, dict), /*tp_dictoffset*/
2603 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002604 0, /* tp_alloc */
2605 PyType_GenericNew, /* tp_new */
2606};