blob: c9913d4a9eb9765b2df316f9d7857ff17a142aaa [file] [log] [blame]
Antoine Pitrou19690592009-06-12 20:14:08 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
16PyDoc_STRVAR(textiobase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
29}
30
31PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
39textiobase_detach(PyObject *self)
40{
41 return _unsupported("detach");
42}
43
44PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
52textiobase_read(PyObject *self, PyObject *args)
53{
54 return _unsupported("read");
55}
56
57PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
64textiobase_readline(PyObject *self, PyObject *args)
65{
66 return _unsupported("readline");
67}
68
69PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
76textiobase_write(PyObject *self, PyObject *args)
77{
78 return _unsupported("write");
79}
80
81PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
88textiobase_encoding_get(PyObject *self, void *context)
89{
90 Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
102textiobase_newlines_get(PyObject *self, void *context)
103{
104 Py_RETURN_NONE;
105}
106
107PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
114textiobase_errors_get(PyObject *self, void *context)
115{
116 Py_RETURN_NONE;
117}
118
119
120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 {NULL, NULL}
126};
127
128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 {NULL}
133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
156 textiobase_doc, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
163 textiobase_methods, /* tp_methods */
164 0, /* tp_members */
165 textiobase_getset, /* tp_getset */
166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrou2a466582009-09-21 21:17:48 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Antoine Pitrou19690592009-06-12 20:14:08 +0000196} nldecoder_object;
197
198static int
199incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
239#define SEEN_CR 1
240#define SEEN_LF 2
241#define SEEN_CRLF 4
242#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
244PyObject *
245_PyIncrementalNewlineDecoder_decode(PyObject *_self,
246 PyObject *input, int final)
247{
248 PyObject *output;
249 Py_ssize_t output_len;
250 nldecoder_object *self = (nldecoder_object *) _self;
251
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
256 }
257
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
262 }
263 else {
264 output = input;
265 Py_INCREF(output);
266 }
267
268 if (output == NULL)
269 return NULL;
270
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
275 }
276
277 output_len = PyUnicode_GET_SIZE(output);
278 if (self->pendingcr && (final || output_len > 0)) {
279 Py_UNICODE *out;
280 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
281 if (modified == NULL)
282 goto error;
283 out = PyUnicode_AS_UNICODE(modified);
284 out[0] = '\r';
285 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
286 output_len * sizeof(Py_UNICODE));
287 Py_DECREF(output);
288 output = modified;
289 self->pendingcr = 0;
290 output_len++;
291 }
292
293 /* retain last \r even when not translating data:
294 * then readline() is sure to get \r\n in one pass
295 */
296 if (!final) {
297 if (output_len > 0
298 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
299
300 if (Py_REFCNT(output) == 1) {
301 if (PyUnicode_Resize(&output, output_len - 1) < 0)
302 goto error;
303 }
304 else {
305 PyObject *modified = PyUnicode_FromUnicode(
306 PyUnicode_AS_UNICODE(output),
307 output_len - 1);
308 if (modified == NULL)
309 goto error;
310 Py_DECREF(output);
311 output = modified;
312 }
313 self->pendingcr = 1;
314 }
315 }
316
317 /* Record which newlines are read and do newline translation if desired,
318 all in one pass. */
319 {
320 Py_UNICODE *in_str;
321 Py_ssize_t len;
322 int seennl = self->seennl;
323 int only_lf = 0;
324
325 in_str = PyUnicode_AS_UNICODE(output);
326 len = PyUnicode_GET_SIZE(output);
327
328 if (len == 0)
329 return output;
330
331 /* If, up to now, newlines are consistently \n, do a quick check
332 for the \r *byte* with the libc's optimized memchr.
333 */
334 if (seennl == SEEN_LF || seennl == 0) {
335 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
336 }
337
338 if (only_lf) {
339 /* If not already seen, quick scan for a possible "\n" character.
340 (there's nothing else to be done, even when in translation mode)
341 */
342 if (seennl == 0 &&
343 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
344 Py_UNICODE *s, *end;
345 s = in_str;
346 end = in_str + len;
347 for (;;) {
348 Py_UNICODE c;
349 /* Fast loop for non-control characters */
350 while (*s > '\n')
351 s++;
352 c = *s++;
353 if (c == '\n') {
354 seennl |= SEEN_LF;
355 break;
356 }
357 if (s > end)
358 break;
359 }
360 }
361 /* Finished: we have scanned for newlines, and none of them
362 need translating */
363 }
364 else if (!self->translate) {
365 Py_UNICODE *s, *end;
366 /* We have already seen all newline types, no need to scan again */
367 if (seennl == SEEN_ALL)
368 goto endscan;
369 s = in_str;
370 end = in_str + len;
371 for (;;) {
372 Py_UNICODE c;
373 /* Fast loop for non-control characters */
374 while (*s > '\r')
375 s++;
376 c = *s++;
377 if (c == '\n')
378 seennl |= SEEN_LF;
379 else if (c == '\r') {
380 if (*s == '\n') {
381 seennl |= SEEN_CRLF;
382 s++;
383 }
384 else
385 seennl |= SEEN_CR;
386 }
387 if (s > end)
388 break;
389 if (seennl == SEEN_ALL)
390 break;
391 }
392 endscan:
393 ;
394 }
395 else {
396 PyObject *translated = NULL;
397 Py_UNICODE *out_str;
398 Py_UNICODE *in, *out, *end;
399 if (Py_REFCNT(output) != 1) {
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 most decoders should only output non-shared strings, i.e.
403 translation is done in place. */
404 translated = PyUnicode_FromUnicode(NULL, len);
405 if (translated == NULL)
406 goto error;
407 assert(Py_REFCNT(translated) == 1);
408 memcpy(PyUnicode_AS_UNICODE(translated),
409 PyUnicode_AS_UNICODE(output),
410 len * sizeof(Py_UNICODE));
411 }
412 else {
413 translated = output;
414 }
415 out_str = PyUnicode_AS_UNICODE(translated);
416 in = in_str;
417 out = out_str;
418 end = in_str + len;
419 for (;;) {
420 Py_UNICODE c;
421 /* Fast loop for non-control characters */
422 while ((c = *in++) > '\r')
423 *out++ = c;
424 if (c == '\n') {
425 *out++ = c;
426 seennl |= SEEN_LF;
427 continue;
428 }
429 if (c == '\r') {
430 if (*in == '\n') {
431 in++;
432 seennl |= SEEN_CRLF;
433 }
434 else
435 seennl |= SEEN_CR;
436 *out++ = '\n';
437 continue;
438 }
439 if (in > end)
440 break;
441 *out++ = c;
442 }
443 if (translated != output) {
444 Py_DECREF(output);
445 output = translated;
446 }
447 if (out - out_str != len) {
448 if (PyUnicode_Resize(&output, out - out_str) < 0)
449 goto error;
450 }
451 }
452 self->seennl |= seennl;
453 }
454
455 return output;
456
457 error:
458 Py_DECREF(output);
459 return NULL;
460}
461
462static PyObject *
463incrementalnewlinedecoder_decode(nldecoder_object *self,
464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
564 {NULL}
565};
566
567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
569 {NULL}
570};
571
572PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
575 sizeof(nldecoder_object), /*tp_basicsize*/
576 0, /*tp_itemsize*/
577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
593 incrementalnewlinedecoder_doc, /* tp_doc */
594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
600 incrementalnewlinedecoder_methods, /* tp_methods */
601 0, /* tp_members */
602 incrementalnewlinedecoder_getset, /* tp_getset */
603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611};
612
613
614/* TextIOWrapper */
615
616PyDoc_STRVAR(textiowrapper_doc,
617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
Antoine Pitrou76370f42012-08-04 00:55:38 +0200625 "newline controls how line endings are handled. It can be None, '',\n"
626 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
627 "\n"
628 "* On input, if newline is None, universal newlines mode is\n"
629 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
630 " these are translated into '\\n' before being returned to the\n"
631 " caller. If it is '', universal newline mode is enabled, but line\n"
632 " endings are returned to the caller untranslated. If it has any of\n"
633 " the other legal values, input lines are only terminated by the given\n"
634 " string, and the line ending is returned to the caller untranslated.\n"
635 "\n"
636 "* On output, if newline is None, any '\\n' characters written are\n"
637 " translated to the system default line separator, os.linesep. If\n"
638 " newline is '', no translation takes place. If newline is any of the\n"
639 " other legal values, any '\\n' characters written are translated to\n"
640 " the given string.\n"
Antoine Pitrou19690592009-06-12 20:14:08 +0000641 "\n"
642 "If line_buffering is True, a call to flush is implied when a call to\n"
643 "write contains a newline character."
644 );
645
646typedef PyObject *
647 (*encodefunc_t)(PyObject *, PyObject *);
648
649typedef struct
650{
651 PyObject_HEAD
652 int ok; /* initialized? */
653 int detached;
654 Py_ssize_t chunk_size;
655 PyObject *buffer;
656 PyObject *encoding;
657 PyObject *encoder;
658 PyObject *decoder;
659 PyObject *readnl;
660 PyObject *errors;
661 const char *writenl; /* utf-8 encoded, NULL stands for \n */
662 char line_buffering;
663 char readuniversal;
664 char readtranslate;
665 char writetranslate;
666 char seekable;
667 char telling;
668 /* Specialized encoding func (see below) */
669 encodefunc_t encodefunc;
670 /* Whether or not it's the start of the stream */
671 char encoding_start_of_stream;
672
673 /* Reads and writes are internally buffered in order to speed things up.
674 However, any read will first flush the write buffer if itsn't empty.
675
676 Please also note that text to be written is first encoded before being
677 buffered. This is necessary so that encoding errors are immediately
678 reported to the caller, but it unfortunately means that the
679 IncrementalEncoder (whose encode() method is always written in Python)
680 becomes a bottleneck for small writes.
681 */
682 PyObject *decoded_chars; /* buffer for text returned from decoder */
683 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
684 PyObject *pending_bytes; /* list of bytes objects waiting to be
685 written, or NULL */
686 Py_ssize_t pending_bytes_count;
687 PyObject *snapshot;
688 /* snapshot is either None, or a tuple (dec_flags, next_input) where
689 * dec_flags is the second (integer) item of the decoder state and
690 * next_input is the chunk of input bytes that comes next after the
691 * snapshot point. We use this to reconstruct decoder states in tell().
692 */
693
694 /* Cache raw object if it's a FileIO object */
695 PyObject *raw;
696
697 PyObject *weakreflist;
698 PyObject *dict;
699} textio;
700
701
702/* A couple of specialized cases in order to bypass the slow incremental
703 encoding methods for the most popular encodings. */
704
705static PyObject *
706ascii_encode(textio *self, PyObject *text)
707{
708 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
709 PyUnicode_GET_SIZE(text),
710 PyBytes_AS_STRING(self->errors));
711}
712
713static PyObject *
714utf16be_encode(textio *self, PyObject *text)
715{
716 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
717 PyUnicode_GET_SIZE(text),
718 PyBytes_AS_STRING(self->errors), 1);
719}
720
721static PyObject *
722utf16le_encode(textio *self, PyObject *text)
723{
724 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
725 PyUnicode_GET_SIZE(text),
726 PyBytes_AS_STRING(self->errors), -1);
727}
728
729static PyObject *
730utf16_encode(textio *self, PyObject *text)
731{
732 if (!self->encoding_start_of_stream) {
733 /* Skip the BOM and use native byte ordering */
734#if defined(WORDS_BIGENDIAN)
735 return utf16be_encode(self, text);
736#else
737 return utf16le_encode(self, text);
738#endif
739 }
740 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
741 PyUnicode_GET_SIZE(text),
742 PyBytes_AS_STRING(self->errors), 0);
743}
744
745static PyObject *
746utf32be_encode(textio *self, PyObject *text)
747{
748 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
749 PyUnicode_GET_SIZE(text),
750 PyBytes_AS_STRING(self->errors), 1);
751}
752
753static PyObject *
754utf32le_encode(textio *self, PyObject *text)
755{
756 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
757 PyUnicode_GET_SIZE(text),
758 PyBytes_AS_STRING(self->errors), -1);
759}
760
761static PyObject *
762utf32_encode(textio *self, PyObject *text)
763{
764 if (!self->encoding_start_of_stream) {
765 /* Skip the BOM and use native byte ordering */
766#if defined(WORDS_BIGENDIAN)
767 return utf32be_encode(self, text);
768#else
769 return utf32le_encode(self, text);
770#endif
771 }
772 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
773 PyUnicode_GET_SIZE(text),
774 PyBytes_AS_STRING(self->errors), 0);
775}
776
777static PyObject *
778utf8_encode(textio *self, PyObject *text)
779{
780 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
781 PyUnicode_GET_SIZE(text),
782 PyBytes_AS_STRING(self->errors));
783}
784
785static PyObject *
786latin1_encode(textio *self, PyObject *text)
787{
788 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
789 PyUnicode_GET_SIZE(text),
790 PyBytes_AS_STRING(self->errors));
791}
792
793/* Map normalized encoding names onto the specialized encoding funcs */
794
795typedef struct {
796 const char *name;
797 encodefunc_t encodefunc;
798} encodefuncentry;
799
800static encodefuncentry encodefuncs[] = {
801 {"ascii", (encodefunc_t) ascii_encode},
802 {"iso8859-1", (encodefunc_t) latin1_encode},
803 {"utf-8", (encodefunc_t) utf8_encode},
804 {"utf-16-be", (encodefunc_t) utf16be_encode},
805 {"utf-16-le", (encodefunc_t) utf16le_encode},
806 {"utf-16", (encodefunc_t) utf16_encode},
807 {"utf-32-be", (encodefunc_t) utf32be_encode},
808 {"utf-32-le", (encodefunc_t) utf32le_encode},
809 {"utf-32", (encodefunc_t) utf32_encode},
810 {NULL, NULL}
811};
812
813
814static int
815textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
816{
817 char *kwlist[] = {"buffer", "encoding", "errors",
818 "newline", "line_buffering",
819 NULL};
820 PyObject *buffer, *raw;
821 char *encoding = NULL;
822 char *errors = NULL;
823 char *newline = NULL;
824 int line_buffering = 0;
825
826 PyObject *res;
827 int r;
828
829 self->ok = 0;
830 self->detached = 0;
831 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
832 kwlist, &buffer, &encoding, &errors,
833 &newline, &line_buffering))
834 return -1;
835
836 if (newline && newline[0] != '\0'
837 && !(newline[0] == '\n' && newline[1] == '\0')
838 && !(newline[0] == '\r' && newline[1] == '\0')
839 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
840 PyErr_Format(PyExc_ValueError,
841 "illegal newline value: %s", newline);
842 return -1;
843 }
844
845 Py_CLEAR(self->buffer);
846 Py_CLEAR(self->encoding);
847 Py_CLEAR(self->encoder);
848 Py_CLEAR(self->decoder);
849 Py_CLEAR(self->readnl);
850 Py_CLEAR(self->decoded_chars);
851 Py_CLEAR(self->pending_bytes);
852 Py_CLEAR(self->snapshot);
853 Py_CLEAR(self->errors);
854 Py_CLEAR(self->raw);
855 self->decoded_chars_used = 0;
856 self->pending_bytes_count = 0;
857 self->encodefunc = NULL;
858 self->writenl = NULL;
859
860 if (encoding == NULL && self->encoding == NULL) {
861 if (_PyIO_locale_module == NULL) {
862 _PyIO_locale_module = PyImport_ImportModule("locale");
863 if (_PyIO_locale_module == NULL)
864 goto catch_ImportError;
865 else
866 goto use_locale;
867 }
868 else {
869 use_locale:
870 self->encoding = PyObject_CallMethod(
871 _PyIO_locale_module, "getpreferredencoding", NULL);
872 if (self->encoding == NULL) {
873 catch_ImportError:
874 /*
875 Importing locale can raise a ImportError because of
876 _functools, and locale.getpreferredencoding can raise a
877 ImportError if _locale is not available. These will happen
878 during module building.
879 */
880 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
881 PyErr_Clear();
882 self->encoding = PyString_FromString("ascii");
883 }
884 else
885 goto error;
886 }
887 else if (!PyString_Check(self->encoding))
888 Py_CLEAR(self->encoding);
889 }
890 }
891 if (self->encoding != NULL)
892 encoding = PyString_AsString(self->encoding);
893 else if (encoding != NULL) {
894 self->encoding = PyString_FromString(encoding);
895 if (self->encoding == NULL)
896 goto error;
897 }
898 else {
899 PyErr_SetString(PyExc_IOError,
900 "could not determine default encoding");
901 }
902
903 if (errors == NULL)
904 errors = "strict";
905 self->errors = PyBytes_FromString(errors);
906 if (self->errors == NULL)
907 goto error;
908
909 self->chunk_size = 8192;
910 self->readuniversal = (newline == NULL || newline[0] == '\0');
911 self->line_buffering = line_buffering;
912 self->readtranslate = (newline == NULL);
913 if (newline) {
914 self->readnl = PyString_FromString(newline);
915 if (self->readnl == NULL)
916 return -1;
917 }
918 self->writetranslate = (newline == NULL || newline[0] != '\0');
919 if (!self->readuniversal && self->writetranslate) {
920 self->writenl = PyString_AsString(self->readnl);
921 if (!strcmp(self->writenl, "\n"))
922 self->writenl = NULL;
923 }
924#ifdef MS_WINDOWS
925 else
926 self->writenl = "\r\n";
927#endif
928
929 /* Build the decoder object */
930 res = PyObject_CallMethod(buffer, "readable", NULL);
931 if (res == NULL)
932 goto error;
933 r = PyObject_IsTrue(res);
934 Py_DECREF(res);
935 if (r == -1)
936 goto error;
937 if (r == 1) {
938 self->decoder = PyCodec_IncrementalDecoder(
939 encoding, errors);
940 if (self->decoder == NULL)
941 goto error;
942
943 if (self->readuniversal) {
944 PyObject *incrementalDecoder = PyObject_CallFunction(
945 (PyObject *)&PyIncrementalNewlineDecoder_Type,
946 "Oi", self->decoder, (int)self->readtranslate);
947 if (incrementalDecoder == NULL)
948 goto error;
949 Py_CLEAR(self->decoder);
950 self->decoder = incrementalDecoder;
951 }
952 }
953
954 /* Build the encoder object */
955 res = PyObject_CallMethod(buffer, "writable", NULL);
956 if (res == NULL)
957 goto error;
958 r = PyObject_IsTrue(res);
959 Py_DECREF(res);
960 if (r == -1)
961 goto error;
962 if (r == 1) {
963 PyObject *ci;
964 self->encoder = PyCodec_IncrementalEncoder(
965 encoding, errors);
966 if (self->encoder == NULL)
967 goto error;
968 /* Get the normalized named of the codec */
969 ci = _PyCodec_Lookup(encoding);
970 if (ci == NULL)
971 goto error;
972 res = PyObject_GetAttrString(ci, "name");
973 Py_DECREF(ci);
974 if (res == NULL) {
975 if (PyErr_ExceptionMatches(PyExc_AttributeError))
976 PyErr_Clear();
977 else
978 goto error;
979 }
980 else if (PyString_Check(res)) {
981 encodefuncentry *e = encodefuncs;
982 while (e->name != NULL) {
983 if (!strcmp(PyString_AS_STRING(res), e->name)) {
984 self->encodefunc = e->encodefunc;
985 break;
986 }
987 e++;
988 }
989 }
990 Py_XDECREF(res);
991 }
992
993 self->buffer = buffer;
994 Py_INCREF(buffer);
995
996 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
997 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
998 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
999 raw = PyObject_GetAttrString(buffer, "raw");
1000 /* Cache the raw FileIO object to speed up 'closed' checks */
1001 if (raw == NULL) {
1002 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1003 PyErr_Clear();
1004 else
1005 goto error;
1006 }
1007 else if (Py_TYPE(raw) == &PyFileIO_Type)
1008 self->raw = raw;
1009 else
1010 Py_DECREF(raw);
1011 }
1012
1013 res = PyObject_CallMethod(buffer, "seekable", NULL);
1014 if (res == NULL)
1015 goto error;
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001016 r = PyObject_IsTrue(res);
Antoine Pitrou19690592009-06-12 20:14:08 +00001017 Py_DECREF(res);
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001018 if (r < 0)
1019 goto error;
1020 self->seekable = self->telling = r;
Antoine Pitrou19690592009-06-12 20:14:08 +00001021
1022 self->encoding_start_of_stream = 0;
1023 if (self->seekable && self->encoder) {
1024 PyObject *cookieObj;
1025 int cmp;
1026
1027 self->encoding_start_of_stream = 1;
1028
1029 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1030 if (cookieObj == NULL)
1031 goto error;
1032
1033 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1034 Py_DECREF(cookieObj);
1035 if (cmp < 0) {
1036 goto error;
1037 }
1038
1039 if (cmp == 0) {
1040 self->encoding_start_of_stream = 0;
1041 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1042 _PyIO_zero, NULL);
1043 if (res == NULL)
1044 goto error;
1045 Py_DECREF(res);
1046 }
1047 }
1048
1049 self->ok = 1;
1050 return 0;
1051
1052 error:
1053 return -1;
1054}
1055
1056static int
1057_textiowrapper_clear(textio *self)
1058{
1059 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1060 return -1;
1061 self->ok = 0;
1062 Py_CLEAR(self->buffer);
1063 Py_CLEAR(self->encoding);
1064 Py_CLEAR(self->encoder);
1065 Py_CLEAR(self->decoder);
1066 Py_CLEAR(self->readnl);
1067 Py_CLEAR(self->decoded_chars);
1068 Py_CLEAR(self->pending_bytes);
1069 Py_CLEAR(self->snapshot);
1070 Py_CLEAR(self->errors);
1071 Py_CLEAR(self->raw);
1072 return 0;
1073}
1074
1075static void
1076textiowrapper_dealloc(textio *self)
1077{
1078 if (_textiowrapper_clear(self) < 0)
1079 return;
1080 _PyObject_GC_UNTRACK(self);
1081 if (self->weakreflist != NULL)
1082 PyObject_ClearWeakRefs((PyObject *)self);
1083 Py_CLEAR(self->dict);
1084 Py_TYPE(self)->tp_free((PyObject *)self);
1085}
1086
1087static int
1088textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1089{
1090 Py_VISIT(self->buffer);
1091 Py_VISIT(self->encoding);
1092 Py_VISIT(self->encoder);
1093 Py_VISIT(self->decoder);
1094 Py_VISIT(self->readnl);
1095 Py_VISIT(self->decoded_chars);
1096 Py_VISIT(self->pending_bytes);
1097 Py_VISIT(self->snapshot);
1098 Py_VISIT(self->errors);
1099 Py_VISIT(self->raw);
1100
1101 Py_VISIT(self->dict);
1102 return 0;
1103}
1104
1105static int
1106textiowrapper_clear(textio *self)
1107{
1108 if (_textiowrapper_clear(self) < 0)
1109 return -1;
1110 Py_CLEAR(self->dict);
1111 return 0;
1112}
1113
1114static PyObject *
1115textiowrapper_closed_get(textio *self, void *context);
1116
1117/* This macro takes some shortcuts to make the common case faster. */
1118#define CHECK_CLOSED(self) \
1119 do { \
1120 int r; \
1121 PyObject *_res; \
1122 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1123 if (self->raw != NULL) \
1124 r = _PyFileIO_closed(self->raw); \
1125 else { \
1126 _res = textiowrapper_closed_get(self, NULL); \
1127 if (_res == NULL) \
1128 return NULL; \
1129 r = PyObject_IsTrue(_res); \
1130 Py_DECREF(_res); \
1131 if (r < 0) \
1132 return NULL; \
1133 } \
1134 if (r > 0) { \
1135 PyErr_SetString(PyExc_ValueError, \
1136 "I/O operation on closed file."); \
1137 return NULL; \
1138 } \
1139 } \
1140 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1141 return NULL; \
1142 } while (0)
1143
/* Make the enclosing function return NULL, with ValueError set, unless the
   wrapper is initialised.  The message distinguishes a detached buffer from
   an object whose __init__ never completed.

   The body is wrapped in do { } while (0) so that the macro behaves as a
   single statement (safe inside an unbraced if/else), and the argument is
   parenthesised.  Use it with a trailing semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return NULL; \
        } \
    } while (0)
1155
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL.  Kept as a bare `if` block to match the
   form of CHECK_INITIALIZED. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1167
1168
1169static PyObject *
1170textiowrapper_detach(textio *self)
1171{
1172 PyObject *buffer, *res;
1173 CHECK_INITIALIZED(self);
1174 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1175 if (res == NULL)
1176 return NULL;
1177 Py_DECREF(res);
1178 buffer = self->buffer;
1179 self->buffer = NULL;
1180 self->detached = 1;
1181 self->ok = 0;
1182 return buffer;
1183}
1184
1185Py_LOCAL_INLINE(const Py_UNICODE *)
1186findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1187{
1188 /* like wcschr, but doesn't stop at NULL characters */
1189 while (size-- > 0) {
1190 if (*s == ch)
1191 return s;
1192 s++;
1193 }
1194 return NULL;
1195}
1196
1197/* Flush the internal write buffer. This doesn't explicitly flush the
1198 underlying buffered object, though. */
1199static int
1200_textiowrapper_writeflush(textio *self)
1201{
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001202 PyObject *pending, *b, *ret;
Antoine Pitrou19690592009-06-12 20:14:08 +00001203
1204 if (self->pending_bytes == NULL)
1205 return 0;
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001206
1207 pending = self->pending_bytes;
1208 Py_INCREF(pending);
1209 self->pending_bytes_count = 0;
1210 Py_CLEAR(self->pending_bytes);
1211
1212 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1213 Py_DECREF(pending);
Antoine Pitrou19690592009-06-12 20:14:08 +00001214 if (b == NULL)
1215 return -1;
Gregory P. Smitha998ad02013-02-01 13:02:59 -08001216 ret = NULL;
1217 do {
1218 ret = PyObject_CallMethodObjArgs(self->buffer,
1219 _PyIO_str_write, b, NULL);
1220 } while (ret == NULL && _PyIO_trap_eintr());
Antoine Pitrou19690592009-06-12 20:14:08 +00001221 Py_DECREF(b);
1222 if (ret == NULL)
1223 return -1;
1224 Py_DECREF(ret);
Antoine Pitrou19690592009-06-12 20:14:08 +00001225 return 0;
1226}
1227
1228static PyObject *
1229textiowrapper_write(textio *self, PyObject *args)
1230{
1231 PyObject *ret;
1232 PyObject *text; /* owned reference */
1233 PyObject *b;
1234 Py_ssize_t textlen;
1235 int haslf = 0;
1236 int needflush = 0;
1237
1238 CHECK_INITIALIZED(self);
1239
1240 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1241 return NULL;
1242 }
1243
1244 CHECK_CLOSED(self);
1245
1246 if (self->encoder == NULL) {
1247 PyErr_SetString(PyExc_IOError, "not writable");
1248 return NULL;
1249 }
1250
1251 Py_INCREF(text);
1252
1253 textlen = PyUnicode_GetSize(text);
1254
1255 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1256 if (findchar(PyUnicode_AS_UNICODE(text),
1257 PyUnicode_GET_SIZE(text), '\n'))
1258 haslf = 1;
1259
1260 if (haslf && self->writetranslate && self->writenl != NULL) {
1261 PyObject *newtext = PyObject_CallMethod(
1262 text, "replace", "ss", "\n", self->writenl);
1263 Py_DECREF(text);
1264 if (newtext == NULL)
1265 return NULL;
1266 text = newtext;
1267 }
1268
1269 if (self->line_buffering &&
1270 (haslf ||
1271 findchar(PyUnicode_AS_UNICODE(text),
1272 PyUnicode_GET_SIZE(text), '\r')))
1273 needflush = 1;
1274
1275 /* XXX What if we were just reading? */
1276 if (self->encodefunc != NULL) {
1277 b = (*self->encodefunc)((PyObject *) self, text);
1278 self->encoding_start_of_stream = 0;
1279 }
1280 else
1281 b = PyObject_CallMethodObjArgs(self->encoder,
1282 _PyIO_str_encode, text, NULL);
1283 Py_DECREF(text);
1284 if (b == NULL)
1285 return NULL;
1286
1287 if (self->pending_bytes == NULL) {
1288 self->pending_bytes = PyList_New(0);
1289 if (self->pending_bytes == NULL) {
1290 Py_DECREF(b);
1291 return NULL;
1292 }
1293 self->pending_bytes_count = 0;
1294 }
1295 if (PyList_Append(self->pending_bytes, b) < 0) {
1296 Py_DECREF(b);
1297 return NULL;
1298 }
1299 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1300 Py_DECREF(b);
1301 if (self->pending_bytes_count > self->chunk_size || needflush) {
1302 if (_textiowrapper_writeflush(self) < 0)
1303 return NULL;
1304 }
1305
1306 if (needflush) {
1307 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1308 if (ret == NULL)
1309 return NULL;
1310 Py_DECREF(ret);
1311 }
1312
1313 Py_CLEAR(self->snapshot);
1314
1315 if (self->decoder) {
1316 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1317 if (ret == NULL)
1318 return NULL;
1319 Py_DECREF(ret);
1320 }
1321
1322 return PyLong_FromSsize_t(textlen);
1323}
1324
1325/* Steal a reference to chars and store it in the decoded_char buffer;
1326 */
1327static void
1328textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1329{
1330 Py_CLEAR(self->decoded_chars);
1331 self->decoded_chars = chars;
1332 self->decoded_chars_used = 0;
1333}
1334
1335static PyObject *
1336textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1337{
1338 PyObject *chars;
1339 Py_ssize_t avail;
1340
1341 if (self->decoded_chars == NULL)
1342 return PyUnicode_FromStringAndSize(NULL, 0);
1343
1344 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1345 - self->decoded_chars_used);
1346
1347 assert(avail >= 0);
1348
1349 if (n < 0 || n > avail)
1350 n = avail;
1351
1352 if (self->decoded_chars_used > 0 || n < avail) {
1353 chars = PyUnicode_FromUnicode(
1354 PyUnicode_AS_UNICODE(self->decoded_chars)
1355 + self->decoded_chars_used, n);
1356 if (chars == NULL)
1357 return NULL;
1358 }
1359 else {
1360 chars = self->decoded_chars;
1361 Py_INCREF(chars);
1362 }
1363
1364 self->decoded_chars_used += n;
1365 return chars;
1366}
1367
1368/* Read and decode the next chunk of data from the BufferedReader.
1369 */
1370static int
1371textiowrapper_read_chunk(textio *self)
1372{
1373 PyObject *dec_buffer = NULL;
1374 PyObject *dec_flags = NULL;
1375 PyObject *input_chunk = NULL;
1376 PyObject *decoded_chars, *chunk_size;
1377 int eof;
1378
1379 /* The return value is True unless EOF was reached. The decoded string is
1380 * placed in self._decoded_chars (replacing its previous value). The
1381 * entire input chunk is sent to the decoder, though some of it may remain
1382 * buffered in the decoder, yet to be converted.
1383 */
1384
1385 if (self->decoder == NULL) {
1386 PyErr_SetString(PyExc_IOError, "not readable");
1387 return -1;
1388 }
1389
1390 if (self->telling) {
1391 /* To prepare for tell(), we need to snapshot a point in the file
1392 * where the decoder's input buffer is empty.
1393 */
1394
1395 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1396 _PyIO_str_getstate, NULL);
1397 if (state == NULL)
1398 return -1;
1399 /* Given this, we know there was a valid snapshot point
1400 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1401 */
1402 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1403 Py_DECREF(state);
1404 return -1;
1405 }
1406 Py_INCREF(dec_buffer);
1407 Py_INCREF(dec_flags);
1408 Py_DECREF(state);
1409 }
1410
1411 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1412 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1413 if (chunk_size == NULL)
1414 goto fail;
1415 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1416 _PyIO_str_read1, chunk_size, NULL);
1417 Py_DECREF(chunk_size);
1418 if (input_chunk == NULL)
1419 goto fail;
1420 assert(PyBytes_Check(input_chunk));
1421
1422 eof = (PyBytes_Size(input_chunk) == 0);
1423
1424 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1425 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1426 self->decoder, input_chunk, eof);
1427 }
1428 else {
1429 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1430 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1431 }
1432
1433 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1434 if (decoded_chars == NULL)
1435 goto fail;
1436 textiowrapper_set_decoded_chars(self, decoded_chars);
1437 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1438 eof = 0;
1439
1440 if (self->telling) {
1441 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1442 * next input to be decoded is dec_buffer + input_chunk.
1443 */
1444 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1445 if (next_input == NULL)
1446 goto fail;
1447 assert (PyBytes_Check(next_input));
1448 Py_DECREF(dec_buffer);
1449 Py_CLEAR(self->snapshot);
1450 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1451 }
1452 Py_DECREF(input_chunk);
1453
1454 return (eof == 0);
1455
1456 fail:
1457 Py_XDECREF(dec_buffer);
1458 Py_XDECREF(dec_flags);
1459 Py_XDECREF(input_chunk);
1460 return -1;
1461}
1462
1463static PyObject *
1464textiowrapper_read(textio *self, PyObject *args)
1465{
1466 Py_ssize_t n = -1;
1467 PyObject *result = NULL, *chunks = NULL;
1468
1469 CHECK_INITIALIZED(self);
1470
Benjamin Petersonddd392c2009-12-13 19:19:07 +00001471 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Antoine Pitrou19690592009-06-12 20:14:08 +00001472 return NULL;
1473
1474 CHECK_CLOSED(self);
1475
1476 if (self->decoder == NULL) {
1477 PyErr_SetString(PyExc_IOError, "not readable");
1478 return NULL;
1479 }
1480
1481 if (_textiowrapper_writeflush(self) < 0)
1482 return NULL;
1483
1484 if (n < 0) {
1485 /* Read everything */
1486 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1487 PyObject *decoded, *final;
1488 if (bytes == NULL)
1489 goto fail;
1490 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1491 bytes, Py_True, NULL);
1492 Py_DECREF(bytes);
1493 if (decoded == NULL)
1494 goto fail;
1495
1496 result = textiowrapper_get_decoded_chars(self, -1);
1497
1498 if (result == NULL) {
1499 Py_DECREF(decoded);
1500 return NULL;
1501 }
1502
1503 final = PyUnicode_Concat(result, decoded);
1504 Py_DECREF(result);
1505 Py_DECREF(decoded);
1506 if (final == NULL)
1507 goto fail;
1508
1509 Py_CLEAR(self->snapshot);
1510 return final;
1511 }
1512 else {
1513 int res = 1;
1514 Py_ssize_t remaining = n;
1515
1516 result = textiowrapper_get_decoded_chars(self, n);
1517 if (result == NULL)
1518 goto fail;
1519 remaining -= PyUnicode_GET_SIZE(result);
1520
1521 /* Keep reading chunks until we have n characters to return */
1522 while (remaining > 0) {
1523 res = textiowrapper_read_chunk(self);
Gregory P. Smith99716162012-10-12 13:02:06 -07001524 if (res < 0) {
1525 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1526 when EINTR occurs so we needn't do it ourselves. */
1527 if (_PyIO_trap_eintr()) {
1528 continue;
1529 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001530 goto fail;
Gregory P. Smith99716162012-10-12 13:02:06 -07001531 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001532 if (res == 0) /* EOF */
1533 break;
1534 if (chunks == NULL) {
1535 chunks = PyList_New(0);
1536 if (chunks == NULL)
1537 goto fail;
1538 }
1539 if (PyList_Append(chunks, result) < 0)
1540 goto fail;
1541 Py_DECREF(result);
1542 result = textiowrapper_get_decoded_chars(self, remaining);
1543 if (result == NULL)
1544 goto fail;
1545 remaining -= PyUnicode_GET_SIZE(result);
1546 }
1547 if (chunks != NULL) {
1548 if (result != NULL && PyList_Append(chunks, result) < 0)
1549 goto fail;
1550 Py_CLEAR(result);
1551 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1552 if (result == NULL)
1553 goto fail;
1554 Py_CLEAR(chunks);
1555 }
1556 return result;
1557 }
1558 fail:
1559 Py_XDECREF(result);
1560 Py_XDECREF(chunks);
1561 return NULL;
1562}
1563
1564
1565/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1566 that is to the NUL character. Otherwise the function will produce
1567 incorrect results. */
1568static Py_UNICODE *
1569find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1570{
1571 Py_UNICODE *s = start;
1572 for (;;) {
1573 while (*s > ch)
1574 s++;
1575 if (*s == ch)
1576 return s;
1577 if (s == end)
1578 return NULL;
1579 s++;
1580 }
1581}
1582
1583Py_ssize_t
1584_PyIO_find_line_ending(
1585 int translated, int universal, PyObject *readnl,
1586 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1587{
1588 Py_ssize_t len = end - start;
1589
1590 if (translated) {
1591 /* Newlines are already translated, only search for \n */
1592 Py_UNICODE *pos = find_control_char(start, end, '\n');
1593 if (pos != NULL)
1594 return pos - start + 1;
1595 else {
1596 *consumed = len;
1597 return -1;
1598 }
1599 }
1600 else if (universal) {
1601 /* Universal newline search. Find any of \r, \r\n, \n
1602 * The decoder ensures that \r\n are not split in two pieces
1603 */
1604 Py_UNICODE *s = start;
1605 for (;;) {
1606 Py_UNICODE ch;
1607 /* Fast path for non-control chars. The loop always ends
1608 since the Py_UNICODE storage is NUL-terminated. */
1609 while (*s > '\r')
1610 s++;
1611 if (s >= end) {
1612 *consumed = len;
1613 return -1;
1614 }
1615 ch = *s++;
1616 if (ch == '\n')
1617 return s - start;
1618 if (ch == '\r') {
1619 if (*s == '\n')
1620 return s - start + 1;
1621 else
1622 return s - start;
1623 }
1624 }
1625 }
1626 else {
1627 /* Non-universal mode. */
1628 Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1629 unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1630 if (readnl_len == 1) {
1631 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1632 if (pos != NULL)
1633 return pos - start + 1;
1634 *consumed = len;
1635 return -1;
1636 }
1637 else {
1638 Py_UNICODE *s = start;
1639 Py_UNICODE *e = end - readnl_len + 1;
1640 Py_UNICODE *pos;
1641 if (e < s)
1642 e = s;
1643 while (s < e) {
1644 Py_ssize_t i;
1645 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1646 if (pos == NULL || pos >= e)
1647 break;
1648 for (i = 1; i < readnl_len; i++) {
1649 if (pos[i] != nl[i])
1650 break;
1651 }
1652 if (i == readnl_len)
1653 return pos - start + readnl_len;
1654 s = pos + 1;
1655 }
1656 pos = find_control_char(e, end, nl[0]);
1657 if (pos == NULL)
1658 *consumed = len;
1659 else
1660 *consumed = pos - start;
1661 return -1;
1662 }
1663 }
1664}
1665
1666static PyObject *
1667_textiowrapper_readline(textio *self, Py_ssize_t limit)
1668{
1669 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1670 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1671 int res;
1672
1673 CHECK_CLOSED(self);
1674
1675 if (_textiowrapper_writeflush(self) < 0)
1676 return NULL;
1677
1678 chunked = 0;
1679
1680 while (1) {
1681 Py_UNICODE *ptr;
1682 Py_ssize_t line_len;
1683 Py_ssize_t consumed = 0;
1684
1685 /* First, get some data if necessary */
1686 res = 1;
1687 while (!self->decoded_chars ||
1688 !PyUnicode_GET_SIZE(self->decoded_chars)) {
1689 res = textiowrapper_read_chunk(self);
Gregory P. Smith99716162012-10-12 13:02:06 -07001690 if (res < 0) {
1691 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1692 when EINTR occurs so we needn't do it ourselves. */
1693 if (_PyIO_trap_eintr()) {
1694 continue;
1695 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001696 goto error;
Gregory P. Smith99716162012-10-12 13:02:06 -07001697 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001698 if (res == 0)
1699 break;
1700 }
1701 if (res == 0) {
1702 /* end of file */
1703 textiowrapper_set_decoded_chars(self, NULL);
1704 Py_CLEAR(self->snapshot);
1705 start = endpos = offset_to_buffer = 0;
1706 break;
1707 }
1708
1709 if (remaining == NULL) {
1710 line = self->decoded_chars;
1711 start = self->decoded_chars_used;
1712 offset_to_buffer = 0;
1713 Py_INCREF(line);
1714 }
1715 else {
1716 assert(self->decoded_chars_used == 0);
1717 line = PyUnicode_Concat(remaining, self->decoded_chars);
1718 start = 0;
1719 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1720 Py_CLEAR(remaining);
1721 if (line == NULL)
1722 goto error;
1723 }
1724
1725 ptr = PyUnicode_AS_UNICODE(line);
1726 line_len = PyUnicode_GET_SIZE(line);
1727
1728 endpos = _PyIO_find_line_ending(
1729 self->readtranslate, self->readuniversal, self->readnl,
1730 ptr + start, ptr + line_len, &consumed);
1731 if (endpos >= 0) {
1732 endpos += start;
1733 if (limit >= 0 && (endpos - start) + chunked >= limit)
1734 endpos = start + limit - chunked;
1735 break;
1736 }
1737
1738 /* We can put aside up to `endpos` */
1739 endpos = consumed + start;
1740 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1741 /* Didn't find line ending, but reached length limit */
1742 endpos = start + limit - chunked;
1743 break;
1744 }
1745
1746 if (endpos > start) {
1747 /* No line ending seen yet - put aside current data */
1748 PyObject *s;
1749 if (chunks == NULL) {
1750 chunks = PyList_New(0);
1751 if (chunks == NULL)
1752 goto error;
1753 }
1754 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1755 if (s == NULL)
1756 goto error;
1757 if (PyList_Append(chunks, s) < 0) {
1758 Py_DECREF(s);
1759 goto error;
1760 }
1761 chunked += PyUnicode_GET_SIZE(s);
1762 Py_DECREF(s);
1763 }
1764 /* There may be some remaining bytes we'll have to prepend to the
1765 next chunk of data */
1766 if (endpos < line_len) {
1767 remaining = PyUnicode_FromUnicode(
1768 ptr + endpos, line_len - endpos);
1769 if (remaining == NULL)
1770 goto error;
1771 }
1772 Py_CLEAR(line);
1773 /* We have consumed the buffer */
1774 textiowrapper_set_decoded_chars(self, NULL);
1775 }
1776
1777 if (line != NULL) {
1778 /* Our line ends in the current buffer */
1779 self->decoded_chars_used = endpos - offset_to_buffer;
1780 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1781 if (start == 0 && Py_REFCNT(line) == 1) {
1782 if (PyUnicode_Resize(&line, endpos) < 0)
1783 goto error;
1784 }
1785 else {
1786 PyObject *s = PyUnicode_FromUnicode(
1787 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1788 Py_CLEAR(line);
1789 if (s == NULL)
1790 goto error;
1791 line = s;
1792 }
1793 }
1794 }
1795 if (remaining != NULL) {
1796 if (chunks == NULL) {
1797 chunks = PyList_New(0);
1798 if (chunks == NULL)
1799 goto error;
1800 }
1801 if (PyList_Append(chunks, remaining) < 0)
1802 goto error;
1803 Py_CLEAR(remaining);
1804 }
1805 if (chunks != NULL) {
1806 if (line != NULL && PyList_Append(chunks, line) < 0)
1807 goto error;
1808 Py_CLEAR(line);
1809 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1810 if (line == NULL)
1811 goto error;
1812 Py_DECREF(chunks);
1813 }
1814 if (line == NULL)
1815 line = PyUnicode_FromStringAndSize(NULL, 0);
1816
1817 return line;
1818
1819 error:
1820 Py_XDECREF(chunks);
1821 Py_XDECREF(remaining);
1822 Py_XDECREF(line);
1823 return NULL;
1824}
1825
1826static PyObject *
1827textiowrapper_readline(textio *self, PyObject *args)
1828{
1829 PyObject *limitobj = NULL;
1830 Py_ssize_t limit = -1;
1831
1832 CHECK_INITIALIZED(self);
1833 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1834 return NULL;
1835 }
1836 if (limitobj) {
1837 if (!PyNumber_Check(limitobj)) {
1838 PyErr_Format(PyExc_TypeError,
1839 "integer argument expected, got '%.200s'",
1840 Py_TYPE(limitobj)->tp_name);
1841 return NULL;
1842 }
1843 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1844 if (limit == -1 && PyErr_Occurred())
1845 return NULL;
1846 }
1847 return _textiowrapper_readline(self, limit);
1848}
1849
1850/* Seek and Tell */
1851
1852typedef struct {
1853 Py_off_t start_pos;
1854 int dec_flags;
1855 int bytes_to_feed;
1856 int chars_to_skip;
1857 char need_eof;
1858} cookie_type;
1859
/*
   To speed up cookie packing/unpacking, the fields are stored in a
   temporary byte string which is converted with _PyLong_FromByteArray()
   or _PyLong_AsByteArray() respectively.  The macros below give the
   offset of each cookie_type field inside that intermediary byte string;
   each offset is written as the previous field's offset plus that
   field's size.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1897
1898static int
1899textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1900{
1901 unsigned char buffer[COOKIE_BUF_LEN];
1902 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1903 if (cookieLong == NULL)
1904 return -1;
1905
1906 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1907 IS_LITTLE_ENDIAN, 0) < 0) {
1908 Py_DECREF(cookieLong);
1909 return -1;
1910 }
1911 Py_DECREF(cookieLong);
1912
1913 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1914 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1915 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1916 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1917 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1918
1919 return 0;
1920}
1921
1922static PyObject *
1923textiowrapper_build_cookie(cookie_type *cookie)
1924{
1925 unsigned char buffer[COOKIE_BUF_LEN];
1926
1927 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1928 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1929 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1930 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1931 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1932
1933 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1934}
1935#undef IS_LITTLE_ENDIAN
1936
1937static int
1938_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1939{
1940 PyObject *res;
1941 /* When seeking to the start of the stream, we call decoder.reset()
1942 rather than decoder.getstate().
1943 This is for a few decoders such as utf-16 for which the state value
1944 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1945 utf-16, that we are expecting a BOM).
1946 */
1947 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1948 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1949 else
1950 res = PyObject_CallMethod(self->decoder, "setstate",
1951 "((si))", "", cookie->dec_flags);
1952 if (res == NULL)
1953 return -1;
1954 Py_DECREF(res);
1955 return 0;
1956}
1957
1958static int
1959_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1960{
1961 PyObject *res;
1962 /* Same as _textiowrapper_decoder_setstate() above. */
1963 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1964 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1965 self->encoding_start_of_stream = 1;
1966 }
1967 else {
1968 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1969 _PyIO_zero, NULL);
1970 self->encoding_start_of_stream = 0;
1971 }
1972 if (res == NULL)
1973 return -1;
1974 Py_DECREF(res);
1975 return 0;
1976}
1977
1978static PyObject *
1979textiowrapper_seek(textio *self, PyObject *args)
1980{
1981 PyObject *cookieObj, *posobj;
1982 cookie_type cookie;
1983 int whence = 0;
1984 PyObject *res;
1985 int cmp;
1986
1987 CHECK_INITIALIZED(self);
1988
1989 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1990 return NULL;
1991 CHECK_CLOSED(self);
1992
1993 Py_INCREF(cookieObj);
1994
1995 if (!self->seekable) {
1996 PyErr_SetString(PyExc_IOError,
1997 "underlying stream is not seekable");
1998 goto fail;
1999 }
2000
2001 if (whence == 1) {
2002 /* seek relative to current position */
2003 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2004 if (cmp < 0)
2005 goto fail;
2006
2007 if (cmp == 0) {
2008 PyErr_SetString(PyExc_IOError,
2009 "can't do nonzero cur-relative seeks");
2010 goto fail;
2011 }
2012
2013 /* Seeking to the current position should attempt to
2014 * sync the underlying buffer with the current position.
2015 */
2016 Py_DECREF(cookieObj);
2017 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2018 if (cookieObj == NULL)
2019 goto fail;
2020 }
2021 else if (whence == 2) {
2022 /* seek relative to end of file */
2023
2024 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2025 if (cmp < 0)
2026 goto fail;
2027
2028 if (cmp == 0) {
2029 PyErr_SetString(PyExc_IOError,
2030 "can't do nonzero end-relative seeks");
2031 goto fail;
2032 }
2033
2034 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2035 if (res == NULL)
2036 goto fail;
2037 Py_DECREF(res);
2038
2039 textiowrapper_set_decoded_chars(self, NULL);
2040 Py_CLEAR(self->snapshot);
2041 if (self->decoder) {
2042 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2043 if (res == NULL)
2044 goto fail;
2045 Py_DECREF(res);
2046 }
2047
2048 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2049 Py_XDECREF(cookieObj);
2050 return res;
2051 }
2052 else if (whence != 0) {
2053 PyErr_Format(PyExc_ValueError,
2054 "invalid whence (%d, should be 0, 1 or 2)", whence);
2055 goto fail;
2056 }
2057
2058 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2059 if (cmp < 0)
2060 goto fail;
2061
2062 if (cmp == 1) {
2063 PyObject *repr = PyObject_Repr(cookieObj);
2064 if (repr != NULL) {
2065 PyErr_Format(PyExc_ValueError,
2066 "negative seek position %s",
2067 PyString_AS_STRING(repr));
2068 Py_DECREF(repr);
2069 }
2070 goto fail;
2071 }
2072
2073 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2074 if (res == NULL)
2075 goto fail;
2076 Py_DECREF(res);
2077
2078 /* The strategy of seek() is to go back to the safe start point
2079 * and replay the effect of read(chars_to_skip) from there.
2080 */
2081 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2082 goto fail;
2083
2084 /* Seek back to the safe start point. */
2085 posobj = PyLong_FromOff_t(cookie.start_pos);
2086 if (posobj == NULL)
2087 goto fail;
2088 res = PyObject_CallMethodObjArgs(self->buffer,
2089 _PyIO_str_seek, posobj, NULL);
2090 Py_DECREF(posobj);
2091 if (res == NULL)
2092 goto fail;
2093 Py_DECREF(res);
2094
2095 textiowrapper_set_decoded_chars(self, NULL);
2096 Py_CLEAR(self->snapshot);
2097
2098 /* Restore the decoder to its state from the safe start point. */
2099 if (self->decoder) {
2100 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2101 goto fail;
2102 }
2103
2104 if (cookie.chars_to_skip) {
2105 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2106 PyObject *input_chunk = PyObject_CallMethod(
2107 self->buffer, "read", "i", cookie.bytes_to_feed);
2108 PyObject *decoded;
2109
2110 if (input_chunk == NULL)
2111 goto fail;
2112
2113 assert (PyBytes_Check(input_chunk));
2114
2115 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2116 if (self->snapshot == NULL) {
2117 Py_DECREF(input_chunk);
2118 goto fail;
2119 }
2120
2121 decoded = PyObject_CallMethod(self->decoder, "decode",
2122 "Oi", input_chunk, (int)cookie.need_eof);
2123
2124 if (decoded == NULL)
2125 goto fail;
2126
2127 textiowrapper_set_decoded_chars(self, decoded);
2128
2129 /* Skip chars_to_skip of the decoded characters. */
2130 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2131 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2132 goto fail;
2133 }
2134 self->decoded_chars_used = cookie.chars_to_skip;
2135 }
2136 else {
2137 self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2138 if (self->snapshot == NULL)
2139 goto fail;
2140 }
2141
2142 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2143 if (self->encoder) {
2144 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2145 goto fail;
2146 }
2147 return cookieObj;
2148 fail:
2149 Py_XDECREF(cookieObj);
2150 return NULL;
2151
2152}
2153
2154static PyObject *
2155textiowrapper_tell(textio *self, PyObject *args)
2156{
2157 PyObject *res;
2158 PyObject *posobj = NULL;
2159 cookie_type cookie = {0,0,0,0,0};
2160 PyObject *next_input;
2161 Py_ssize_t chars_to_skip, chars_decoded;
2162 PyObject *saved_state = NULL;
2163 char *input, *input_end;
2164
2165 CHECK_INITIALIZED(self);
2166 CHECK_CLOSED(self);
2167
2168 if (!self->seekable) {
2169 PyErr_SetString(PyExc_IOError,
2170 "underlying stream is not seekable");
2171 goto fail;
2172 }
2173 if (!self->telling) {
2174 PyErr_SetString(PyExc_IOError,
2175 "telling position disabled by next() call");
2176 goto fail;
2177 }
2178
2179 if (_textiowrapper_writeflush(self) < 0)
2180 return NULL;
2181 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2182 if (res == NULL)
2183 goto fail;
2184 Py_DECREF(res);
2185
2186 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2187 if (posobj == NULL)
2188 goto fail;
2189
2190 if (self->decoder == NULL || self->snapshot == NULL) {
2191 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2192 return posobj;
2193 }
2194
2195#if defined(HAVE_LARGEFILE_SUPPORT)
2196 cookie.start_pos = PyLong_AsLongLong(posobj);
2197#else
2198 cookie.start_pos = PyLong_AsLong(posobj);
2199#endif
2200 if (PyErr_Occurred())
2201 goto fail;
2202
2203 /* Skip backward to the snapshot point (see _read_chunk). */
2204 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2205 goto fail;
2206
2207 assert (PyBytes_Check(next_input));
2208
2209 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2210
2211 /* How many decoded characters have been used up since the snapshot? */
2212 if (self->decoded_chars_used == 0) {
2213 /* We haven't moved from the snapshot point. */
2214 Py_DECREF(posobj);
2215 return textiowrapper_build_cookie(&cookie);
2216 }
2217
2218 chars_to_skip = self->decoded_chars_used;
2219
2220 /* Starting from the snapshot position, we will walk the decoder
2221 * forward until it gives us enough decoded characters.
2222 */
2223 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2224 _PyIO_str_getstate, NULL);
2225 if (saved_state == NULL)
2226 goto fail;
2227
2228 /* Note our initial start point. */
2229 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2230 goto fail;
2231
2232 /* Feed the decoder one byte at a time. As we go, note the
2233 * nearest "safe start point" before the current location
2234 * (a point where the decoder has nothing buffered, so seek()
2235 * can safely start from there and advance to this location).
2236 */
2237 chars_decoded = 0;
2238 input = PyBytes_AS_STRING(next_input);
2239 input_end = input + PyBytes_GET_SIZE(next_input);
2240 while (input < input_end) {
2241 PyObject *state;
2242 char *dec_buffer;
2243 Py_ssize_t dec_buffer_len;
2244 int dec_flags;
2245
2246 PyObject *decoded = PyObject_CallMethod(
2247 self->decoder, "decode", "s#", input, 1);
2248 if (decoded == NULL)
2249 goto fail;
2250 assert (PyUnicode_Check(decoded));
2251 chars_decoded += PyUnicode_GET_SIZE(decoded);
2252 Py_DECREF(decoded);
2253
2254 cookie.bytes_to_feed += 1;
2255
2256 state = PyObject_CallMethodObjArgs(self->decoder,
2257 _PyIO_str_getstate, NULL);
2258 if (state == NULL)
2259 goto fail;
2260 if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2261 Py_DECREF(state);
2262 goto fail;
2263 }
2264 Py_DECREF(state);
2265
2266 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2267 /* Decoder buffer is empty, so this is a safe start point. */
2268 cookie.start_pos += cookie.bytes_to_feed;
2269 chars_to_skip -= chars_decoded;
2270 cookie.dec_flags = dec_flags;
2271 cookie.bytes_to_feed = 0;
2272 chars_decoded = 0;
2273 }
2274 if (chars_decoded >= chars_to_skip)
2275 break;
2276 input++;
2277 }
2278 if (input == input_end) {
2279 /* We didn't get enough decoded data; signal EOF to get more. */
2280 PyObject *decoded = PyObject_CallMethod(
2281 self->decoder, "decode", "si", "", /* final = */ 1);
2282 if (decoded == NULL)
2283 goto fail;
2284 assert (PyUnicode_Check(decoded));
2285 chars_decoded += PyUnicode_GET_SIZE(decoded);
2286 Py_DECREF(decoded);
2287 cookie.need_eof = 1;
2288
2289 if (chars_decoded < chars_to_skip) {
2290 PyErr_SetString(PyExc_IOError,
2291 "can't reconstruct logical file position");
2292 goto fail;
2293 }
2294 }
2295
2296 /* finally */
2297 Py_XDECREF(posobj);
2298 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2299 Py_DECREF(saved_state);
2300 if (res == NULL)
2301 return NULL;
2302 Py_DECREF(res);
2303
2304 /* The returned cookie corresponds to the last safe start point. */
2305 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2306 return textiowrapper_build_cookie(&cookie);
2307
2308 fail:
2309 Py_XDECREF(posobj);
2310 if (saved_state) {
2311 PyObject *type, *value, *traceback;
2312 PyErr_Fetch(&type, &value, &traceback);
2313
2314 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2315 Py_DECREF(saved_state);
2316 if (res == NULL)
2317 return NULL;
2318 Py_DECREF(res);
2319
2320 PyErr_Restore(type, value, traceback);
2321 }
2322 return NULL;
2323}
2324
2325static PyObject *
2326textiowrapper_truncate(textio *self, PyObject *args)
2327{
2328 PyObject *pos = Py_None;
2329 PyObject *res;
2330
2331 CHECK_INITIALIZED(self)
2332 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2333 return NULL;
2334 }
2335
2336 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2337 if (res == NULL)
2338 return NULL;
2339 Py_DECREF(res);
2340
Antoine Pitrouf3fa0742010-01-31 22:26:04 +00002341 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Antoine Pitrou19690592009-06-12 20:14:08 +00002342}
2343
2344static PyObject *
2345textiowrapper_repr(textio *self)
2346{
2347 PyObject *nameobj, *res;
2348 PyObject *namerepr = NULL, *encrepr = NULL;
2349
2350 CHECK_INITIALIZED(self);
2351
2352 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2353 if (nameobj == NULL) {
2354 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2355 PyErr_Clear();
2356 else
2357 goto error;
2358 encrepr = PyObject_Repr(self->encoding);
2359 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2360 PyString_AS_STRING(encrepr));
2361 }
2362 else {
2363 encrepr = PyObject_Repr(self->encoding);
2364 namerepr = PyObject_Repr(nameobj);
2365 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2366 PyString_AS_STRING(namerepr),
2367 PyString_AS_STRING(encrepr));
2368 Py_DECREF(nameobj);
2369 }
2370 Py_XDECREF(namerepr);
2371 Py_XDECREF(encrepr);
2372 return res;
2373
2374error:
2375 Py_XDECREF(namerepr);
2376 Py_XDECREF(encrepr);
2377 return NULL;
2378}
2379
2380
2381/* Inquiries */
2382
2383static PyObject *
2384textiowrapper_fileno(textio *self, PyObject *args)
2385{
2386 CHECK_INITIALIZED(self);
2387 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2388}
2389
2390static PyObject *
2391textiowrapper_seekable(textio *self, PyObject *args)
2392{
2393 CHECK_INITIALIZED(self);
2394 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2395}
2396
2397static PyObject *
2398textiowrapper_readable(textio *self, PyObject *args)
2399{
2400 CHECK_INITIALIZED(self);
2401 return PyObject_CallMethod(self->buffer, "readable", NULL);
2402}
2403
2404static PyObject *
2405textiowrapper_writable(textio *self, PyObject *args)
2406{
2407 CHECK_INITIALIZED(self);
2408 return PyObject_CallMethod(self->buffer, "writable", NULL);
2409}
2410
2411static PyObject *
2412textiowrapper_isatty(textio *self, PyObject *args)
2413{
2414 CHECK_INITIALIZED(self);
2415 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2416}
2417
2418static PyObject *
2419textiowrapper_flush(textio *self, PyObject *args)
2420{
2421 CHECK_INITIALIZED(self);
2422 CHECK_CLOSED(self);
2423 self->telling = self->seekable;
2424 if (_textiowrapper_writeflush(self) < 0)
2425 return NULL;
2426 return PyObject_CallMethod(self->buffer, "flush", NULL);
2427}
2428
2429static PyObject *
2430textiowrapper_close(textio *self, PyObject *args)
2431{
2432 PyObject *res;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002433 int r;
Antoine Pitrou19690592009-06-12 20:14:08 +00002434 CHECK_INITIALIZED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002435
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002436 res = textiowrapper_closed_get(self, NULL);
2437 if (res == NULL)
2438 return NULL;
2439 r = PyObject_IsTrue(res);
2440 Py_DECREF(res);
2441 if (r < 0)
2442 return NULL;
2443
2444 if (r > 0) {
2445 Py_RETURN_NONE; /* stream already closed */
2446 }
2447 else {
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002448 PyObject *exc = NULL, *val, *tb;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002449 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002450 if (res == NULL)
2451 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002452 else
2453 Py_DECREF(res);
2454
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002455 res = PyObject_CallMethod(self->buffer, "close", NULL);
2456 if (exc != NULL) {
2457 if (res != NULL) {
2458 Py_CLEAR(res);
2459 PyErr_Restore(exc, val, tb);
2460 }
2461 else {
2462 Py_DECREF(exc);
2463 Py_XDECREF(val);
2464 Py_XDECREF(tb);
2465 }
2466 }
2467 return res;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002468 }
Antoine Pitrou19690592009-06-12 20:14:08 +00002469}
2470
2471static PyObject *
2472textiowrapper_iternext(textio *self)
2473{
2474 PyObject *line;
2475
2476 CHECK_INITIALIZED(self);
2477
2478 self->telling = 0;
2479 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2480 /* Skip method call overhead for speed */
2481 line = _textiowrapper_readline(self, -1);
2482 }
2483 else {
2484 line = PyObject_CallMethodObjArgs((PyObject *)self,
2485 _PyIO_str_readline, NULL);
2486 if (line && !PyUnicode_Check(line)) {
2487 PyErr_Format(PyExc_IOError,
2488 "readline() should have returned an str object, "
2489 "not '%.200s'", Py_TYPE(line)->tp_name);
2490 Py_DECREF(line);
2491 return NULL;
2492 }
2493 }
2494
2495 if (line == NULL)
2496 return NULL;
2497
2498 if (PyUnicode_GET_SIZE(line) == 0) {
2499 /* Reached EOF or would have blocked */
2500 Py_DECREF(line);
2501 Py_CLEAR(self->snapshot);
2502 self->telling = self->seekable;
2503 return NULL;
2504 }
2505
2506 return line;
2507}
2508
2509static PyObject *
2510textiowrapper_name_get(textio *self, void *context)
2511{
2512 CHECK_INITIALIZED(self);
2513 return PyObject_GetAttrString(self->buffer, "name");
2514}
2515
2516static PyObject *
2517textiowrapper_closed_get(textio *self, void *context)
2518{
2519 CHECK_INITIALIZED(self);
2520 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2521}
2522
2523static PyObject *
2524textiowrapper_newlines_get(textio *self, void *context)
2525{
2526 PyObject *res;
2527 CHECK_INITIALIZED(self);
2528 if (self->decoder == NULL)
2529 Py_RETURN_NONE;
2530 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2531 if (res == NULL) {
2532 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2533 PyErr_Clear();
2534 Py_RETURN_NONE;
2535 }
2536 else {
2537 return NULL;
2538 }
2539 }
2540 return res;
2541}
2542
2543static PyObject *
2544textiowrapper_errors_get(textio *self, void *context)
2545{
2546 CHECK_INITIALIZED(self);
2547 Py_INCREF(self->errors);
2548 return self->errors;
2549}
2550
2551static PyObject *
2552textiowrapper_chunk_size_get(textio *self, void *context)
2553{
2554 CHECK_INITIALIZED(self);
2555 return PyLong_FromSsize_t(self->chunk_size);
2556}
2557
2558static int
2559textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2560{
2561 Py_ssize_t n;
2562 CHECK_INITIALIZED_INT(self);
2563 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2564 if (n == -1 && PyErr_Occurred())
2565 return -1;
2566 if (n <= 0) {
2567 PyErr_SetString(PyExc_ValueError,
2568 "a strictly positive integer is required");
2569 return -1;
2570 }
2571 self->chunk_size = n;
2572 return 0;
2573}
2574
2575static PyMethodDef textiowrapper_methods[] = {
2576 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2577 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2578 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2579 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2580 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2581 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2582
2583 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2584 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2585 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2586 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2587 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2588
2589 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2590 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2591 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2592 {NULL, NULL}
2593};
2594
2595static PyMemberDef textiowrapper_members[] = {
2596 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2597 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2598 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2599 {NULL}
2600};
2601
2602static PyGetSetDef textiowrapper_getset[] = {
2603 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2604 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2605/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2606*/
2607 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2608 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2609 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2610 (setter)textiowrapper_chunk_size_set, NULL},
2611 {NULL}
2612};
2613
2614PyTypeObject PyTextIOWrapper_Type = {
2615 PyVarObject_HEAD_INIT(NULL, 0)
2616 "_io.TextIOWrapper", /*tp_name*/
2617 sizeof(textio), /*tp_basicsize*/
2618 0, /*tp_itemsize*/
2619 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2620 0, /*tp_print*/
2621 0, /*tp_getattr*/
2622 0, /*tps_etattr*/
2623 0, /*tp_compare */
2624 (reprfunc)textiowrapper_repr,/*tp_repr*/
2625 0, /*tp_as_number*/
2626 0, /*tp_as_sequence*/
2627 0, /*tp_as_mapping*/
2628 0, /*tp_hash */
2629 0, /*tp_call*/
2630 0, /*tp_str*/
2631 0, /*tp_getattro*/
2632 0, /*tp_setattro*/
2633 0, /*tp_as_buffer*/
2634 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2635 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2636 textiowrapper_doc, /* tp_doc */
2637 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2638 (inquiry)textiowrapper_clear, /* tp_clear */
2639 0, /* tp_richcompare */
2640 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2641 0, /* tp_iter */
2642 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2643 textiowrapper_methods, /* tp_methods */
2644 textiowrapper_members, /* tp_members */
2645 textiowrapper_getset, /* tp_getset */
2646 0, /* tp_base */
2647 0, /* tp_dict */
2648 0, /* tp_descr_get */
2649 0, /* tp_descr_set */
2650 offsetof(textio, dict), /*tp_dictoffset*/
2651 (initproc)textiowrapper_init, /* tp_init */
2652 0, /* tp_alloc */
2653 PyType_GenericNew, /* tp_new */
2654};