blob: d2e92fa20170ca738cb60558250159217a63d873 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
16PyDoc_STRVAR(textiobase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
29}
30
31PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
39textiobase_detach(PyObject *self)
40{
41 return _unsupported("detach");
42}
43
44PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
52textiobase_read(PyObject *self, PyObject *args)
53{
54 return _unsupported("read");
55}
56
57PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
64textiobase_readline(PyObject *self, PyObject *args)
65{
66 return _unsupported("readline");
67}
68
69PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
76textiobase_write(PyObject *self, PyObject *args)
77{
78 return _unsupported("write");
79}
80
81PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
88textiobase_encoding_get(PyObject *self, void *context)
89{
90 Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
102textiobase_newlines_get(PyObject *self, void *context)
103{
104 Py_RETURN_NONE;
105}
106
107PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
114textiobase_errors_get(PyObject *self, void *context)
115{
116 Py_RETURN_NONE;
117}
118
119
120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 {NULL, NULL}
126};
127
128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 {NULL}
133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
156 textiobase_doc, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
163 textiobase_methods, /* tp_methods */
164 0, /* tp_members */
165 textiobase_getset, /* tp_getset */
166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrou2a466582009-09-21 21:17:48 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Antoine Pitrou19690592009-06-12 20:14:08 +0000196} nldecoder_object;
197
198static int
199incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
239#define SEEN_CR 1
240#define SEEN_LF 2
241#define SEEN_CRLF 4
242#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
244PyObject *
245_PyIncrementalNewlineDecoder_decode(PyObject *_self,
246 PyObject *input, int final)
247{
248 PyObject *output;
249 Py_ssize_t output_len;
250 nldecoder_object *self = (nldecoder_object *) _self;
251
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
256 }
257
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
262 }
263 else {
264 output = input;
265 Py_INCREF(output);
266 }
267
268 if (output == NULL)
269 return NULL;
270
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
275 }
276
277 output_len = PyUnicode_GET_SIZE(output);
278 if (self->pendingcr && (final || output_len > 0)) {
279 Py_UNICODE *out;
280 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
281 if (modified == NULL)
282 goto error;
283 out = PyUnicode_AS_UNICODE(modified);
284 out[0] = '\r';
285 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
286 output_len * sizeof(Py_UNICODE));
287 Py_DECREF(output);
288 output = modified;
289 self->pendingcr = 0;
290 output_len++;
291 }
292
293 /* retain last \r even when not translating data:
294 * then readline() is sure to get \r\n in one pass
295 */
296 if (!final) {
297 if (output_len > 0
298 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
299
300 if (Py_REFCNT(output) == 1) {
301 if (PyUnicode_Resize(&output, output_len - 1) < 0)
302 goto error;
303 }
304 else {
305 PyObject *modified = PyUnicode_FromUnicode(
306 PyUnicode_AS_UNICODE(output),
307 output_len - 1);
308 if (modified == NULL)
309 goto error;
310 Py_DECREF(output);
311 output = modified;
312 }
313 self->pendingcr = 1;
314 }
315 }
316
317 /* Record which newlines are read and do newline translation if desired,
318 all in one pass. */
319 {
320 Py_UNICODE *in_str;
321 Py_ssize_t len;
322 int seennl = self->seennl;
323 int only_lf = 0;
324
325 in_str = PyUnicode_AS_UNICODE(output);
326 len = PyUnicode_GET_SIZE(output);
327
328 if (len == 0)
329 return output;
330
331 /* If, up to now, newlines are consistently \n, do a quick check
332 for the \r *byte* with the libc's optimized memchr.
333 */
334 if (seennl == SEEN_LF || seennl == 0) {
335 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
336 }
337
338 if (only_lf) {
339 /* If not already seen, quick scan for a possible "\n" character.
340 (there's nothing else to be done, even when in translation mode)
341 */
342 if (seennl == 0 &&
343 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
344 Py_UNICODE *s, *end;
345 s = in_str;
346 end = in_str + len;
347 for (;;) {
348 Py_UNICODE c;
349 /* Fast loop for non-control characters */
350 while (*s > '\n')
351 s++;
352 c = *s++;
353 if (c == '\n') {
354 seennl |= SEEN_LF;
355 break;
356 }
357 if (s > end)
358 break;
359 }
360 }
361 /* Finished: we have scanned for newlines, and none of them
362 need translating */
363 }
364 else if (!self->translate) {
365 Py_UNICODE *s, *end;
366 /* We have already seen all newline types, no need to scan again */
367 if (seennl == SEEN_ALL)
368 goto endscan;
369 s = in_str;
370 end = in_str + len;
371 for (;;) {
372 Py_UNICODE c;
373 /* Fast loop for non-control characters */
374 while (*s > '\r')
375 s++;
376 c = *s++;
377 if (c == '\n')
378 seennl |= SEEN_LF;
379 else if (c == '\r') {
380 if (*s == '\n') {
381 seennl |= SEEN_CRLF;
382 s++;
383 }
384 else
385 seennl |= SEEN_CR;
386 }
387 if (s > end)
388 break;
389 if (seennl == SEEN_ALL)
390 break;
391 }
392 endscan:
393 ;
394 }
395 else {
396 PyObject *translated = NULL;
397 Py_UNICODE *out_str;
398 Py_UNICODE *in, *out, *end;
399 if (Py_REFCNT(output) != 1) {
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 most decoders should only output non-shared strings, i.e.
403 translation is done in place. */
404 translated = PyUnicode_FromUnicode(NULL, len);
405 if (translated == NULL)
406 goto error;
407 assert(Py_REFCNT(translated) == 1);
408 memcpy(PyUnicode_AS_UNICODE(translated),
409 PyUnicode_AS_UNICODE(output),
410 len * sizeof(Py_UNICODE));
411 }
412 else {
413 translated = output;
414 }
415 out_str = PyUnicode_AS_UNICODE(translated);
416 in = in_str;
417 out = out_str;
418 end = in_str + len;
419 for (;;) {
420 Py_UNICODE c;
421 /* Fast loop for non-control characters */
422 while ((c = *in++) > '\r')
423 *out++ = c;
424 if (c == '\n') {
425 *out++ = c;
426 seennl |= SEEN_LF;
427 continue;
428 }
429 if (c == '\r') {
430 if (*in == '\n') {
431 in++;
432 seennl |= SEEN_CRLF;
433 }
434 else
435 seennl |= SEEN_CR;
436 *out++ = '\n';
437 continue;
438 }
439 if (in > end)
440 break;
441 *out++ = c;
442 }
443 if (translated != output) {
444 Py_DECREF(output);
445 output = translated;
446 }
447 if (out - out_str != len) {
448 if (PyUnicode_Resize(&output, out - out_str) < 0)
449 goto error;
450 }
451 }
452 self->seennl |= seennl;
453 }
454
455 return output;
456
457 error:
458 Py_DECREF(output);
459 return NULL;
460}
461
462static PyObject *
463incrementalnewlinedecoder_decode(nldecoder_object *self,
464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
564 {NULL}
565};
566
567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
569 {NULL}
570};
571
572PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
575 sizeof(nldecoder_object), /*tp_basicsize*/
576 0, /*tp_itemsize*/
577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
593 incrementalnewlinedecoder_doc, /* tp_doc */
594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
600 incrementalnewlinedecoder_methods, /* tp_methods */
601 0, /* tp_members */
602 incrementalnewlinedecoder_getset, /* tp_getset */
603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611};
612
613
614/* TextIOWrapper */
615
616PyDoc_STRVAR(textiowrapper_doc,
617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
Antoine Pitrou76370f42012-08-04 00:55:38 +0200625 "newline controls how line endings are handled. It can be None, '',\n"
626 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
627 "\n"
628 "* On input, if newline is None, universal newlines mode is\n"
629 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
630 " these are translated into '\\n' before being returned to the\n"
631 " caller. If it is '', universal newline mode is enabled, but line\n"
632 " endings are returned to the caller untranslated. If it has any of\n"
633 " the other legal values, input lines are only terminated by the given\n"
634 " string, and the line ending is returned to the caller untranslated.\n"
635 "\n"
636 "* On output, if newline is None, any '\\n' characters written are\n"
637 " translated to the system default line separator, os.linesep. If\n"
638 " newline is '', no translation takes place. If newline is any of the\n"
639 " other legal values, any '\\n' characters written are translated to\n"
640 " the given string.\n"
Antoine Pitrou19690592009-06-12 20:14:08 +0000641 "\n"
642 "If line_buffering is True, a call to flush is implied when a call to\n"
643 "write contains a newline character."
644 );
645
646typedef PyObject *
647 (*encodefunc_t)(PyObject *, PyObject *);
648
649typedef struct
650{
651 PyObject_HEAD
652 int ok; /* initialized? */
653 int detached;
654 Py_ssize_t chunk_size;
655 PyObject *buffer;
656 PyObject *encoding;
657 PyObject *encoder;
658 PyObject *decoder;
659 PyObject *readnl;
660 PyObject *errors;
661 const char *writenl; /* utf-8 encoded, NULL stands for \n */
662 char line_buffering;
663 char readuniversal;
664 char readtranslate;
665 char writetranslate;
666 char seekable;
667 char telling;
668 /* Specialized encoding func (see below) */
669 encodefunc_t encodefunc;
670 /* Whether or not it's the start of the stream */
671 char encoding_start_of_stream;
672
673 /* Reads and writes are internally buffered in order to speed things up.
674 However, any read will first flush the write buffer if itsn't empty.
675
676 Please also note that text to be written is first encoded before being
677 buffered. This is necessary so that encoding errors are immediately
678 reported to the caller, but it unfortunately means that the
679 IncrementalEncoder (whose encode() method is always written in Python)
680 becomes a bottleneck for small writes.
681 */
682 PyObject *decoded_chars; /* buffer for text returned from decoder */
683 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
684 PyObject *pending_bytes; /* list of bytes objects waiting to be
685 written, or NULL */
686 Py_ssize_t pending_bytes_count;
687 PyObject *snapshot;
688 /* snapshot is either None, or a tuple (dec_flags, next_input) where
689 * dec_flags is the second (integer) item of the decoder state and
690 * next_input is the chunk of input bytes that comes next after the
691 * snapshot point. We use this to reconstruct decoder states in tell().
692 */
693
694 /* Cache raw object if it's a FileIO object */
695 PyObject *raw;
696
697 PyObject *weakreflist;
698 PyObject *dict;
699} textio;
700
701
702/* A couple of specialized cases in order to bypass the slow incremental
703 encoding methods for the most popular encodings. */
704
705static PyObject *
706ascii_encode(textio *self, PyObject *text)
707{
708 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
709 PyUnicode_GET_SIZE(text),
710 PyBytes_AS_STRING(self->errors));
711}
712
713static PyObject *
714utf16be_encode(textio *self, PyObject *text)
715{
716 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
717 PyUnicode_GET_SIZE(text),
718 PyBytes_AS_STRING(self->errors), 1);
719}
720
721static PyObject *
722utf16le_encode(textio *self, PyObject *text)
723{
724 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
725 PyUnicode_GET_SIZE(text),
726 PyBytes_AS_STRING(self->errors), -1);
727}
728
729static PyObject *
730utf16_encode(textio *self, PyObject *text)
731{
732 if (!self->encoding_start_of_stream) {
733 /* Skip the BOM and use native byte ordering */
734#if defined(WORDS_BIGENDIAN)
735 return utf16be_encode(self, text);
736#else
737 return utf16le_encode(self, text);
738#endif
739 }
740 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
741 PyUnicode_GET_SIZE(text),
742 PyBytes_AS_STRING(self->errors), 0);
743}
744
745static PyObject *
746utf32be_encode(textio *self, PyObject *text)
747{
748 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
749 PyUnicode_GET_SIZE(text),
750 PyBytes_AS_STRING(self->errors), 1);
751}
752
753static PyObject *
754utf32le_encode(textio *self, PyObject *text)
755{
756 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
757 PyUnicode_GET_SIZE(text),
758 PyBytes_AS_STRING(self->errors), -1);
759}
760
761static PyObject *
762utf32_encode(textio *self, PyObject *text)
763{
764 if (!self->encoding_start_of_stream) {
765 /* Skip the BOM and use native byte ordering */
766#if defined(WORDS_BIGENDIAN)
767 return utf32be_encode(self, text);
768#else
769 return utf32le_encode(self, text);
770#endif
771 }
772 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
773 PyUnicode_GET_SIZE(text),
774 PyBytes_AS_STRING(self->errors), 0);
775}
776
777static PyObject *
778utf8_encode(textio *self, PyObject *text)
779{
780 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
781 PyUnicode_GET_SIZE(text),
782 PyBytes_AS_STRING(self->errors));
783}
784
785static PyObject *
786latin1_encode(textio *self, PyObject *text)
787{
788 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
789 PyUnicode_GET_SIZE(text),
790 PyBytes_AS_STRING(self->errors));
791}
792
793/* Map normalized encoding names onto the specialized encoding funcs */
794
795typedef struct {
796 const char *name;
797 encodefunc_t encodefunc;
798} encodefuncentry;
799
800static encodefuncentry encodefuncs[] = {
801 {"ascii", (encodefunc_t) ascii_encode},
802 {"iso8859-1", (encodefunc_t) latin1_encode},
803 {"utf-8", (encodefunc_t) utf8_encode},
804 {"utf-16-be", (encodefunc_t) utf16be_encode},
805 {"utf-16-le", (encodefunc_t) utf16le_encode},
806 {"utf-16", (encodefunc_t) utf16_encode},
807 {"utf-32-be", (encodefunc_t) utf32be_encode},
808 {"utf-32-le", (encodefunc_t) utf32le_encode},
809 {"utf-32", (encodefunc_t) utf32_encode},
810 {NULL, NULL}
811};
812
813
814static int
815textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
816{
817 char *kwlist[] = {"buffer", "encoding", "errors",
818 "newline", "line_buffering",
819 NULL};
820 PyObject *buffer, *raw;
821 char *encoding = NULL;
822 char *errors = NULL;
823 char *newline = NULL;
824 int line_buffering = 0;
825
826 PyObject *res;
827 int r;
828
829 self->ok = 0;
830 self->detached = 0;
831 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
832 kwlist, &buffer, &encoding, &errors,
833 &newline, &line_buffering))
834 return -1;
835
836 if (newline && newline[0] != '\0'
837 && !(newline[0] == '\n' && newline[1] == '\0')
838 && !(newline[0] == '\r' && newline[1] == '\0')
839 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
840 PyErr_Format(PyExc_ValueError,
841 "illegal newline value: %s", newline);
842 return -1;
843 }
844
845 Py_CLEAR(self->buffer);
846 Py_CLEAR(self->encoding);
847 Py_CLEAR(self->encoder);
848 Py_CLEAR(self->decoder);
849 Py_CLEAR(self->readnl);
850 Py_CLEAR(self->decoded_chars);
851 Py_CLEAR(self->pending_bytes);
852 Py_CLEAR(self->snapshot);
853 Py_CLEAR(self->errors);
854 Py_CLEAR(self->raw);
855 self->decoded_chars_used = 0;
856 self->pending_bytes_count = 0;
857 self->encodefunc = NULL;
858 self->writenl = NULL;
859
860 if (encoding == NULL && self->encoding == NULL) {
861 if (_PyIO_locale_module == NULL) {
862 _PyIO_locale_module = PyImport_ImportModule("locale");
863 if (_PyIO_locale_module == NULL)
864 goto catch_ImportError;
865 else
866 goto use_locale;
867 }
868 else {
869 use_locale:
870 self->encoding = PyObject_CallMethod(
871 _PyIO_locale_module, "getpreferredencoding", NULL);
872 if (self->encoding == NULL) {
873 catch_ImportError:
874 /*
875 Importing locale can raise a ImportError because of
876 _functools, and locale.getpreferredencoding can raise a
877 ImportError if _locale is not available. These will happen
878 during module building.
879 */
880 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
881 PyErr_Clear();
882 self->encoding = PyString_FromString("ascii");
883 }
884 else
885 goto error;
886 }
887 else if (!PyString_Check(self->encoding))
888 Py_CLEAR(self->encoding);
889 }
890 }
891 if (self->encoding != NULL)
892 encoding = PyString_AsString(self->encoding);
893 else if (encoding != NULL) {
894 self->encoding = PyString_FromString(encoding);
895 if (self->encoding == NULL)
896 goto error;
897 }
898 else {
899 PyErr_SetString(PyExc_IOError,
900 "could not determine default encoding");
901 }
902
903 if (errors == NULL)
904 errors = "strict";
905 self->errors = PyBytes_FromString(errors);
906 if (self->errors == NULL)
907 goto error;
908
909 self->chunk_size = 8192;
910 self->readuniversal = (newline == NULL || newline[0] == '\0');
911 self->line_buffering = line_buffering;
912 self->readtranslate = (newline == NULL);
913 if (newline) {
914 self->readnl = PyString_FromString(newline);
915 if (self->readnl == NULL)
916 return -1;
917 }
918 self->writetranslate = (newline == NULL || newline[0] != '\0');
919 if (!self->readuniversal && self->writetranslate) {
920 self->writenl = PyString_AsString(self->readnl);
921 if (!strcmp(self->writenl, "\n"))
922 self->writenl = NULL;
923 }
924#ifdef MS_WINDOWS
925 else
926 self->writenl = "\r\n";
927#endif
928
929 /* Build the decoder object */
930 res = PyObject_CallMethod(buffer, "readable", NULL);
931 if (res == NULL)
932 goto error;
933 r = PyObject_IsTrue(res);
934 Py_DECREF(res);
935 if (r == -1)
936 goto error;
937 if (r == 1) {
938 self->decoder = PyCodec_IncrementalDecoder(
939 encoding, errors);
940 if (self->decoder == NULL)
941 goto error;
942
943 if (self->readuniversal) {
944 PyObject *incrementalDecoder = PyObject_CallFunction(
945 (PyObject *)&PyIncrementalNewlineDecoder_Type,
946 "Oi", self->decoder, (int)self->readtranslate);
947 if (incrementalDecoder == NULL)
948 goto error;
949 Py_CLEAR(self->decoder);
950 self->decoder = incrementalDecoder;
951 }
952 }
953
954 /* Build the encoder object */
955 res = PyObject_CallMethod(buffer, "writable", NULL);
956 if (res == NULL)
957 goto error;
958 r = PyObject_IsTrue(res);
959 Py_DECREF(res);
960 if (r == -1)
961 goto error;
962 if (r == 1) {
963 PyObject *ci;
964 self->encoder = PyCodec_IncrementalEncoder(
965 encoding, errors);
966 if (self->encoder == NULL)
967 goto error;
968 /* Get the normalized named of the codec */
969 ci = _PyCodec_Lookup(encoding);
970 if (ci == NULL)
971 goto error;
972 res = PyObject_GetAttrString(ci, "name");
973 Py_DECREF(ci);
974 if (res == NULL) {
975 if (PyErr_ExceptionMatches(PyExc_AttributeError))
976 PyErr_Clear();
977 else
978 goto error;
979 }
980 else if (PyString_Check(res)) {
981 encodefuncentry *e = encodefuncs;
982 while (e->name != NULL) {
983 if (!strcmp(PyString_AS_STRING(res), e->name)) {
984 self->encodefunc = e->encodefunc;
985 break;
986 }
987 e++;
988 }
989 }
990 Py_XDECREF(res);
991 }
992
993 self->buffer = buffer;
994 Py_INCREF(buffer);
995
996 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
997 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
998 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
999 raw = PyObject_GetAttrString(buffer, "raw");
1000 /* Cache the raw FileIO object to speed up 'closed' checks */
1001 if (raw == NULL) {
1002 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1003 PyErr_Clear();
1004 else
1005 goto error;
1006 }
1007 else if (Py_TYPE(raw) == &PyFileIO_Type)
1008 self->raw = raw;
1009 else
1010 Py_DECREF(raw);
1011 }
1012
1013 res = PyObject_CallMethod(buffer, "seekable", NULL);
1014 if (res == NULL)
1015 goto error;
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001016 r = PyObject_IsTrue(res);
Antoine Pitrou19690592009-06-12 20:14:08 +00001017 Py_DECREF(res);
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001018 if (r < 0)
1019 goto error;
1020 self->seekable = self->telling = r;
Antoine Pitrou19690592009-06-12 20:14:08 +00001021
1022 self->encoding_start_of_stream = 0;
1023 if (self->seekable && self->encoder) {
1024 PyObject *cookieObj;
1025 int cmp;
1026
1027 self->encoding_start_of_stream = 1;
1028
1029 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1030 if (cookieObj == NULL)
1031 goto error;
1032
1033 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1034 Py_DECREF(cookieObj);
1035 if (cmp < 0) {
1036 goto error;
1037 }
1038
1039 if (cmp == 0) {
1040 self->encoding_start_of_stream = 0;
1041 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1042 _PyIO_zero, NULL);
1043 if (res == NULL)
1044 goto error;
1045 Py_DECREF(res);
1046 }
1047 }
1048
1049 self->ok = 1;
1050 return 0;
1051
1052 error:
1053 return -1;
1054}
1055
1056static int
1057_textiowrapper_clear(textio *self)
1058{
1059 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1060 return -1;
1061 self->ok = 0;
1062 Py_CLEAR(self->buffer);
1063 Py_CLEAR(self->encoding);
1064 Py_CLEAR(self->encoder);
1065 Py_CLEAR(self->decoder);
1066 Py_CLEAR(self->readnl);
1067 Py_CLEAR(self->decoded_chars);
1068 Py_CLEAR(self->pending_bytes);
1069 Py_CLEAR(self->snapshot);
1070 Py_CLEAR(self->errors);
1071 Py_CLEAR(self->raw);
1072 return 0;
1073}
1074
1075static void
1076textiowrapper_dealloc(textio *self)
1077{
1078 if (_textiowrapper_clear(self) < 0)
1079 return;
1080 _PyObject_GC_UNTRACK(self);
1081 if (self->weakreflist != NULL)
1082 PyObject_ClearWeakRefs((PyObject *)self);
1083 Py_CLEAR(self->dict);
1084 Py_TYPE(self)->tp_free((PyObject *)self);
1085}
1086
1087static int
1088textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1089{
1090 Py_VISIT(self->buffer);
1091 Py_VISIT(self->encoding);
1092 Py_VISIT(self->encoder);
1093 Py_VISIT(self->decoder);
1094 Py_VISIT(self->readnl);
1095 Py_VISIT(self->decoded_chars);
1096 Py_VISIT(self->pending_bytes);
1097 Py_VISIT(self->snapshot);
1098 Py_VISIT(self->errors);
1099 Py_VISIT(self->raw);
1100
1101 Py_VISIT(self->dict);
1102 return 0;
1103}
1104
1105static int
1106textiowrapper_clear(textio *self)
1107{
1108 if (_textiowrapper_clear(self) < 0)
1109 return -1;
1110 Py_CLEAR(self->dict);
1111 return 0;
1112}
1113
1114static PyObject *
1115textiowrapper_closed_get(textio *self, void *context);
1116
1117/* This macro takes some shortcuts to make the common case faster. */
1118#define CHECK_CLOSED(self) \
1119 do { \
1120 int r; \
1121 PyObject *_res; \
1122 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1123 if (self->raw != NULL) \
1124 r = _PyFileIO_closed(self->raw); \
1125 else { \
1126 _res = textiowrapper_closed_get(self, NULL); \
1127 if (_res == NULL) \
1128 return NULL; \
1129 r = PyObject_IsTrue(_res); \
1130 Py_DECREF(_res); \
1131 if (r < 0) \
1132 return NULL; \
1133 } \
1134 if (r > 0) { \
1135 PyErr_SetString(PyExc_ValueError, \
1136 "I/O operation on closed file."); \
1137 return NULL; \
1138 } \
1139 } \
1140 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1141 return NULL; \
1142 } while (0)
1143
/* Raise ValueError and `return NULL` from the calling function when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized.

   NOTE: this expands to a bare `if` block rather than a do/while(0)
   statement, and at least one call site in this file invokes it without a
   trailing semicolon, so its shape must not be changed casually. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1155
/* Same check as CHECK_INITIALIZED, for functions that return an int:
   raises ValueError and returns -1 when self->ok <= 0. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1167
1168
1169static PyObject *
1170textiowrapper_detach(textio *self)
1171{
1172 PyObject *buffer, *res;
1173 CHECK_INITIALIZED(self);
1174 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1175 if (res == NULL)
1176 return NULL;
1177 Py_DECREF(res);
1178 buffer = self->buffer;
1179 self->buffer = NULL;
1180 self->detached = 1;
1181 self->ok = 0;
1182 return buffer;
1183}
1184
1185Py_LOCAL_INLINE(const Py_UNICODE *)
1186findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1187{
1188 /* like wcschr, but doesn't stop at NULL characters */
1189 while (size-- > 0) {
1190 if (*s == ch)
1191 return s;
1192 s++;
1193 }
1194 return NULL;
1195}
1196
1197/* Flush the internal write buffer. This doesn't explicitly flush the
1198 underlying buffered object, though. */
1199static int
1200_textiowrapper_writeflush(textio *self)
1201{
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001202 PyObject *pending, *b, *ret;
Antoine Pitrou19690592009-06-12 20:14:08 +00001203
1204 if (self->pending_bytes == NULL)
1205 return 0;
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001206
1207 pending = self->pending_bytes;
1208 Py_INCREF(pending);
1209 self->pending_bytes_count = 0;
1210 Py_CLEAR(self->pending_bytes);
1211
1212 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1213 Py_DECREF(pending);
Antoine Pitrou19690592009-06-12 20:14:08 +00001214 if (b == NULL)
1215 return -1;
1216 ret = PyObject_CallMethodObjArgs(self->buffer,
1217 _PyIO_str_write, b, NULL);
1218 Py_DECREF(b);
1219 if (ret == NULL)
1220 return -1;
1221 Py_DECREF(ret);
Antoine Pitrou19690592009-06-12 20:14:08 +00001222 return 0;
1223}
1224
/* write(text): encode a unicode string and queue it for the underlying
   buffer.

   Steps, in order:
   1. optionally translate "\n" to self->writenl;
   2. encode the text (fast path through self->encodefunc when set);
   3. append the bytes to the pending list, flushing it to the buffer when
      it exceeds chunk_size or when line buffering requires it;
   4. drop the tell() snapshot and reset the decoder, if any.

   Returns the length of the text as passed in (measured before newline
   translation), or NULL with an exception set. */
static PyObject *
textiowrapper_write(textio *self, PyObject *args)
{
    PyObject *ret;
    PyObject *text; /* owned reference */
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0;

    CHECK_INITIALIZED(self);

    if (!PyArg_ParseTuple(args, "U:write", &text)) {
        return NULL;
    }

    CHECK_CLOSED(self);

    if (self->encoder == NULL) {
        PyErr_SetString(PyExc_IOError, "not writable");
        return NULL;
    }

    /* From here on `text` is an owned reference (it may be replaced by the
       translated string below). */
    Py_INCREF(text);

    textlen = PyUnicode_GetSize(text);

    /* Only scan for '\n' when the answer is actually needed. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (findchar(PyUnicode_AS_UNICODE(text),
                     PyUnicode_GET_SIZE(text), '\n'))
            haslf = 1;

    if (haslf && self->writetranslate && self->writenl != NULL) {
        PyObject *newtext = PyObject_CallMethod(
            text, "replace", "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    /* With line buffering, any '\n' (seen before translation) or '\r'
       (in the possibly translated text) forces a flush. */
    if (self->line_buffering &&
        (haslf ||
         findchar(PyUnicode_AS_UNICODE(text),
                  PyUnicode_GET_SIZE(text), '\r')))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        b = (*self->encodefunc)((PyObject *) self, text);
        self->encoding_start_of_stream = 0;
    }
    else
        b = PyObject_CallMethodObjArgs(self->encoder,
                                       _PyIO_str_encode, text, NULL);
    Py_DECREF(text);
    if (b == NULL)
        return NULL;

    /* Lazily create the pending list. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes = PyList_New(0);
        if (self->pending_bytes == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        self->pending_bytes_count = 0;
    }
    if (PyList_Append(self->pending_bytes, b) < 0) {
        Py_DECREF(b);
        return NULL;
    }
    self->pending_bytes_count += PyBytes_GET_SIZE(b);
    Py_DECREF(b);
    if (self->pending_bytes_count > self->chunk_size || needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    /* Line buffering also flushes the underlying buffer object itself. */
    if (needflush) {
        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* A write invalidates the read-side position bookkeeping. */
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = PyObject_CallMethod(self->decoder, "reset", NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1321
1322/* Steal a reference to chars and store it in the decoded_char buffer;
1323 */
1324static void
1325textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1326{
1327 Py_CLEAR(self->decoded_chars);
1328 self->decoded_chars = chars;
1329 self->decoded_chars_used = 0;
1330}
1331
1332static PyObject *
1333textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1334{
1335 PyObject *chars;
1336 Py_ssize_t avail;
1337
1338 if (self->decoded_chars == NULL)
1339 return PyUnicode_FromStringAndSize(NULL, 0);
1340
1341 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1342 - self->decoded_chars_used);
1343
1344 assert(avail >= 0);
1345
1346 if (n < 0 || n > avail)
1347 n = avail;
1348
1349 if (self->decoded_chars_used > 0 || n < avail) {
1350 chars = PyUnicode_FromUnicode(
1351 PyUnicode_AS_UNICODE(self->decoded_chars)
1352 + self->decoded_chars_used, n);
1353 if (chars == NULL)
1354 return NULL;
1355 }
1356 else {
1357 chars = self->decoded_chars;
1358 Py_INCREF(chars);
1359 }
1360
1361 self->decoded_chars_used += n;
1362 return chars;
1363}
1364
1365/* Read and decode the next chunk of data from the BufferedReader.
1366 */
1367static int
1368textiowrapper_read_chunk(textio *self)
1369{
1370 PyObject *dec_buffer = NULL;
1371 PyObject *dec_flags = NULL;
1372 PyObject *input_chunk = NULL;
1373 PyObject *decoded_chars, *chunk_size;
1374 int eof;
1375
1376 /* The return value is True unless EOF was reached. The decoded string is
1377 * placed in self._decoded_chars (replacing its previous value). The
1378 * entire input chunk is sent to the decoder, though some of it may remain
1379 * buffered in the decoder, yet to be converted.
1380 */
1381
1382 if (self->decoder == NULL) {
1383 PyErr_SetString(PyExc_IOError, "not readable");
1384 return -1;
1385 }
1386
1387 if (self->telling) {
1388 /* To prepare for tell(), we need to snapshot a point in the file
1389 * where the decoder's input buffer is empty.
1390 */
1391
1392 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1393 _PyIO_str_getstate, NULL);
1394 if (state == NULL)
1395 return -1;
1396 /* Given this, we know there was a valid snapshot point
1397 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1398 */
1399 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1400 Py_DECREF(state);
1401 return -1;
1402 }
1403 Py_INCREF(dec_buffer);
1404 Py_INCREF(dec_flags);
1405 Py_DECREF(state);
1406 }
1407
1408 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1409 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1410 if (chunk_size == NULL)
1411 goto fail;
1412 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1413 _PyIO_str_read1, chunk_size, NULL);
1414 Py_DECREF(chunk_size);
1415 if (input_chunk == NULL)
1416 goto fail;
1417 assert(PyBytes_Check(input_chunk));
1418
1419 eof = (PyBytes_Size(input_chunk) == 0);
1420
1421 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1422 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1423 self->decoder, input_chunk, eof);
1424 }
1425 else {
1426 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1427 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1428 }
1429
1430 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1431 if (decoded_chars == NULL)
1432 goto fail;
1433 textiowrapper_set_decoded_chars(self, decoded_chars);
1434 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1435 eof = 0;
1436
1437 if (self->telling) {
1438 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1439 * next input to be decoded is dec_buffer + input_chunk.
1440 */
1441 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1442 if (next_input == NULL)
1443 goto fail;
1444 assert (PyBytes_Check(next_input));
1445 Py_DECREF(dec_buffer);
1446 Py_CLEAR(self->snapshot);
1447 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1448 }
1449 Py_DECREF(input_chunk);
1450
1451 return (eof == 0);
1452
1453 fail:
1454 Py_XDECREF(dec_buffer);
1455 Py_XDECREF(dec_flags);
1456 Py_XDECREF(input_chunk);
1457 return -1;
1458}
1459
1460static PyObject *
1461textiowrapper_read(textio *self, PyObject *args)
1462{
1463 Py_ssize_t n = -1;
1464 PyObject *result = NULL, *chunks = NULL;
1465
1466 CHECK_INITIALIZED(self);
1467
Benjamin Petersonddd392c2009-12-13 19:19:07 +00001468 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Antoine Pitrou19690592009-06-12 20:14:08 +00001469 return NULL;
1470
1471 CHECK_CLOSED(self);
1472
1473 if (self->decoder == NULL) {
1474 PyErr_SetString(PyExc_IOError, "not readable");
1475 return NULL;
1476 }
1477
1478 if (_textiowrapper_writeflush(self) < 0)
1479 return NULL;
1480
1481 if (n < 0) {
1482 /* Read everything */
1483 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1484 PyObject *decoded, *final;
1485 if (bytes == NULL)
1486 goto fail;
1487 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1488 bytes, Py_True, NULL);
1489 Py_DECREF(bytes);
1490 if (decoded == NULL)
1491 goto fail;
1492
1493 result = textiowrapper_get_decoded_chars(self, -1);
1494
1495 if (result == NULL) {
1496 Py_DECREF(decoded);
1497 return NULL;
1498 }
1499
1500 final = PyUnicode_Concat(result, decoded);
1501 Py_DECREF(result);
1502 Py_DECREF(decoded);
1503 if (final == NULL)
1504 goto fail;
1505
1506 Py_CLEAR(self->snapshot);
1507 return final;
1508 }
1509 else {
1510 int res = 1;
1511 Py_ssize_t remaining = n;
1512
1513 result = textiowrapper_get_decoded_chars(self, n);
1514 if (result == NULL)
1515 goto fail;
1516 remaining -= PyUnicode_GET_SIZE(result);
1517
1518 /* Keep reading chunks until we have n characters to return */
1519 while (remaining > 0) {
1520 res = textiowrapper_read_chunk(self);
Gregory P. Smith99716162012-10-12 13:02:06 -07001521 if (res < 0) {
1522 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1523 when EINTR occurs so we needn't do it ourselves. */
1524 if (_PyIO_trap_eintr()) {
1525 continue;
1526 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001527 goto fail;
Gregory P. Smith99716162012-10-12 13:02:06 -07001528 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001529 if (res == 0) /* EOF */
1530 break;
1531 if (chunks == NULL) {
1532 chunks = PyList_New(0);
1533 if (chunks == NULL)
1534 goto fail;
1535 }
1536 if (PyList_Append(chunks, result) < 0)
1537 goto fail;
1538 Py_DECREF(result);
1539 result = textiowrapper_get_decoded_chars(self, remaining);
1540 if (result == NULL)
1541 goto fail;
1542 remaining -= PyUnicode_GET_SIZE(result);
1543 }
1544 if (chunks != NULL) {
1545 if (result != NULL && PyList_Append(chunks, result) < 0)
1546 goto fail;
1547 Py_CLEAR(result);
1548 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1549 if (result == NULL)
1550 goto fail;
1551 Py_CLEAR(chunks);
1552 }
1553 return result;
1554 }
1555 fail:
1556 Py_XDECREF(result);
1557 Py_XDECREF(chunks);
1558 return NULL;
1559}
1560
1561
1562/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1563 that is to the NUL character. Otherwise the function will produce
1564 incorrect results. */
1565static Py_UNICODE *
1566find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1567{
1568 Py_UNICODE *s = start;
1569 for (;;) {
1570 while (*s > ch)
1571 s++;
1572 if (*s == ch)
1573 return s;
1574 if (s == end)
1575 return NULL;
1576 s++;
1577 }
1578}
1579
/* Locate the end of the first line in the Py_UNICODE range [start, end).

   translated: newlines were already translated, so only '\n' is searched.
   universal:  any of "\r", "\r\n" or "\n" ends a line.
   readnl:     otherwise, the byte string holding the exact line terminator.

   Returns the offset from `start` just past the line ending when one is
   found.  Otherwise returns -1 and stores in *consumed how many leading
   characters can be put aside as containing no (partial) line ending.

   `end` is expected to point at the NUL terminator of the storage; the
   scanning loops rely on it (see find_control_char). */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
{
    Py_ssize_t len = end - start;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        Py_UNICODE *pos = find_control_char(start, end, '\n');
        if (pos != NULL)
            return pos - start + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        Py_UNICODE *s = start;
        for (;;) {
            Py_UNICODE ch;
            /* Fast path for non-control chars. The loop always ends
               since the Py_UNICODE storage is NUL-terminated. */
            while (*s > '\r')
                s++;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            ch = *s++;
            if (ch == '\n')
                return s - start;
            if (ch == '\r') {
                /* Swallow the '\n' of a "\r\n" pair as part of the ending. */
                if (*s == '\n')
                    return s - start + 1;
                else
                    return s - start;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
        unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
        if (readnl_len == 1) {
            Py_UNICODE *pos = find_control_char(start, end, nl[0]);
            if (pos != NULL)
                return pos - start + 1;
            *consumed = len;
            return -1;
        }
        else {
            Py_UNICODE *s = start;
            /* `e` is one past the last position at which a complete
               terminator could still start. */
            Py_UNICODE *e = end - readnl_len + 1;
            Py_UNICODE *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                Py_UNICODE *pos = find_control_char(s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                /* First character matched; compare the rest. */
                for (i = 1; i < readnl_len; i++) {
                    if (pos[i] != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return pos - start + readnl_len;
                s = pos + 1;
            }
            /* No complete terminator.  If its first character occurs in
               the tail, only the text before it is reported as consumed,
               so a terminator split across chunks can still be matched. */
            pos = find_control_char(e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = pos - start;
            return -1;
        }
    }
}
1662
/* Core of readline(): return the next line (including its terminator) as
   a new unicode object, reading at most `limit` characters when `limit`
   is non-negative.  Returns an empty string at EOF and NULL with an
   exception set on error.

   Local bookkeeping:
   - `line`:      the text currently being scanned (the decoded buffer, or
                  leftover text concatenated with a fresh buffer);
   - `start`:     index in `line` where the unscanned part begins;
   - `endpos`:    index in `line` where the returned text ends;
   - `chunks`:    list of pieces already known to belong to the line;
   - `chunked`:   total number of characters stored in `chunks`;
   - `remaining`: tail of a buffer that may hold a partial line ending and
                  must be prepended to the next chunk;
   - `offset_to_buffer`: length of the leftover text prepended to the
                  decoded buffer, used to convert `endpos` back into an
                  offset in self->decoded_chars. */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer in place, from the first unused
               character. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover from the previous buffer. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Found a line ending; truncate to the limit if needed. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            if (start == 0 && Py_REFCNT(line) == 1) {
                /* We hold the only reference: shrink in place. */
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                /* Otherwise copy out the [start, endpos) slice. */
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* Leftover text that was never followed by more data (EOF). */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the final line from the pieces put aside. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_CLEAR(line);
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1822
1823static PyObject *
1824textiowrapper_readline(textio *self, PyObject *args)
1825{
1826 PyObject *limitobj = NULL;
1827 Py_ssize_t limit = -1;
1828
1829 CHECK_INITIALIZED(self);
1830 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1831 return NULL;
1832 }
1833 if (limitobj) {
1834 if (!PyNumber_Check(limitobj)) {
1835 PyErr_Format(PyExc_TypeError,
1836 "integer argument expected, got '%.200s'",
1837 Py_TYPE(limitobj)->tp_name);
1838 return NULL;
1839 }
1840 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1841 if (limit == -1 && PyErr_Occurred())
1842 return NULL;
1843 }
1844 return _textiowrapper_readline(self, limit);
1845}
1846
1847/* Seek and Tell */
1848
/* Unpacked form of the opaque integer returned by tell() and accepted by
   seek().  seek() goes back to `start_pos` in the underlying buffer,
   restores the decoder flags, feeds it `bytes_to_feed` bytes and skips
   `chars_to_skip` of the resulting characters. */
typedef struct {
    Py_off_t start_pos;   /* buffer position of the safe start point */
    int dec_flags;        /* decoder flags to restore at start_pos */
    int bytes_to_feed;    /* bytes to read and decode from start_pos */
    int chars_to_skip;    /* decoded characters to skip after feeding */
    char need_eof;        /* non-zero: decode the fed bytes as final input */
} cookie_type;
1856
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   respectively.  The following macros define at which offsets in that
   intermediary buffer the various cookie_type fields are stored.
 */

/* Total size of the buffer: one Py_off_t, three ints and one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1894
1895static int
1896textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1897{
1898 unsigned char buffer[COOKIE_BUF_LEN];
1899 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1900 if (cookieLong == NULL)
1901 return -1;
1902
1903 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1904 IS_LITTLE_ENDIAN, 0) < 0) {
1905 Py_DECREF(cookieLong);
1906 return -1;
1907 }
1908 Py_DECREF(cookieLong);
1909
1910 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1911 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1912 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1913 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1914 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1915
1916 return 0;
1917}
1918
1919static PyObject *
1920textiowrapper_build_cookie(cookie_type *cookie)
1921{
1922 unsigned char buffer[COOKIE_BUF_LEN];
1923
1924 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1925 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1926 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1927 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1928 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1929
1930 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1931}
1932#undef IS_LITTLE_ENDIAN
1933
1934static int
1935_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1936{
1937 PyObject *res;
1938 /* When seeking to the start of the stream, we call decoder.reset()
1939 rather than decoder.getstate().
1940 This is for a few decoders such as utf-16 for which the state value
1941 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1942 utf-16, that we are expecting a BOM).
1943 */
1944 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1945 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1946 else
1947 res = PyObject_CallMethod(self->decoder, "setstate",
1948 "((si))", "", cookie->dec_flags);
1949 if (res == NULL)
1950 return -1;
1951 Py_DECREF(res);
1952 return 0;
1953}
1954
1955static int
1956_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1957{
1958 PyObject *res;
1959 /* Same as _textiowrapper_decoder_setstate() above. */
1960 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1961 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1962 self->encoding_start_of_stream = 1;
1963 }
1964 else {
1965 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1966 _PyIO_zero, NULL);
1967 self->encoding_start_of_stream = 0;
1968 }
1969 if (res == NULL)
1970 return -1;
1971 Py_DECREF(res);
1972 return 0;
1973}
1974
1975static PyObject *
1976textiowrapper_seek(textio *self, PyObject *args)
1977{
1978 PyObject *cookieObj, *posobj;
1979 cookie_type cookie;
1980 int whence = 0;
1981 PyObject *res;
1982 int cmp;
1983
1984 CHECK_INITIALIZED(self);
1985
1986 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1987 return NULL;
1988 CHECK_CLOSED(self);
1989
1990 Py_INCREF(cookieObj);
1991
1992 if (!self->seekable) {
1993 PyErr_SetString(PyExc_IOError,
1994 "underlying stream is not seekable");
1995 goto fail;
1996 }
1997
1998 if (whence == 1) {
1999 /* seek relative to current position */
2000 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2001 if (cmp < 0)
2002 goto fail;
2003
2004 if (cmp == 0) {
2005 PyErr_SetString(PyExc_IOError,
2006 "can't do nonzero cur-relative seeks");
2007 goto fail;
2008 }
2009
2010 /* Seeking to the current position should attempt to
2011 * sync the underlying buffer with the current position.
2012 */
2013 Py_DECREF(cookieObj);
2014 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2015 if (cookieObj == NULL)
2016 goto fail;
2017 }
2018 else if (whence == 2) {
2019 /* seek relative to end of file */
2020
2021 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2022 if (cmp < 0)
2023 goto fail;
2024
2025 if (cmp == 0) {
2026 PyErr_SetString(PyExc_IOError,
2027 "can't do nonzero end-relative seeks");
2028 goto fail;
2029 }
2030
2031 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2032 if (res == NULL)
2033 goto fail;
2034 Py_DECREF(res);
2035
2036 textiowrapper_set_decoded_chars(self, NULL);
2037 Py_CLEAR(self->snapshot);
2038 if (self->decoder) {
2039 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2040 if (res == NULL)
2041 goto fail;
2042 Py_DECREF(res);
2043 }
2044
2045 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2046 Py_XDECREF(cookieObj);
2047 return res;
2048 }
2049 else if (whence != 0) {
2050 PyErr_Format(PyExc_ValueError,
2051 "invalid whence (%d, should be 0, 1 or 2)", whence);
2052 goto fail;
2053 }
2054
2055 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2056 if (cmp < 0)
2057 goto fail;
2058
2059 if (cmp == 1) {
2060 PyObject *repr = PyObject_Repr(cookieObj);
2061 if (repr != NULL) {
2062 PyErr_Format(PyExc_ValueError,
2063 "negative seek position %s",
2064 PyString_AS_STRING(repr));
2065 Py_DECREF(repr);
2066 }
2067 goto fail;
2068 }
2069
2070 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2071 if (res == NULL)
2072 goto fail;
2073 Py_DECREF(res);
2074
2075 /* The strategy of seek() is to go back to the safe start point
2076 * and replay the effect of read(chars_to_skip) from there.
2077 */
2078 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2079 goto fail;
2080
2081 /* Seek back to the safe start point. */
2082 posobj = PyLong_FromOff_t(cookie.start_pos);
2083 if (posobj == NULL)
2084 goto fail;
2085 res = PyObject_CallMethodObjArgs(self->buffer,
2086 _PyIO_str_seek, posobj, NULL);
2087 Py_DECREF(posobj);
2088 if (res == NULL)
2089 goto fail;
2090 Py_DECREF(res);
2091
2092 textiowrapper_set_decoded_chars(self, NULL);
2093 Py_CLEAR(self->snapshot);
2094
2095 /* Restore the decoder to its state from the safe start point. */
2096 if (self->decoder) {
2097 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2098 goto fail;
2099 }
2100
2101 if (cookie.chars_to_skip) {
2102 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2103 PyObject *input_chunk = PyObject_CallMethod(
2104 self->buffer, "read", "i", cookie.bytes_to_feed);
2105 PyObject *decoded;
2106
2107 if (input_chunk == NULL)
2108 goto fail;
2109
2110 assert (PyBytes_Check(input_chunk));
2111
2112 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2113 if (self->snapshot == NULL) {
2114 Py_DECREF(input_chunk);
2115 goto fail;
2116 }
2117
2118 decoded = PyObject_CallMethod(self->decoder, "decode",
2119 "Oi", input_chunk, (int)cookie.need_eof);
2120
2121 if (decoded == NULL)
2122 goto fail;
2123
2124 textiowrapper_set_decoded_chars(self, decoded);
2125
2126 /* Skip chars_to_skip of the decoded characters. */
2127 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2128 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2129 goto fail;
2130 }
2131 self->decoded_chars_used = cookie.chars_to_skip;
2132 }
2133 else {
2134 self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2135 if (self->snapshot == NULL)
2136 goto fail;
2137 }
2138
2139 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2140 if (self->encoder) {
2141 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2142 goto fail;
2143 }
2144 return cookieObj;
2145 fail:
2146 Py_XDECREF(cookieObj);
2147 return NULL;
2148
2149}
2150
2151static PyObject *
2152textiowrapper_tell(textio *self, PyObject *args)
2153{
2154 PyObject *res;
2155 PyObject *posobj = NULL;
2156 cookie_type cookie = {0,0,0,0,0};
2157 PyObject *next_input;
2158 Py_ssize_t chars_to_skip, chars_decoded;
2159 PyObject *saved_state = NULL;
2160 char *input, *input_end;
2161
2162 CHECK_INITIALIZED(self);
2163 CHECK_CLOSED(self);
2164
2165 if (!self->seekable) {
2166 PyErr_SetString(PyExc_IOError,
2167 "underlying stream is not seekable");
2168 goto fail;
2169 }
2170 if (!self->telling) {
2171 PyErr_SetString(PyExc_IOError,
2172 "telling position disabled by next() call");
2173 goto fail;
2174 }
2175
2176 if (_textiowrapper_writeflush(self) < 0)
2177 return NULL;
2178 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2179 if (res == NULL)
2180 goto fail;
2181 Py_DECREF(res);
2182
2183 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2184 if (posobj == NULL)
2185 goto fail;
2186
2187 if (self->decoder == NULL || self->snapshot == NULL) {
2188 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2189 return posobj;
2190 }
2191
2192#if defined(HAVE_LARGEFILE_SUPPORT)
2193 cookie.start_pos = PyLong_AsLongLong(posobj);
2194#else
2195 cookie.start_pos = PyLong_AsLong(posobj);
2196#endif
2197 if (PyErr_Occurred())
2198 goto fail;
2199
2200 /* Skip backward to the snapshot point (see _read_chunk). */
2201 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2202 goto fail;
2203
2204 assert (PyBytes_Check(next_input));
2205
2206 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2207
2208 /* How many decoded characters have been used up since the snapshot? */
2209 if (self->decoded_chars_used == 0) {
2210 /* We haven't moved from the snapshot point. */
2211 Py_DECREF(posobj);
2212 return textiowrapper_build_cookie(&cookie);
2213 }
2214
2215 chars_to_skip = self->decoded_chars_used;
2216
2217 /* Starting from the snapshot position, we will walk the decoder
2218 * forward until it gives us enough decoded characters.
2219 */
2220 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2221 _PyIO_str_getstate, NULL);
2222 if (saved_state == NULL)
2223 goto fail;
2224
2225 /* Note our initial start point. */
2226 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2227 goto fail;
2228
2229 /* Feed the decoder one byte at a time. As we go, note the
2230 * nearest "safe start point" before the current location
2231 * (a point where the decoder has nothing buffered, so seek()
2232 * can safely start from there and advance to this location).
2233 */
2234 chars_decoded = 0;
2235 input = PyBytes_AS_STRING(next_input);
2236 input_end = input + PyBytes_GET_SIZE(next_input);
2237 while (input < input_end) {
2238 PyObject *state;
2239 char *dec_buffer;
2240 Py_ssize_t dec_buffer_len;
2241 int dec_flags;
2242
2243 PyObject *decoded = PyObject_CallMethod(
2244 self->decoder, "decode", "s#", input, 1);
2245 if (decoded == NULL)
2246 goto fail;
2247 assert (PyUnicode_Check(decoded));
2248 chars_decoded += PyUnicode_GET_SIZE(decoded);
2249 Py_DECREF(decoded);
2250
2251 cookie.bytes_to_feed += 1;
2252
2253 state = PyObject_CallMethodObjArgs(self->decoder,
2254 _PyIO_str_getstate, NULL);
2255 if (state == NULL)
2256 goto fail;
2257 if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2258 Py_DECREF(state);
2259 goto fail;
2260 }
2261 Py_DECREF(state);
2262
2263 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2264 /* Decoder buffer is empty, so this is a safe start point. */
2265 cookie.start_pos += cookie.bytes_to_feed;
2266 chars_to_skip -= chars_decoded;
2267 cookie.dec_flags = dec_flags;
2268 cookie.bytes_to_feed = 0;
2269 chars_decoded = 0;
2270 }
2271 if (chars_decoded >= chars_to_skip)
2272 break;
2273 input++;
2274 }
2275 if (input == input_end) {
2276 /* We didn't get enough decoded data; signal EOF to get more. */
2277 PyObject *decoded = PyObject_CallMethod(
2278 self->decoder, "decode", "si", "", /* final = */ 1);
2279 if (decoded == NULL)
2280 goto fail;
2281 assert (PyUnicode_Check(decoded));
2282 chars_decoded += PyUnicode_GET_SIZE(decoded);
2283 Py_DECREF(decoded);
2284 cookie.need_eof = 1;
2285
2286 if (chars_decoded < chars_to_skip) {
2287 PyErr_SetString(PyExc_IOError,
2288 "can't reconstruct logical file position");
2289 goto fail;
2290 }
2291 }
2292
2293 /* finally */
2294 Py_XDECREF(posobj);
2295 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2296 Py_DECREF(saved_state);
2297 if (res == NULL)
2298 return NULL;
2299 Py_DECREF(res);
2300
2301 /* The returned cookie corresponds to the last safe start point. */
2302 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2303 return textiowrapper_build_cookie(&cookie);
2304
2305 fail:
2306 Py_XDECREF(posobj);
2307 if (saved_state) {
2308 PyObject *type, *value, *traceback;
2309 PyErr_Fetch(&type, &value, &traceback);
2310
2311 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2312 Py_DECREF(saved_state);
2313 if (res == NULL)
2314 return NULL;
2315 Py_DECREF(res);
2316
2317 PyErr_Restore(type, value, traceback);
2318 }
2319 return NULL;
2320}
2321
2322static PyObject *
2323textiowrapper_truncate(textio *self, PyObject *args)
2324{
2325 PyObject *pos = Py_None;
2326 PyObject *res;
2327
2328 CHECK_INITIALIZED(self)
2329 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2330 return NULL;
2331 }
2332
2333 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2334 if (res == NULL)
2335 return NULL;
2336 Py_DECREF(res);
2337
Antoine Pitrouf3fa0742010-01-31 22:26:04 +00002338 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Antoine Pitrou19690592009-06-12 20:14:08 +00002339}
2340
2341static PyObject *
2342textiowrapper_repr(textio *self)
2343{
2344 PyObject *nameobj, *res;
2345 PyObject *namerepr = NULL, *encrepr = NULL;
2346
2347 CHECK_INITIALIZED(self);
2348
2349 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2350 if (nameobj == NULL) {
2351 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2352 PyErr_Clear();
2353 else
2354 goto error;
2355 encrepr = PyObject_Repr(self->encoding);
2356 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2357 PyString_AS_STRING(encrepr));
2358 }
2359 else {
2360 encrepr = PyObject_Repr(self->encoding);
2361 namerepr = PyObject_Repr(nameobj);
2362 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2363 PyString_AS_STRING(namerepr),
2364 PyString_AS_STRING(encrepr));
2365 Py_DECREF(nameobj);
2366 }
2367 Py_XDECREF(namerepr);
2368 Py_XDECREF(encrepr);
2369 return res;
2370
2371error:
2372 Py_XDECREF(namerepr);
2373 Py_XDECREF(encrepr);
2374 return NULL;
2375}
2376
2377
2378/* Inquiries */
2379
2380static PyObject *
2381textiowrapper_fileno(textio *self, PyObject *args)
2382{
2383 CHECK_INITIALIZED(self);
2384 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2385}
2386
2387static PyObject *
2388textiowrapper_seekable(textio *self, PyObject *args)
2389{
2390 CHECK_INITIALIZED(self);
2391 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2392}
2393
2394static PyObject *
2395textiowrapper_readable(textio *self, PyObject *args)
2396{
2397 CHECK_INITIALIZED(self);
2398 return PyObject_CallMethod(self->buffer, "readable", NULL);
2399}
2400
2401static PyObject *
2402textiowrapper_writable(textio *self, PyObject *args)
2403{
2404 CHECK_INITIALIZED(self);
2405 return PyObject_CallMethod(self->buffer, "writable", NULL);
2406}
2407
2408static PyObject *
2409textiowrapper_isatty(textio *self, PyObject *args)
2410{
2411 CHECK_INITIALIZED(self);
2412 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2413}
2414
2415static PyObject *
2416textiowrapper_flush(textio *self, PyObject *args)
2417{
2418 CHECK_INITIALIZED(self);
2419 CHECK_CLOSED(self);
2420 self->telling = self->seekable;
2421 if (_textiowrapper_writeflush(self) < 0)
2422 return NULL;
2423 return PyObject_CallMethod(self->buffer, "flush", NULL);
2424}
2425
2426static PyObject *
2427textiowrapper_close(textio *self, PyObject *args)
2428{
2429 PyObject *res;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002430 int r;
Antoine Pitrou19690592009-06-12 20:14:08 +00002431 CHECK_INITIALIZED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002432
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002433 res = textiowrapper_closed_get(self, NULL);
2434 if (res == NULL)
2435 return NULL;
2436 r = PyObject_IsTrue(res);
2437 Py_DECREF(res);
2438 if (r < 0)
2439 return NULL;
2440
2441 if (r > 0) {
2442 Py_RETURN_NONE; /* stream already closed */
2443 }
2444 else {
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002445 PyObject *exc = NULL, *val, *tb;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002446 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002447 if (res == NULL)
2448 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002449 else
2450 Py_DECREF(res);
2451
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002452 res = PyObject_CallMethod(self->buffer, "close", NULL);
2453 if (exc != NULL) {
2454 if (res != NULL) {
2455 Py_CLEAR(res);
2456 PyErr_Restore(exc, val, tb);
2457 }
2458 else {
2459 Py_DECREF(exc);
2460 Py_XDECREF(val);
2461 Py_XDECREF(tb);
2462 }
2463 }
2464 return res;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002465 }
Antoine Pitrou19690592009-06-12 20:14:08 +00002466}
2467
2468static PyObject *
2469textiowrapper_iternext(textio *self)
2470{
2471 PyObject *line;
2472
2473 CHECK_INITIALIZED(self);
2474
2475 self->telling = 0;
2476 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2477 /* Skip method call overhead for speed */
2478 line = _textiowrapper_readline(self, -1);
2479 }
2480 else {
2481 line = PyObject_CallMethodObjArgs((PyObject *)self,
2482 _PyIO_str_readline, NULL);
2483 if (line && !PyUnicode_Check(line)) {
2484 PyErr_Format(PyExc_IOError,
2485 "readline() should have returned an str object, "
2486 "not '%.200s'", Py_TYPE(line)->tp_name);
2487 Py_DECREF(line);
2488 return NULL;
2489 }
2490 }
2491
2492 if (line == NULL)
2493 return NULL;
2494
2495 if (PyUnicode_GET_SIZE(line) == 0) {
2496 /* Reached EOF or would have blocked */
2497 Py_DECREF(line);
2498 Py_CLEAR(self->snapshot);
2499 self->telling = self->seekable;
2500 return NULL;
2501 }
2502
2503 return line;
2504}
2505
2506static PyObject *
2507textiowrapper_name_get(textio *self, void *context)
2508{
2509 CHECK_INITIALIZED(self);
2510 return PyObject_GetAttrString(self->buffer, "name");
2511}
2512
2513static PyObject *
2514textiowrapper_closed_get(textio *self, void *context)
2515{
2516 CHECK_INITIALIZED(self);
2517 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2518}
2519
2520static PyObject *
2521textiowrapper_newlines_get(textio *self, void *context)
2522{
2523 PyObject *res;
2524 CHECK_INITIALIZED(self);
2525 if (self->decoder == NULL)
2526 Py_RETURN_NONE;
2527 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2528 if (res == NULL) {
2529 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2530 PyErr_Clear();
2531 Py_RETURN_NONE;
2532 }
2533 else {
2534 return NULL;
2535 }
2536 }
2537 return res;
2538}
2539
2540static PyObject *
2541textiowrapper_errors_get(textio *self, void *context)
2542{
2543 CHECK_INITIALIZED(self);
2544 Py_INCREF(self->errors);
2545 return self->errors;
2546}
2547
2548static PyObject *
2549textiowrapper_chunk_size_get(textio *self, void *context)
2550{
2551 CHECK_INITIALIZED(self);
2552 return PyLong_FromSsize_t(self->chunk_size);
2553}
2554
2555static int
2556textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2557{
2558 Py_ssize_t n;
2559 CHECK_INITIALIZED_INT(self);
2560 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2561 if (n == -1 && PyErr_Occurred())
2562 return -1;
2563 if (n <= 0) {
2564 PyErr_SetString(PyExc_ValueError,
2565 "a strictly positive integer is required");
2566 return -1;
2567 }
2568 self->chunk_size = n;
2569 return 0;
2570}
2571
2572static PyMethodDef textiowrapper_methods[] = {
2573 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2574 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2575 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2576 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2577 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2578 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2579
2580 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2581 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2582 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2583 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2584 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2585
2586 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2587 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2588 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2589 {NULL, NULL}
2590};
2591
2592static PyMemberDef textiowrapper_members[] = {
2593 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2594 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2595 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2596 {NULL}
2597};
2598
2599static PyGetSetDef textiowrapper_getset[] = {
2600 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2601 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2602/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2603*/
2604 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2605 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2606 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2607 (setter)textiowrapper_chunk_size_set, NULL},
2608 {NULL}
2609};
2610
2611PyTypeObject PyTextIOWrapper_Type = {
2612 PyVarObject_HEAD_INIT(NULL, 0)
2613 "_io.TextIOWrapper", /*tp_name*/
2614 sizeof(textio), /*tp_basicsize*/
2615 0, /*tp_itemsize*/
2616 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2617 0, /*tp_print*/
2618 0, /*tp_getattr*/
2619 0, /*tps_etattr*/
2620 0, /*tp_compare */
2621 (reprfunc)textiowrapper_repr,/*tp_repr*/
2622 0, /*tp_as_number*/
2623 0, /*tp_as_sequence*/
2624 0, /*tp_as_mapping*/
2625 0, /*tp_hash */
2626 0, /*tp_call*/
2627 0, /*tp_str*/
2628 0, /*tp_getattro*/
2629 0, /*tp_setattro*/
2630 0, /*tp_as_buffer*/
2631 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2632 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2633 textiowrapper_doc, /* tp_doc */
2634 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2635 (inquiry)textiowrapper_clear, /* tp_clear */
2636 0, /* tp_richcompare */
2637 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2638 0, /* tp_iter */
2639 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2640 textiowrapper_methods, /* tp_methods */
2641 textiowrapper_members, /* tp_members */
2642 textiowrapper_getset, /* tp_getset */
2643 0, /* tp_base */
2644 0, /* tp_dict */
2645 0, /* tp_descr_get */
2646 0, /* tp_descr_set */
2647 offsetof(textio, dict), /*tp_dictoffset*/
2648 (initproc)textiowrapper_init, /* tp_init */
2649 0, /* tp_alloc */
2650 PyType_GenericNew, /* tp_new */
2651};