blob: 145f8eaf9c1cc17296244d22da1b8c5c7228a695 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
16PyDoc_STRVAR(TextIOBase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
31PyDoc_STRVAR(TextIOBase_read_doc,
32 "Read at most n characters from stream.\n"
33 "\n"
34 "Read from underlying buffer until we have n characters or we hit EOF.\n"
35 "If n is negative or omitted, read until EOF.\n"
36 );
37
38static PyObject *
39TextIOBase_read(PyObject *self, PyObject *args)
40{
41 return _unsupported("read");
42}
43
44PyDoc_STRVAR(TextIOBase_readline_doc,
45 "Read until newline or EOF.\n"
46 "\n"
47 "Returns an empty string if EOF is hit immediately.\n"
48 );
49
50static PyObject *
51TextIOBase_readline(PyObject *self, PyObject *args)
52{
53 return _unsupported("readline");
54}
55
56PyDoc_STRVAR(TextIOBase_write_doc,
57 "Write string to stream.\n"
58 "Returns the number of characters written (which is always equal to\n"
59 "the length of the string).\n"
60 );
61
62static PyObject *
63TextIOBase_write(PyObject *self, PyObject *args)
64{
65 return _unsupported("write");
66}
67
68PyDoc_STRVAR(TextIOBase_encoding_doc,
69 "Encoding of the text stream.\n"
70 "\n"
71 "Subclasses should override.\n"
72 );
73
74static PyObject *
75TextIOBase_encoding_get(PyObject *self, void *context)
76{
77 Py_RETURN_NONE;
78}
79
80PyDoc_STRVAR(TextIOBase_newlines_doc,
81 "Line endings translated so far.\n"
82 "\n"
83 "Only line endings translated during reading are considered.\n"
84 "\n"
85 "Subclasses should override.\n"
86 );
87
88static PyObject *
89TextIOBase_newlines_get(PyObject *self, void *context)
90{
91 Py_RETURN_NONE;
92}
93
94
95static PyMethodDef TextIOBase_methods[] = {
96 {"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
97 {"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
98 {"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
99 {NULL, NULL}
100};
101
102static PyGetSetDef TextIOBase_getset[] = {
103 {"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
104 {"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
105 {0}
106};
107
108PyTypeObject PyTextIOBase_Type = {
109 PyVarObject_HEAD_INIT(NULL, 0)
110 "_io._TextIOBase", /*tp_name*/
111 0, /*tp_basicsize*/
112 0, /*tp_itemsize*/
113 0, /*tp_dealloc*/
114 0, /*tp_print*/
115 0, /*tp_getattr*/
116 0, /*tp_setattr*/
117 0, /*tp_compare */
118 0, /*tp_repr*/
119 0, /*tp_as_number*/
120 0, /*tp_as_sequence*/
121 0, /*tp_as_mapping*/
122 0, /*tp_hash */
123 0, /*tp_call*/
124 0, /*tp_str*/
125 0, /*tp_getattro*/
126 0, /*tp_setattro*/
127 0, /*tp_as_buffer*/
128 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
129 TextIOBase_doc, /* tp_doc */
130 0, /* tp_traverse */
131 0, /* tp_clear */
132 0, /* tp_richcompare */
133 0, /* tp_weaklistoffset */
134 0, /* tp_iter */
135 0, /* tp_iternext */
136 TextIOBase_methods, /* tp_methods */
137 0, /* tp_members */
138 TextIOBase_getset, /* tp_getset */
139 &PyIOBase_Type, /* tp_base */
140 0, /* tp_dict */
141 0, /* tp_descr_get */
142 0, /* tp_descr_set */
143 0, /* tp_dictoffset */
144 0, /* tp_init */
145 0, /* tp_alloc */
146 0, /* tp_new */
147};
148
149
150/* IncrementalNewlineDecoder */
151
152PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
153 "Codec used when reading a file in universal newlines mode. It wraps\n"
154 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
155 "records the types of newlines encountered. When used with\n"
156 "translate=False, it ensures that the newline sequence is returned in\n"
157 "one piece. When used with decoder=None, it expects unicode strings as\n"
158 "decode input and translates newlines without first invoking an external\n"
159 "decoder.\n"
160 );
161
162typedef struct {
163 PyObject_HEAD
164 PyObject *decoder;
165 PyObject *errors;
166 int pendingcr:1;
167 int translate:1;
168 unsigned int seennl:3;
169} PyNewLineDecoderObject;
170
171static int
172IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
173 PyObject *args, PyObject *kwds)
174{
175 PyObject *decoder;
176 int translate;
177 PyObject *errors = NULL;
178 char *kwlist[] = {"decoder", "translate", "errors", NULL};
179
180 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
181 kwlist, &decoder, &translate, &errors))
182 return -1;
183
184 self->decoder = decoder;
185 Py_INCREF(decoder);
186
187 if (errors == NULL) {
188 self->errors = PyUnicode_FromString("strict");
189 if (self->errors == NULL)
190 return -1;
191 }
192 else {
193 Py_INCREF(errors);
194 self->errors = errors;
195 }
196
197 self->translate = translate;
198 self->seennl = 0;
199 self->pendingcr = 0;
200
201 return 0;
202}
203
204static void
205IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
206{
207 Py_CLEAR(self->decoder);
208 Py_CLEAR(self->errors);
209 Py_TYPE(self)->tp_free((PyObject *)self);
210}
211
212#define SEEN_CR 1
213#define SEEN_LF 2
214#define SEEN_CRLF 4
215#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
216
217PyObject *
218_PyIncrementalNewlineDecoder_decode(PyObject *_self,
219 PyObject *input, int final)
220{
221 PyObject *output;
222 Py_ssize_t output_len;
223 PyNewLineDecoderObject *self = (PyNewLineDecoderObject *) _self;
224
225 if (self->decoder == NULL) {
226 PyErr_SetString(PyExc_ValueError,
227 "IncrementalNewlineDecoder.__init__ not called");
228 return NULL;
229 }
230
231 /* decode input (with the eventual \r from a previous pass) */
232 if (self->decoder != Py_None) {
233 output = PyObject_CallMethodObjArgs(self->decoder,
234 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
235 }
236 else {
237 output = input;
238 Py_INCREF(output);
239 }
240
241 if (output == NULL)
242 return NULL;
243
244 if (!PyUnicode_Check(output)) {
245 PyErr_SetString(PyExc_TypeError,
246 "decoder should return a string result");
247 goto error;
248 }
249
250 output_len = PyUnicode_GET_SIZE(output);
251 if (self->pendingcr && (final || output_len > 0)) {
252 Py_UNICODE *out;
253 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
254 if (modified == NULL)
255 goto error;
256 out = PyUnicode_AS_UNICODE(modified);
257 out[0] = '\r';
258 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
259 output_len * sizeof(Py_UNICODE));
260 Py_DECREF(output);
261 output = modified;
262 self->pendingcr = 0;
263 output_len++;
264 }
265
266 /* retain last \r even when not translating data:
267 * then readline() is sure to get \r\n in one pass
268 */
269 if (!final) {
270 if (output_len > 0
271 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
272
273 if (Py_REFCNT(output) == 1) {
274 if (PyUnicode_Resize(&output, output_len - 1) < 0)
275 goto error;
276 }
277 else {
278 PyObject *modified = PyUnicode_FromUnicode(
279 PyUnicode_AS_UNICODE(output),
280 output_len - 1);
281 if (modified == NULL)
282 goto error;
283 Py_DECREF(output);
284 output = modified;
285 }
286 self->pendingcr = 1;
287 }
288 }
289
290 /* Record which newlines are read and do newline translation if desired,
291 all in one pass. */
292 {
293 Py_UNICODE *in_str;
294 Py_ssize_t len;
295 int seennl = self->seennl;
296 int only_lf = 0;
297
298 in_str = PyUnicode_AS_UNICODE(output);
299 len = PyUnicode_GET_SIZE(output);
300
301 if (len == 0)
302 return output;
303
304 /* If, up to now, newlines are consistently \n, do a quick check
305 for the \r *byte* with the libc's optimized memchr.
306 */
307 if (seennl == SEEN_LF || seennl == 0) {
308 int has_cr, has_lf;
309 has_lf = (seennl == SEEN_LF) ||
310 (memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL);
311 has_cr = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) != NULL);
312 if (has_lf && !has_cr) {
313 only_lf = 1;
314 seennl = SEEN_LF;
315 }
316 }
317
318 if (!self->translate) {
319 Py_UNICODE *s, *end;
320 if (seennl == SEEN_ALL)
321 goto endscan;
322 if (only_lf)
323 goto endscan;
324 s = in_str;
325 end = in_str + len;
326 for (;;) {
327 Py_UNICODE c;
328 /* Fast loop for non-control characters */
329 while (*s > '\r')
330 s++;
331 c = *s++;
332 if (c == '\n')
333 seennl |= SEEN_LF;
334 else if (c == '\r') {
335 if (*s == '\n') {
336 seennl |= SEEN_CRLF;
337 s++;
338 }
339 else
340 seennl |= SEEN_CR;
341 }
342 if (s > end)
343 break;
344 if (seennl == SEEN_ALL)
345 break;
346 }
347 endscan:
348 ;
349 }
350 else if (!only_lf) {
351 PyObject *translated = NULL;
352 Py_UNICODE *out_str;
353 Py_UNICODE *in, *out, *end;
354 if (Py_REFCNT(output) != 1) {
355 /* We could try to optimize this so that we only do a copy
356 when there is something to translate. On the other hand,
357 most decoders should only output non-shared strings, i.e.
358 translation is done in place. */
359 translated = PyUnicode_FromUnicode(NULL, len);
360 if (translated == NULL)
361 goto error;
362 assert(Py_REFCNT(translated) == 1);
363 memcpy(PyUnicode_AS_UNICODE(translated),
364 PyUnicode_AS_UNICODE(output),
365 len * sizeof(Py_UNICODE));
366 }
367 else {
368 translated = output;
369 }
370 out_str = PyUnicode_AS_UNICODE(translated);
371 in = in_str;
372 out = out_str;
373 end = in_str + len;
374 for (;;) {
375 Py_UNICODE c;
376 /* Fast loop for non-control characters */
377 while ((c = *in++) > '\r')
378 *out++ = c;
379 if (c == '\n') {
380 *out++ = c;
381 seennl |= SEEN_LF;
382 continue;
383 }
384 if (c == '\r') {
385 if (*in == '\n') {
386 in++;
387 seennl |= SEEN_CRLF;
388 }
389 else
390 seennl |= SEEN_CR;
391 *out++ = '\n';
392 continue;
393 }
394 if (in > end)
395 break;
396 *out++ = c;
397 }
398 if (translated != output) {
399 Py_DECREF(output);
400 output = translated;
401 }
402 if (out - out_str != len) {
403 if (PyUnicode_Resize(&output, out - out_str) < 0)
404 goto error;
405 }
406 }
407 self->seennl |= seennl;
408 }
409
410 return output;
411
412 error:
413 Py_DECREF(output);
414 return NULL;
415}
416
417static PyObject *
418IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
419 PyObject *args, PyObject *kwds)
420{
421 char *kwlist[] = {"input", "final", NULL};
422 PyObject *input;
423 int final = 0;
424
425 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
426 kwlist, &input, &final))
427 return NULL;
428 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
429}
430
431static PyObject *
432IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
433{
434 PyObject *buffer;
435 unsigned PY_LONG_LONG flag;
436
437 if (self->decoder != Py_None) {
438 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
439 _PyIO_str_getstate, NULL);
440 if (state == NULL)
441 return NULL;
442 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
443 Py_DECREF(state);
444 return NULL;
445 }
446 Py_INCREF(buffer);
447 Py_DECREF(state);
448 }
449 else {
450 buffer = PyBytes_FromString("");
451 flag = 0;
452 }
453 flag <<= 1;
454 if (self->pendingcr)
455 flag |= 1;
456 return Py_BuildValue("NK", buffer, flag);
457}
458
459static PyObject *
460IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject *self, PyObject *state)
461{
462 PyObject *buffer;
463 unsigned PY_LONG_LONG flag;
464
465 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
466 return NULL;
467
468 self->pendingcr = (int) flag & 1;
469 flag >>= 1;
470
471 if (self->decoder != Py_None)
472 return PyObject_CallMethod(self->decoder,
473 "setstate", "((OK))", buffer, flag);
474 else
475 Py_RETURN_NONE;
476}
477
478static PyObject *
479IncrementalNewlineDecoder_reset(PyNewLineDecoderObject *self, PyObject *args)
480{
481 self->seennl = 0;
482 self->pendingcr = 0;
483 if (self->decoder != Py_None)
484 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
485 else
486 Py_RETURN_NONE;
487}
488
489static PyObject *
490IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject *self, void *context)
491{
492 switch (self->seennl) {
493 case SEEN_CR:
494 return PyUnicode_FromString("\r");
495 case SEEN_LF:
496 return PyUnicode_FromString("\n");
497 case SEEN_CRLF:
498 return PyUnicode_FromString("\r\n");
499 case SEEN_CR | SEEN_LF:
500 return Py_BuildValue("ss", "\r", "\n");
501 case SEEN_CR | SEEN_CRLF:
502 return Py_BuildValue("ss", "\r", "\r\n");
503 case SEEN_LF | SEEN_CRLF:
504 return Py_BuildValue("ss", "\n", "\r\n");
505 case SEEN_CR | SEEN_LF | SEEN_CRLF:
506 return Py_BuildValue("sss", "\r", "\n", "\r\n");
507 default:
508 Py_RETURN_NONE;
509 }
510
511}
512
513
514static PyMethodDef IncrementalNewlineDecoder_methods[] = {
515 {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS},
516 {"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
517 {"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
518 {"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
519 {0}
520};
521
522static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
523 {"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
524 {0}
525};
526
527PyTypeObject PyIncrementalNewlineDecoder_Type = {
528 PyVarObject_HEAD_INIT(NULL, 0)
529 "_io.IncrementalNewlineDecoder", /*tp_name*/
530 sizeof(PyNewLineDecoderObject), /*tp_basicsize*/
531 0, /*tp_itemsize*/
532 (destructor)IncrementalNewlineDecoder_dealloc, /*tp_dealloc*/
533 0, /*tp_print*/
534 0, /*tp_getattr*/
535 0, /*tp_setattr*/
536 0, /*tp_compare */
537 0, /*tp_repr*/
538 0, /*tp_as_number*/
539 0, /*tp_as_sequence*/
540 0, /*tp_as_mapping*/
541 0, /*tp_hash */
542 0, /*tp_call*/
543 0, /*tp_str*/
544 0, /*tp_getattro*/
545 0, /*tp_setattro*/
546 0, /*tp_as_buffer*/
547 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
548 IncrementalNewlineDecoder_doc, /* tp_doc */
549 0, /* tp_traverse */
550 0, /* tp_clear */
551 0, /* tp_richcompare */
552 0, /*tp_weaklistoffset*/
553 0, /* tp_iter */
554 0, /* tp_iternext */
555 IncrementalNewlineDecoder_methods, /* tp_methods */
556 0, /* tp_members */
557 IncrementalNewlineDecoder_getset, /* tp_getset */
558 0, /* tp_base */
559 0, /* tp_dict */
560 0, /* tp_descr_get */
561 0, /* tp_descr_set */
562 0, /* tp_dictoffset */
563 (initproc)IncrementalNewlineDecoder_init, /* tp_init */
564 0, /* tp_alloc */
565 PyType_GenericNew, /* tp_new */
566};
567
568
569/* TextIOWrapper */
570
571PyDoc_STRVAR(TextIOWrapper_doc,
572 "Character and line based layer over a BufferedIOBase object, buffer.\n"
573 "\n"
574 "encoding gives the name of the encoding that the stream will be\n"
575 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
576 "\n"
577 "errors determines the strictness of encoding and decoding (see the\n"
578 "codecs.register) and defaults to \"strict\".\n"
579 "\n"
580 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
581 "handling of line endings. If it is None, universal newlines is\n"
582 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
583 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
584 "caller. Conversely, on output, '\\n' is translated to the system\n"
585 "default line seperator, os.linesep. If newline is any other of its\n"
586 "legal values, that newline becomes the newline when the file is read\n"
587 "and it is returned untranslated. On output, '\\n' is converted to the\n"
588 "newline.\n"
589 "\n"
590 "If line_buffering is True, a call to flush is implied when a call to\n"
591 "write contains a newline character."
592 );
593
594typedef PyObject *
595 (*encodefunc_t)(PyObject *, PyObject *);
596
597typedef struct
598{
599 PyObject_HEAD
600 int ok; /* initialized? */
601 Py_ssize_t chunk_size;
602 PyObject *buffer;
603 PyObject *encoding;
604 PyObject *encoder;
605 PyObject *decoder;
606 PyObject *readnl;
607 PyObject *errors;
608 const char *writenl; /* utf-8 encoded, NULL stands for \n */
609 char line_buffering;
610 char readuniversal;
611 char readtranslate;
612 char writetranslate;
613 char seekable;
614 char telling;
615 /* Specialized encoding func (see below) */
616 encodefunc_t encodefunc;
617
618 /* Reads and writes are internally buffered in order to speed things up.
619 However, any read will first flush the write buffer if itsn't empty.
620
621 Please also note that text to be written is first encoded before being
622 buffered. This is necessary so that encoding errors are immediately
623 reported to the caller, but it unfortunately means that the
624 IncrementalEncoder (whose encode() method is always written in Python)
625 becomes a bottleneck for small writes.
626 */
627 PyObject *decoded_chars; /* buffer for text returned from decoder */
628 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
629 PyObject *pending_bytes; /* list of bytes objects waiting to be
630 written, or NULL */
631 Py_ssize_t pending_bytes_count;
632 PyObject *snapshot;
633 /* snapshot is either None, or a tuple (dec_flags, next_input) where
634 * dec_flags is the second (integer) item of the decoder state and
635 * next_input is the chunk of input bytes that comes next after the
636 * snapshot point. We use this to reconstruct decoder states in tell().
637 */
638
639 /* Cache raw object if it's a FileIO object */
640 PyObject *raw;
641
642 PyObject *weakreflist;
643 PyObject *dict;
644} PyTextIOWrapperObject;
645
646
647/* A couple of specialized cases in order to bypass the slow incremental
648 encoding methods for the most popular encodings. */
649
650static PyObject *
651ascii_encode(PyTextIOWrapperObject *self, PyObject *text)
652{
653 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
654 PyUnicode_GET_SIZE(text),
655 PyBytes_AS_STRING(self->errors));
656}
657
658static PyObject *
659utf16be_encode(PyTextIOWrapperObject *self, PyObject *text)
660{
661 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
662 PyUnicode_GET_SIZE(text),
663 PyBytes_AS_STRING(self->errors), 1);
664}
665
666static PyObject *
667utf16le_encode(PyTextIOWrapperObject *self, PyObject *text)
668{
669 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
670 PyUnicode_GET_SIZE(text),
671 PyBytes_AS_STRING(self->errors), -1);
672}
673
674static PyObject *
675utf16_encode(PyTextIOWrapperObject *self, PyObject *text)
676{
677 PyObject *res;
678 res = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
679 PyUnicode_GET_SIZE(text),
680 PyBytes_AS_STRING(self->errors), 0);
681 if (res == NULL)
682 return NULL;
683 /* Next writes will skip the BOM and use native byte ordering */
684#if defined(WORDS_BIGENDIAN)
685 self->encodefunc = (encodefunc_t) utf16be_encode;
686#else
687 self->encodefunc = (encodefunc_t) utf16le_encode;
688#endif
689 return res;
690}
691
692
693static PyObject *
694utf8_encode(PyTextIOWrapperObject *self, PyObject *text)
695{
696 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
697 PyUnicode_GET_SIZE(text),
698 PyBytes_AS_STRING(self->errors));
699}
700
701static PyObject *
702latin1_encode(PyTextIOWrapperObject *self, PyObject *text)
703{
704 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
705 PyUnicode_GET_SIZE(text),
706 PyBytes_AS_STRING(self->errors));
707}
708
709/* Map normalized encoding names onto the specialized encoding funcs */
710
711typedef struct {
712 const char *name;
713 encodefunc_t encodefunc;
714} encodefuncentry;
715
716encodefuncentry encodefuncs[] = {
717 {"ascii", (encodefunc_t) ascii_encode},
718 {"iso8859-1", (encodefunc_t) latin1_encode},
719 {"utf-16-be", (encodefunc_t) utf16be_encode},
720 {"utf-16-le", (encodefunc_t) utf16le_encode},
721 {"utf-16", (encodefunc_t) utf16_encode},
722 {"utf-8", (encodefunc_t) utf8_encode},
723 {NULL, NULL}
724};
725
726
727static int
728TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds)
729{
730 char *kwlist[] = {"buffer", "encoding", "errors",
731 "newline", "line_buffering",
732 NULL};
733 PyObject *buffer, *raw;
734 char *encoding = NULL;
735 char *errors = NULL;
736 char *newline = NULL;
737 int line_buffering = 0;
738 _PyIO_State *state = IO_STATE;
739
740 PyObject *res;
741 int r;
742
743 self->ok = 0;
744 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
745 kwlist, &buffer, &encoding, &errors,
746 &newline, &line_buffering))
747 return -1;
748
749 if (newline && newline[0] != '\0'
750 && !(newline[0] == '\n' && newline[1] == '\0')
751 && !(newline[0] == '\r' && newline[1] == '\0')
752 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
753 PyErr_Format(PyExc_ValueError,
754 "illegal newline value: %s", newline);
755 return -1;
756 }
757
758 Py_CLEAR(self->buffer);
759 Py_CLEAR(self->encoding);
760 Py_CLEAR(self->encoder);
761 Py_CLEAR(self->decoder);
762 Py_CLEAR(self->readnl);
763 Py_CLEAR(self->decoded_chars);
764 Py_CLEAR(self->pending_bytes);
765 Py_CLEAR(self->snapshot);
766 Py_CLEAR(self->errors);
767 Py_CLEAR(self->raw);
768 self->decoded_chars_used = 0;
769 self->pending_bytes_count = 0;
770 self->encodefunc = NULL;
771
772 if (encoding == NULL) {
773 /* Try os.device_encoding(fileno) */
774 PyObject *fileno;
775 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
776 /* Ignore only AttributeError and UnsupportedOperation */
777 if (fileno == NULL) {
778 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
779 PyErr_ExceptionMatches(state->unsupported_operation)) {
780 PyErr_Clear();
781 }
782 else {
783 goto error;
784 }
785 }
786 else {
787 self->encoding = PyObject_CallMethod(state->os_module,
788 "device_encoding",
789 "N", fileno);
790 if (self->encoding == NULL)
791 goto error;
792 else if (!PyUnicode_Check(self->encoding))
793 Py_CLEAR(self->encoding);
794 }
795 }
796 if (encoding == NULL && self->encoding == NULL) {
797 if (state->locale_module == NULL) {
798 state->locale_module = PyImport_ImportModule("locale");
799 if (state->locale_module == NULL)
800 goto catch_ImportError;
801 else
802 goto use_locale;
803 }
804 else {
805 use_locale:
806 self->encoding = PyObject_CallMethod(
807 state->locale_module, "getpreferredencoding", NULL);
808 if (self->encoding == NULL) {
809 catch_ImportError:
810 /*
811 Importing locale can raise a ImportError because of
812 _functools, and locale.getpreferredencoding can raise a
813 ImportError if _locale is not available. These will happen
814 during module building.
815 */
816 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
817 PyErr_Clear();
818 self->encoding = PyUnicode_FromString("ascii");
819 }
820 else
821 goto error;
822 }
823 else if (!PyUnicode_Check(self->encoding))
824 Py_CLEAR(self->encoding);
825 }
826 }
827 if (self->encoding != NULL)
828 encoding = _PyUnicode_AsString(self->encoding);
829 else if (encoding != NULL) {
830 self->encoding = PyUnicode_FromString(encoding);
831 if (self->encoding == NULL)
832 goto error;
833 }
834 else {
835 PyErr_SetString(PyExc_IOError,
836 "could not determine default encoding");
837 }
838
839 if (errors == NULL)
840 errors = "strict";
841 self->errors = PyBytes_FromString(errors);
842 if (self->errors == NULL)
843 goto error;
844
845 self->chunk_size = 8192;
846 self->readuniversal = (newline == NULL || newline[0] == '\0');
847 self->line_buffering = line_buffering;
848 self->readtranslate = (newline == NULL);
849 if (newline) {
850 self->readnl = PyUnicode_FromString(newline);
851 if (self->readnl == NULL)
852 return -1;
853 }
854 self->writetranslate = (newline == NULL || newline[0] != '\0');
855 if (!self->readuniversal && self->readnl) {
856 self->writenl = _PyUnicode_AsString(self->readnl);
857 if (!strcmp(self->writenl, "\n"))
858 self->writenl = NULL;
859 }
860#ifdef MS_WINDOWS
861 else
862 self->writenl = "\r\n";
863#endif
864
865 /* Build the decoder object */
866 res = PyObject_CallMethod(buffer, "readable", NULL);
867 if (res == NULL)
868 goto error;
869 r = PyObject_IsTrue(res);
870 Py_DECREF(res);
871 if (r == -1)
872 goto error;
873 if (r == 1) {
874 self->decoder = PyCodec_IncrementalDecoder(
875 encoding, errors);
876 if (self->decoder == NULL)
877 goto error;
878
879 if (self->readuniversal) {
880 PyObject *incrementalDecoder = PyObject_CallFunction(
881 (PyObject *)&PyIncrementalNewlineDecoder_Type,
882 "Oi", self->decoder, (int)self->readtranslate);
883 if (incrementalDecoder == NULL)
884 goto error;
885 Py_CLEAR(self->decoder);
886 self->decoder = incrementalDecoder;
887 }
888 }
889
890 /* Build the encoder object */
891 res = PyObject_CallMethod(buffer, "writable", NULL);
892 if (res == NULL)
893 goto error;
894 r = PyObject_IsTrue(res);
895 Py_DECREF(res);
896 if (r == -1)
897 goto error;
898 if (r == 1) {
899 PyObject *ci;
900 self->encoder = PyCodec_IncrementalEncoder(
901 encoding, errors);
902 if (self->encoder == NULL)
903 goto error;
904 /* Get the normalized named of the codec */
905 ci = _PyCodec_Lookup(encoding);
906 if (ci == NULL)
907 goto error;
908 res = PyObject_GetAttrString(ci, "name");
909 Py_DECREF(ci);
910 if (res == NULL)
911 PyErr_Clear();
912 else if (PyUnicode_Check(res)) {
913 encodefuncentry *e = encodefuncs;
914 while (e->name != NULL) {
915 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
916 self->encodefunc = e->encodefunc;
917 break;
918 }
919 e++;
920 }
921 }
922 Py_XDECREF(res);
923 }
924
925 self->buffer = buffer;
926 Py_INCREF(buffer);
927
928 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
929 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
930 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
931 raw = PyObject_GetAttrString(buffer, "raw");
932 /* Cache the raw FileIO object to speed up 'closed' checks */
933 if (raw == NULL)
934 PyErr_Clear();
935 else if (Py_TYPE(raw) == &PyFileIO_Type)
936 self->raw = raw;
937 else
938 Py_DECREF(raw);
939 }
940
941 res = PyObject_CallMethod(buffer, "seekable", NULL);
942 if (res == NULL)
943 goto error;
944 self->seekable = self->telling = PyObject_IsTrue(res);
945 Py_DECREF(res);
946
947 self->ok = 1;
948 return 0;
949
950 error:
951 return -1;
952}
953
954static int
955_TextIOWrapper_clear(PyTextIOWrapperObject *self)
956{
957 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
958 return -1;
959 self->ok = 0;
960 Py_CLEAR(self->buffer);
961 Py_CLEAR(self->encoding);
962 Py_CLEAR(self->encoder);
963 Py_CLEAR(self->decoder);
964 Py_CLEAR(self->readnl);
965 Py_CLEAR(self->decoded_chars);
966 Py_CLEAR(self->pending_bytes);
967 Py_CLEAR(self->snapshot);
968 Py_CLEAR(self->errors);
969 Py_CLEAR(self->raw);
970 return 0;
971}
972
973static void
974TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
975{
976 if (_TextIOWrapper_clear(self) < 0)
977 return;
978 _PyObject_GC_UNTRACK(self);
979 if (self->weakreflist != NULL)
980 PyObject_ClearWeakRefs((PyObject *)self);
981 Py_CLEAR(self->dict);
982 Py_TYPE(self)->tp_free((PyObject *)self);
983}
984
985static int
986TextIOWrapper_traverse(PyTextIOWrapperObject *self, visitproc visit, void *arg)
987{
988 Py_VISIT(self->buffer);
989 Py_VISIT(self->encoding);
990 Py_VISIT(self->encoder);
991 Py_VISIT(self->decoder);
992 Py_VISIT(self->readnl);
993 Py_VISIT(self->decoded_chars);
994 Py_VISIT(self->pending_bytes);
995 Py_VISIT(self->snapshot);
996 Py_VISIT(self->errors);
997 Py_VISIT(self->raw);
998
999 Py_VISIT(self->dict);
1000 return 0;
1001}
1002
1003static int
1004TextIOWrapper_clear(PyTextIOWrapperObject *self)
1005{
1006 if (_TextIOWrapper_clear(self) < 0)
1007 return -1;
1008 Py_CLEAR(self->dict);
1009 return 0;
1010}
1011
1012static PyObject *
1013TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context);
1014
1015/* This macro takes some shortcuts to make the common case faster. */
1016#define CHECK_CLOSED(self) \
1017 do { \
1018 int r; \
1019 PyObject *_res; \
1020 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1021 if (self->raw != NULL) \
1022 r = _PyFileIO_closed(self->raw); \
1023 else { \
1024 _res = TextIOWrapper_closed_get(self, NULL); \
1025 if (_res == NULL) \
1026 return NULL; \
1027 r = PyObject_IsTrue(_res); \
1028 Py_DECREF(_res); \
1029 if (r < 0) \
1030 return NULL; \
1031 } \
1032 if (r > 0) { \
1033 PyErr_SetString(PyExc_ValueError, \
1034 "I/O operation on closed file."); \
1035 return NULL; \
1036 } \
1037 } \
1038 else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
1039 return NULL; \
1040 } while (0)
1041
/* Return NULL from the enclosing function, with ValueError set, unless
   `self` has been successfully initialized (ok > 0).

   Wrapped in do { } while (0) so that the expansion is a single statement
   (a bare `if` would capture a following `else`), and the argument is
   parenthesized.  Use as a statement: CHECK_INITIALIZED(self); */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1048
/* Variant of the initialization check for functions returning int:
   returns -1, with ValueError set, unless `self` has been successfully
   initialized (ok > 0).

   Wrapped in do { } while (0) so that the expansion is a single statement
   (a bare `if` would capture a following `else`), and the argument is
   parenthesized.  Use as a statement: CHECK_INITIALIZED_INT(self); */
#define CHECK_INITIALIZED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return -1; \
        } \
    } while (0)
1055
1056
1057Py_LOCAL_INLINE(const Py_UNICODE *)
1058findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1059{
1060 /* like wcschr, but doesn't stop at NULL characters */
1061 while (size-- > 0) {
1062 if (*s == ch)
1063 return s;
1064 s++;
1065 }
1066 return NULL;
1067}
1068
1069/* Flush the internal write buffer. This doesn't explicitly flush the
1070 underlying buffered object, though. */
1071static int
1072_TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
1073{
1074 PyObject *b, *ret;
1075
1076 if (self->pending_bytes == NULL)
1077 return 0;
1078 b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1079 if (b == NULL)
1080 return -1;
1081 ret = PyObject_CallMethodObjArgs(self->buffer,
1082 _PyIO_str_write, b, NULL);
1083 Py_DECREF(b);
1084 if (ret == NULL)
1085 return -1;
1086 Py_DECREF(ret);
1087 Py_CLEAR(self->pending_bytes);
1088 self->pending_bytes_count = 0;
1089 return 0;
1090}
1091
1092static PyObject *
1093TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
1094{
1095 PyObject *ret;
1096 PyObject *text; /* owned reference */
1097 PyObject *b;
1098 Py_ssize_t textlen;
1099 int haslf = 0;
1100 int needflush = 0;
1101
1102 CHECK_INITIALIZED(self);
1103
1104 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1105 return NULL;
1106 }
1107
1108 CHECK_CLOSED(self);
1109
1110 Py_INCREF(text);
1111
1112 textlen = PyUnicode_GetSize(text);
1113
1114 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1115 if (findchar(PyUnicode_AS_UNICODE(text),
1116 PyUnicode_GET_SIZE(text), '\n'))
1117 haslf = 1;
1118
1119 if (haslf && self->writetranslate && self->writenl != NULL) {
1120 PyObject *newtext = PyObject_CallMethod(
1121 text, "replace", "ss", "\n", self->writenl);
1122 Py_DECREF(text);
1123 if (newtext == NULL)
1124 return NULL;
1125 text = newtext;
1126 }
1127
1128 if (self->line_buffering &&
1129 (haslf ||
1130 findchar(PyUnicode_AS_UNICODE(text),
1131 PyUnicode_GET_SIZE(text), '\r')))
1132 needflush = 1;
1133
1134 /* XXX What if we were just reading? */
1135 if (self->encodefunc != NULL)
1136 b = (*self->encodefunc)((PyObject *) self, text);
1137 else
1138 b = PyObject_CallMethodObjArgs(self->encoder,
1139 _PyIO_str_encode, text, NULL);
1140 Py_DECREF(text);
1141 if (b == NULL)
1142 return NULL;
1143
1144 if (self->pending_bytes == NULL) {
1145 self->pending_bytes = PyList_New(0);
1146 if (self->pending_bytes == NULL) {
1147 Py_DECREF(b);
1148 return NULL;
1149 }
1150 self->pending_bytes_count = 0;
1151 }
1152 if (PyList_Append(self->pending_bytes, b) < 0) {
1153 Py_DECREF(b);
1154 return NULL;
1155 }
1156 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1157 Py_DECREF(b);
1158 if (self->pending_bytes_count > self->chunk_size || needflush) {
1159 if (_TextIOWrapper_writeflush(self) < 0)
1160 return NULL;
1161 }
1162
1163 if (needflush) {
1164 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1165 if (ret == NULL)
1166 return NULL;
1167 Py_DECREF(ret);
1168 }
1169
1170 Py_CLEAR(self->snapshot);
1171
1172 if (self->decoder) {
1173 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1174 if (ret == NULL)
1175 return NULL;
1176 Py_DECREF(ret);
1177 }
1178
1179 return PyLong_FromSsize_t(textlen);
1180}
1181
1182/* Steal a reference to chars and store it in the decoded_char buffer;
1183 */
1184static void
1185TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars)
1186{
1187 Py_CLEAR(self->decoded_chars);
1188 self->decoded_chars = chars;
1189 self->decoded_chars_used = 0;
1190}
1191
1192static PyObject *
1193TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
1194{
1195 PyObject *chars;
1196 Py_ssize_t avail;
1197
1198 if (self->decoded_chars == NULL)
1199 return PyUnicode_FromStringAndSize(NULL, 0);
1200
1201 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1202 - self->decoded_chars_used);
1203
1204 assert(avail >= 0);
1205
1206 if (n < 0 || n > avail)
1207 n = avail;
1208
1209 if (self->decoded_chars_used > 0 || n < avail) {
1210 chars = PyUnicode_FromUnicode(
1211 PyUnicode_AS_UNICODE(self->decoded_chars)
1212 + self->decoded_chars_used, n);
1213 if (chars == NULL)
1214 return NULL;
1215 }
1216 else {
1217 chars = self->decoded_chars;
1218 Py_INCREF(chars);
1219 }
1220
1221 self->decoded_chars_used += n;
1222 return chars;
1223}
1224
1225/* Read and decode the next chunk of data from the BufferedReader.
1226 */
1227static int
1228TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
1229{
1230 PyObject *dec_buffer = NULL;
1231 PyObject *dec_flags = NULL;
1232 PyObject *input_chunk = NULL;
1233 PyObject *decoded_chars, *chunk_size;
1234 int eof;
1235
1236 /* The return value is True unless EOF was reached. The decoded string is
1237 * placed in self._decoded_chars (replacing its previous value). The
1238 * entire input chunk is sent to the decoder, though some of it may remain
1239 * buffered in the decoder, yet to be converted.
1240 */
1241
1242 if (self->decoder == NULL) {
1243 PyErr_SetString(PyExc_ValueError, "no decoder");
1244 return -1;
1245 }
1246
1247 if (self->telling) {
1248 /* To prepare for tell(), we need to snapshot a point in the file
1249 * where the decoder's input buffer is empty.
1250 */
1251
1252 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1253 _PyIO_str_getstate, NULL);
1254 if (state == NULL)
1255 return -1;
1256 /* Given this, we know there was a valid snapshot point
1257 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1258 */
1259 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1260 Py_DECREF(state);
1261 return -1;
1262 }
1263 Py_INCREF(dec_buffer);
1264 Py_INCREF(dec_flags);
1265 Py_DECREF(state);
1266 }
1267
1268 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1269 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1270 if (chunk_size == NULL)
1271 goto fail;
1272 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1273 _PyIO_str_read1, chunk_size, NULL);
1274 Py_DECREF(chunk_size);
1275 if (input_chunk == NULL)
1276 goto fail;
1277 assert(PyBytes_Check(input_chunk));
1278
1279 eof = (PyBytes_Size(input_chunk) == 0);
1280
1281 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1282 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1283 self->decoder, input_chunk, eof);
1284 }
1285 else {
1286 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1287 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1288 }
1289
1290 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1291 if (decoded_chars == NULL)
1292 goto fail;
1293 TextIOWrapper_set_decoded_chars(self, decoded_chars);
1294 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1295 eof = 0;
1296
1297 if (self->telling) {
1298 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1299 * next input to be decoded is dec_buffer + input_chunk.
1300 */
1301 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1302 if (next_input == NULL)
1303 goto fail;
1304 assert (PyBytes_Check(next_input));
1305 Py_DECREF(dec_buffer);
1306 Py_CLEAR(self->snapshot);
1307 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1308 }
1309 Py_DECREF(input_chunk);
1310
1311 return (eof == 0);
1312
1313 fail:
1314 Py_XDECREF(dec_buffer);
1315 Py_XDECREF(dec_flags);
1316 Py_XDECREF(input_chunk);
1317 return -1;
1318}
1319
1320static PyObject *
1321TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
1322{
1323 Py_ssize_t n = -1;
1324 PyObject *result = NULL, *chunks = NULL;
1325
1326 CHECK_INITIALIZED(self);
1327
1328 if (!PyArg_ParseTuple(args, "|n:read", &n))
1329 return NULL;
1330
1331 CHECK_CLOSED(self);
1332
1333 if (_TextIOWrapper_writeflush(self) < 0)
1334 return NULL;
1335
1336 if (n < 0) {
1337 /* Read everything */
1338 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1339 PyObject *decoded;
1340 if (bytes == NULL)
1341 goto fail;
1342 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1343 bytes, Py_True, NULL);
1344 Py_DECREF(bytes);
1345 if (decoded == NULL)
1346 goto fail;
1347
1348 result = TextIOWrapper_get_decoded_chars(self, -1);
1349
1350 if (result == NULL) {
1351 Py_DECREF(decoded);
1352 return NULL;
1353 }
1354
1355 PyUnicode_AppendAndDel(&result, decoded);
1356 if (result == NULL)
1357 goto fail;
1358
1359 Py_CLEAR(self->snapshot);
1360 return result;
1361 }
1362 else {
1363 int res = 1;
1364 Py_ssize_t remaining = n;
1365
1366 result = TextIOWrapper_get_decoded_chars(self, n);
1367 if (result == NULL)
1368 goto fail;
1369 remaining -= PyUnicode_GET_SIZE(result);
1370
1371 /* Keep reading chunks until we have n characters to return */
1372 while (remaining > 0) {
1373 res = TextIOWrapper_read_chunk(self);
1374 if (res < 0)
1375 goto fail;
1376 if (res == 0) /* EOF */
1377 break;
1378 if (chunks == NULL) {
1379 chunks = PyList_New(0);
1380 if (chunks == NULL)
1381 goto fail;
1382 }
1383 if (PyList_Append(chunks, result) < 0)
1384 goto fail;
1385 Py_DECREF(result);
1386 result = TextIOWrapper_get_decoded_chars(self, remaining);
1387 if (result == NULL)
1388 goto fail;
1389 remaining -= PyUnicode_GET_SIZE(result);
1390 }
1391 if (chunks != NULL) {
1392 if (result != NULL && PyList_Append(chunks, result) < 0)
1393 goto fail;
1394 Py_CLEAR(result);
1395 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1396 if (result == NULL)
1397 goto fail;
1398 Py_CLEAR(chunks);
1399 }
1400 return result;
1401 }
1402 fail:
1403 Py_XDECREF(result);
1404 Py_XDECREF(chunks);
1405 return NULL;
1406}
1407
1408
1409/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1410 that is to the NUL character. Otherwise the function will produce
1411 incorrect results. */
1412static Py_UNICODE *
1413find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1414{
1415 Py_UNICODE *s = start;
1416 for (;;) {
1417 while (*s > ch)
1418 s++;
1419 if (*s == ch)
1420 return s;
1421 if (s == end)
1422 return NULL;
1423 s++;
1424 }
1425}
1426
1427Py_ssize_t
1428_PyIO_find_line_ending(
1429 int translated, int universal, PyObject *readnl,
1430 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1431{
1432 Py_ssize_t len = end - start;
1433
1434 if (translated) {
1435 /* Newlines are already translated, only search for \n */
1436 Py_UNICODE *pos = find_control_char(start, end, '\n');
1437 if (pos != NULL)
1438 return pos - start + 1;
1439 else {
1440 *consumed = len;
1441 return -1;
1442 }
1443 }
1444 else if (universal) {
1445 /* Universal newline search. Find any of \r, \r\n, \n
1446 * The decoder ensures that \r\n are not split in two pieces
1447 */
1448 Py_UNICODE *s = start;
1449 for (;;) {
1450 Py_UNICODE ch;
1451 /* Fast path for non-control chars. The loop always ends
1452 since the Py_UNICODE storage is NUL-terminated. */
1453 while (*s > '\r')
1454 s++;
1455 if (s >= end) {
1456 *consumed = len;
1457 return -1;
1458 }
1459 ch = *s++;
1460 if (ch == '\n')
1461 return s - start;
1462 if (ch == '\r') {
1463 if (*s == '\n')
1464 return s - start + 1;
1465 else
1466 return s - start;
1467 }
1468 }
1469 }
1470 else {
1471 /* Non-universal mode. */
1472 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1473 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1474 if (readnl_len == 1) {
1475 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1476 if (pos != NULL)
1477 return pos - start + 1;
1478 *consumed = len;
1479 return -1;
1480 }
1481 else {
1482 Py_UNICODE *s = start;
1483 Py_UNICODE *e = end - readnl_len + 1;
1484 Py_UNICODE *pos;
1485 if (e < s)
1486 e = s;
1487 while (s < e) {
1488 Py_ssize_t i;
1489 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1490 if (pos == NULL || pos >= e)
1491 break;
1492 for (i = 1; i < readnl_len; i++) {
1493 if (pos[i] != nl[i])
1494 break;
1495 }
1496 if (i == readnl_len)
1497 return pos - start + readnl_len;
1498 s = pos + 1;
1499 }
1500 pos = find_control_char(e, end, nl[0]);
1501 if (pos == NULL)
1502 *consumed = len;
1503 else
1504 *consumed = pos - start;
1505 return -1;
1506 }
1507 }
1508}
1509
1510static PyObject *
1511_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
1512{
1513 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1514 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1515 int res;
1516
1517 CHECK_CLOSED(self);
1518
1519 if (_TextIOWrapper_writeflush(self) < 0)
1520 return NULL;
1521
1522 chunked = 0;
1523
1524 while (1) {
1525 Py_UNICODE *ptr;
1526 Py_ssize_t line_len;
1527 Py_ssize_t consumed = 0;
1528
1529 /* First, get some data if necessary */
1530 res = 1;
1531 while (!self->decoded_chars ||
1532 !PyUnicode_GET_SIZE(self->decoded_chars)) {
1533 res = TextIOWrapper_read_chunk(self);
1534 if (res < 0)
1535 goto error;
1536 if (res == 0)
1537 break;
1538 }
1539 if (res == 0) {
1540 /* end of file */
1541 TextIOWrapper_set_decoded_chars(self, NULL);
1542 Py_CLEAR(self->snapshot);
1543 start = endpos = offset_to_buffer = 0;
1544 break;
1545 }
1546
1547 if (remaining == NULL) {
1548 line = self->decoded_chars;
1549 start = self->decoded_chars_used;
1550 offset_to_buffer = 0;
1551 Py_INCREF(line);
1552 }
1553 else {
1554 assert(self->decoded_chars_used == 0);
1555 line = PyUnicode_Concat(remaining, self->decoded_chars);
1556 start = 0;
1557 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1558 Py_CLEAR(remaining);
1559 if (line == NULL)
1560 goto error;
1561 }
1562
1563 ptr = PyUnicode_AS_UNICODE(line);
1564 line_len = PyUnicode_GET_SIZE(line);
1565
1566 endpos = _PyIO_find_line_ending(
1567 self->readtranslate, self->readuniversal, self->readnl,
1568 ptr + start, ptr + line_len, &consumed);
1569 if (endpos >= 0) {
1570 endpos += start;
1571 if (limit >= 0 && (endpos - start) + chunked >= limit)
1572 endpos = start + limit - chunked;
1573 break;
1574 }
1575
1576 /* We can put aside up to `endpos` */
1577 endpos = consumed + start;
1578 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1579 /* Didn't find line ending, but reached length limit */
1580 endpos = start + limit - chunked;
1581 break;
1582 }
1583
1584 if (endpos > start) {
1585 /* No line ending seen yet - put aside current data */
1586 PyObject *s;
1587 if (chunks == NULL) {
1588 chunks = PyList_New(0);
1589 if (chunks == NULL)
1590 goto error;
1591 }
1592 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1593 if (s == NULL)
1594 goto error;
1595 if (PyList_Append(chunks, s) < 0) {
1596 Py_DECREF(s);
1597 goto error;
1598 }
1599 chunked += PyUnicode_GET_SIZE(s);
1600 Py_DECREF(s);
1601 }
1602 /* There may be some remaining bytes we'll have to prepend to the
1603 next chunk of data */
1604 if (endpos < line_len) {
1605 remaining = PyUnicode_FromUnicode(
1606 ptr + endpos, line_len - endpos);
1607 if (remaining == NULL)
1608 goto error;
1609 }
1610 Py_CLEAR(line);
1611 /* We have consumed the buffer */
1612 TextIOWrapper_set_decoded_chars(self, NULL);
1613 }
1614
1615 if (line != NULL) {
1616 /* Our line ends in the current buffer */
1617 self->decoded_chars_used = endpos - offset_to_buffer;
1618 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1619 if (start == 0 && Py_REFCNT(line) == 1) {
1620 if (PyUnicode_Resize(&line, endpos) < 0)
1621 goto error;
1622 }
1623 else {
1624 PyObject *s = PyUnicode_FromUnicode(
1625 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1626 Py_CLEAR(line);
1627 if (s == NULL)
1628 goto error;
1629 line = s;
1630 }
1631 }
1632 }
1633 if (remaining != NULL) {
1634 if (chunks == NULL) {
1635 chunks = PyList_New(0);
1636 if (chunks == NULL)
1637 goto error;
1638 }
1639 if (PyList_Append(chunks, remaining) < 0)
1640 goto error;
1641 Py_CLEAR(remaining);
1642 }
1643 if (chunks != NULL) {
1644 if (line != NULL && PyList_Append(chunks, line) < 0)
1645 goto error;
1646 Py_CLEAR(line);
1647 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1648 if (line == NULL)
1649 goto error;
1650 Py_DECREF(chunks);
1651 }
1652 if (line == NULL)
1653 line = PyUnicode_FromStringAndSize(NULL, 0);
1654
1655 return line;
1656
1657 error:
1658 Py_XDECREF(chunks);
1659 Py_XDECREF(remaining);
1660 Py_XDECREF(line);
1661 return NULL;
1662}
1663
1664static PyObject *
1665TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args)
1666{
1667 Py_ssize_t limit = -1;
1668
1669 CHECK_INITIALIZED(self);
1670 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1671 return NULL;
1672 }
1673 return _TextIOWrapper_readline(self, limit);
1674}
1675
1676/* Seek and Tell */
1677
1678typedef struct {
1679 Py_off_t start_pos;
1680 int dec_flags;
1681 int bytes_to_feed;
1682 int chars_to_skip;
1683 char need_eof;
1684} CookieStruct;
1685
/*
   Cookies are packed and unpacked through an intermediate byte string that
   is handed to _PyLong_FromByteArray() / _PyLong_AsByteArray().  The OFF_*
   macros below give the offset of each CookieStruct field inside that byte
   string.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

/* Big-endian: the least significant byte of start_pos must also be the
   least significant byte of the cookie, so the fields are laid out in
   reverse order. */

# define IS_LITTLE_ENDIAN   0

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))

#else

/* Little-endian: with start_pos first, its least significant byte is
   naturally the least significant byte of the cookie. */

# define IS_LITTLE_ENDIAN   1

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1723
1724static int
1725TextIOWrapper_parseCookie(CookieStruct *cookie, PyObject *cookieObj)
1726{
1727 unsigned char buffer[COOKIE_BUF_LEN];
1728 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1729 if (cookieLong == NULL)
1730 return -1;
1731
1732 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1733 IS_LITTLE_ENDIAN, 0) < 0) {
1734 Py_DECREF(cookieLong);
1735 return -1;
1736 }
1737 Py_DECREF(cookieLong);
1738
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001739 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1740 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1741 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1742 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1743 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001744
1745 return 0;
1746}
1747
1748static PyObject *
1749TextIOWrapper_buildCookie(CookieStruct *cookie)
1750{
1751 unsigned char buffer[COOKIE_BUF_LEN];
1752
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001753 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1754 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1755 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1756 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1757 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001758
1759 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1760}
1761#undef IS_LITTLE_ENDIAN
1762
1763static int
1764_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
1765 CookieStruct *cookie)
1766{
1767 PyObject *res;
1768 /* When seeking to the start of the stream, we call decoder.reset()
1769 rather than decoder.getstate().
1770 This is for a few decoders such as utf-16 for which the state value
1771 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1772 utf-16, that we are expecting a BOM).
1773 */
1774 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1775 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1776 else
1777 res = PyObject_CallMethod(self->decoder, "setstate",
1778 "((yi))", "", cookie->dec_flags);
1779 if (res == NULL)
1780 return -1;
1781 Py_DECREF(res);
1782 return 0;
1783}
1784
1785static PyObject *
1786TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
1787{
1788 PyObject *cookieObj, *posobj;
1789 CookieStruct cookie;
1790 int whence = 0;
1791 static PyObject *zero = NULL;
1792 PyObject *res;
1793 int cmp;
1794
1795 CHECK_INITIALIZED(self);
1796
1797 if (zero == NULL) {
1798 zero = PyLong_FromLong(0L);
1799 if (zero == NULL)
1800 return NULL;
1801 }
1802
1803 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1804 return NULL;
1805 CHECK_CLOSED(self);
1806
1807 Py_INCREF(cookieObj);
1808
1809 if (!self->seekable) {
1810 PyErr_SetString(PyExc_IOError,
1811 "underlying stream is not seekable");
1812 goto fail;
1813 }
1814
1815 if (whence == 1) {
1816 /* seek relative to current position */
1817 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1818 if (cmp < 0)
1819 goto fail;
1820
1821 if (cmp == 0) {
1822 PyErr_SetString(PyExc_IOError,
1823 "can't do nonzero cur-relative seeks");
1824 goto fail;
1825 }
1826
1827 /* Seeking to the current position should attempt to
1828 * sync the underlying buffer with the current position.
1829 */
1830 Py_DECREF(cookieObj);
1831 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1832 if (cookieObj == NULL)
1833 goto fail;
1834 }
1835 else if (whence == 2) {
1836 /* seek relative to end of file */
1837
1838 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1839 if (cmp < 0)
1840 goto fail;
1841
1842 if (cmp == 0) {
1843 PyErr_SetString(PyExc_IOError,
1844 "can't do nonzero end-relative seeks");
1845 goto fail;
1846 }
1847
1848 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
1849 if (res == NULL)
1850 goto fail;
1851 Py_DECREF(res);
1852
1853 TextIOWrapper_set_decoded_chars(self, NULL);
1854 Py_CLEAR(self->snapshot);
1855 if (self->decoder) {
1856 res = PyObject_CallMethod(self->decoder, "reset", NULL);
1857 if (res == NULL)
1858 goto fail;
1859 Py_DECREF(res);
1860 }
1861
1862 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
1863 Py_XDECREF(cookieObj);
1864 return res;
1865 }
1866 else if (whence != 0) {
1867 PyErr_Format(PyExc_ValueError,
1868 "invalid whence (%d, should be 0, 1 or 2)", whence);
1869 goto fail;
1870 }
1871
1872 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
1873 if (cmp < 0)
1874 goto fail;
1875
1876 if (cmp == 1) {
1877 PyErr_Format(PyExc_ValueError,
1878 "negative seek position %R", cookieObj);
1879 goto fail;
1880 }
1881
1882 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1883 if (res == NULL)
1884 goto fail;
1885 Py_DECREF(res);
1886
1887 /* The strategy of seek() is to go back to the safe start point
1888 * and replay the effect of read(chars_to_skip) from there.
1889 */
1890 if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
1891 goto fail;
1892
1893 /* Seek back to the safe start point. */
1894 posobj = PyLong_FromOff_t(cookie.start_pos);
1895 if (posobj == NULL)
1896 goto fail;
1897 res = PyObject_CallMethodObjArgs(self->buffer,
1898 _PyIO_str_seek, posobj, NULL);
1899 Py_DECREF(posobj);
1900 if (res == NULL)
1901 goto fail;
1902 Py_DECREF(res);
1903
1904 TextIOWrapper_set_decoded_chars(self, NULL);
1905 Py_CLEAR(self->snapshot);
1906
1907 /* Restore the decoder to its state from the safe start point. */
1908 if (self->decoder) {
1909 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
1910 goto fail;
1911 }
1912
1913 if (cookie.chars_to_skip) {
1914 /* Just like _read_chunk, feed the decoder and save a snapshot. */
1915 PyObject *input_chunk = PyObject_CallMethod(
1916 self->buffer, "read", "i", cookie.bytes_to_feed);
1917 PyObject *decoded;
1918
1919 if (input_chunk == NULL)
1920 goto fail;
1921
1922 assert (PyBytes_Check(input_chunk));
1923
1924 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
1925 if (self->snapshot == NULL) {
1926 Py_DECREF(input_chunk);
1927 goto fail;
1928 }
1929
1930 decoded = PyObject_CallMethod(self->decoder, "decode",
1931 "Oi", input_chunk, (int)cookie.need_eof);
1932
1933 if (decoded == NULL)
1934 goto fail;
1935
1936 TextIOWrapper_set_decoded_chars(self, decoded);
1937
1938 /* Skip chars_to_skip of the decoded characters. */
1939 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
1940 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
1941 goto fail;
1942 }
1943 self->decoded_chars_used = cookie.chars_to_skip;
1944 }
1945 else {
1946 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
1947 if (self->snapshot == NULL)
1948 goto fail;
1949 }
1950
1951 return cookieObj;
1952 fail:
1953 Py_XDECREF(cookieObj);
1954 return NULL;
1955
1956}
1957
1958static PyObject *
1959TextIOWrapper_tell(PyTextIOWrapperObject *self, PyObject *args)
1960{
1961 PyObject *res;
1962 PyObject *posobj = NULL;
1963 CookieStruct cookie = {0,0,0,0,0};
1964 PyObject *next_input;
1965 Py_ssize_t chars_to_skip, chars_decoded;
1966 PyObject *saved_state = NULL;
1967 char *input, *input_end;
1968
1969 CHECK_INITIALIZED(self);
1970 CHECK_CLOSED(self);
1971
1972 if (!self->seekable) {
1973 PyErr_SetString(PyExc_IOError,
1974 "underlying stream is not seekable");
1975 goto fail;
1976 }
1977 if (!self->telling) {
1978 PyErr_SetString(PyExc_IOError,
1979 "telling position disabled by next() call");
1980 goto fail;
1981 }
1982
1983 if (_TextIOWrapper_writeflush(self) < 0)
1984 return NULL;
1985 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
1986 if (res == NULL)
1987 goto fail;
1988 Py_DECREF(res);
1989
1990 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
1991 if (posobj == NULL)
1992 goto fail;
1993
1994 if (self->decoder == NULL || self->snapshot == NULL) {
1995 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
1996 return posobj;
1997 }
1998
1999#if defined(HAVE_LARGEFILE_SUPPORT)
2000 cookie.start_pos = PyLong_AsLongLong(posobj);
2001#else
2002 cookie.start_pos = PyLong_AsLong(posobj);
2003#endif
2004 if (PyErr_Occurred())
2005 goto fail;
2006
2007 /* Skip backward to the snapshot point (see _read_chunk). */
2008 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2009 goto fail;
2010
2011 assert (PyBytes_Check(next_input));
2012
2013 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2014
2015 /* How many decoded characters have been used up since the snapshot? */
2016 if (self->decoded_chars_used == 0) {
2017 /* We haven't moved from the snapshot point. */
2018 Py_DECREF(posobj);
2019 return TextIOWrapper_buildCookie(&cookie);
2020 }
2021
2022 chars_to_skip = self->decoded_chars_used;
2023
2024 /* Starting from the snapshot position, we will walk the decoder
2025 * forward until it gives us enough decoded characters.
2026 */
2027 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2028 _PyIO_str_getstate, NULL);
2029 if (saved_state == NULL)
2030 goto fail;
2031
2032 /* Note our initial start point. */
2033 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
2034 goto fail;
2035
2036 /* Feed the decoder one byte at a time. As we go, note the
2037 * nearest "safe start point" before the current location
2038 * (a point where the decoder has nothing buffered, so seek()
2039 * can safely start from there and advance to this location).
2040 */
2041 chars_decoded = 0;
2042 input = PyBytes_AS_STRING(next_input);
2043 input_end = input + PyBytes_GET_SIZE(next_input);
2044 while (input < input_end) {
2045 PyObject *state;
2046 char *dec_buffer;
2047 Py_ssize_t dec_buffer_len;
2048 int dec_flags;
2049
2050 PyObject *decoded = PyObject_CallMethod(
2051 self->decoder, "decode", "y#", input, 1);
2052 if (decoded == NULL)
2053 goto fail;
2054 assert (PyUnicode_Check(decoded));
2055 chars_decoded += PyUnicode_GET_SIZE(decoded);
2056 Py_DECREF(decoded);
2057
2058 cookie.bytes_to_feed += 1;
2059
2060 state = PyObject_CallMethodObjArgs(self->decoder,
2061 _PyIO_str_getstate, NULL);
2062 if (state == NULL)
2063 goto fail;
2064 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2065 Py_DECREF(state);
2066 goto fail;
2067 }
2068 Py_DECREF(state);
2069
2070 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2071 /* Decoder buffer is empty, so this is a safe start point. */
2072 cookie.start_pos += cookie.bytes_to_feed;
2073 chars_to_skip -= chars_decoded;
2074 cookie.dec_flags = dec_flags;
2075 cookie.bytes_to_feed = 0;
2076 chars_decoded = 0;
2077 }
2078 if (chars_decoded >= chars_to_skip)
2079 break;
2080 input++;
2081 }
2082 if (input == input_end) {
2083 /* We didn't get enough decoded data; signal EOF to get more. */
2084 PyObject *decoded = PyObject_CallMethod(
2085 self->decoder, "decode", "yi", "", /* final = */ 1);
2086 if (decoded == NULL)
2087 goto fail;
2088 assert (PyUnicode_Check(decoded));
2089 chars_decoded += PyUnicode_GET_SIZE(decoded);
2090 Py_DECREF(decoded);
2091 cookie.need_eof = 1;
2092
2093 if (chars_decoded < chars_to_skip) {
2094 PyErr_SetString(PyExc_IOError,
2095 "can't reconstruct logical file position");
2096 goto fail;
2097 }
2098 }
2099
2100 /* finally */
2101 Py_XDECREF(posobj);
2102 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2103 Py_DECREF(saved_state);
2104 if (res == NULL)
2105 return NULL;
2106 Py_DECREF(res);
2107
2108 /* The returned cookie corresponds to the last safe start point. */
2109 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2110 return TextIOWrapper_buildCookie(&cookie);
2111
2112 fail:
2113 Py_XDECREF(posobj);
2114 if (saved_state) {
2115 PyObject *type, *value, *traceback;
2116 PyErr_Fetch(&type, &value, &traceback);
2117
2118 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2119 Py_DECREF(saved_state);
2120 if (res == NULL)
2121 return NULL;
2122 Py_DECREF(res);
2123
2124 PyErr_Restore(type, value, traceback);
2125 }
2126 return NULL;
2127}
2128
2129static PyObject *
2130TextIOWrapper_truncate(PyTextIOWrapperObject *self, PyObject *args)
2131{
2132 PyObject *pos = Py_None;
2133 PyObject *res;
2134
2135 CHECK_INITIALIZED(self)
2136 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2137 return NULL;
2138 }
2139
2140 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2141 if (res == NULL)
2142 return NULL;
2143 Py_DECREF(res);
2144
2145 if (pos != Py_None) {
2146 res = PyObject_CallMethodObjArgs((PyObject *) self,
2147 _PyIO_str_seek, pos, NULL);
2148 if (res == NULL)
2149 return NULL;
2150 Py_DECREF(res);
2151 }
2152
2153 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2154}
2155
2156/* Inquiries */
2157
2158static PyObject *
2159TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args)
2160{
2161 CHECK_INITIALIZED(self);
2162 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2163}
2164
2165static PyObject *
2166TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args)
2167{
2168 CHECK_INITIALIZED(self);
2169 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2170}
2171
2172static PyObject *
2173TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args)
2174{
2175 CHECK_INITIALIZED(self);
2176 return PyObject_CallMethod(self->buffer, "readable", NULL);
2177}
2178
2179static PyObject *
2180TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args)
2181{
2182 CHECK_INITIALIZED(self);
2183 return PyObject_CallMethod(self->buffer, "writable", NULL);
2184}
2185
2186static PyObject *
2187TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args)
2188{
2189 CHECK_INITIALIZED(self);
2190 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2191}
2192
2193static PyObject *
2194TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args)
2195{
2196 CHECK_INITIALIZED(self);
2197 CHECK_CLOSED(self);
2198 self->telling = self->seekable;
2199 if (_TextIOWrapper_writeflush(self) < 0)
2200 return NULL;
2201 return PyObject_CallMethod(self->buffer, "flush", NULL);
2202}
2203
2204static PyObject *
2205TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args)
2206{
2207 PyObject *res;
2208 CHECK_INITIALIZED(self);
2209 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2210 if (res == NULL) {
2211 /* If flush() fails, just give up */
2212 PyErr_Clear();
2213 }
2214 else
2215 Py_DECREF(res);
2216
2217 return PyObject_CallMethod(self->buffer, "close", NULL);
2218}
2219
2220static PyObject *
2221TextIOWrapper_iternext(PyTextIOWrapperObject *self)
2222{
2223 PyObject *line;
2224
2225 CHECK_INITIALIZED(self);
2226
2227 self->telling = 0;
2228 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2229 /* Skip method call overhead for speed */
2230 line = _TextIOWrapper_readline(self, -1);
2231 }
2232 else {
2233 line = PyObject_CallMethodObjArgs((PyObject *)self,
2234 _PyIO_str_readline, NULL);
2235 if (line && !PyUnicode_Check(line)) {
2236 PyErr_Format(PyExc_IOError,
2237 "readline() should have returned an str object, "
2238 "not '%.200s'", Py_TYPE(line)->tp_name);
2239 Py_DECREF(line);
2240 return NULL;
2241 }
2242 }
2243
2244 if (line == NULL)
2245 return NULL;
2246
2247 if (PyUnicode_GET_SIZE(line) == 0) {
2248 /* Reached EOF or would have blocked */
2249 Py_DECREF(line);
2250 Py_CLEAR(self->snapshot);
2251 self->telling = self->seekable;
2252 return NULL;
2253 }
2254
2255 return line;
2256}
2257
2258static PyObject *
2259TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context)
2260{
2261 CHECK_INITIALIZED(self);
2262 return PyObject_GetAttrString(self->buffer, "name");
2263}
2264
2265static PyObject *
2266TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context)
2267{
2268 CHECK_INITIALIZED(self);
2269 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2270}
2271
2272static PyObject *
2273TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context)
2274{
2275 PyObject *res;
2276 CHECK_INITIALIZED(self);
2277 if (self->decoder == NULL)
2278 Py_RETURN_NONE;
2279 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2280 if (res == NULL) {
2281 PyErr_Clear();
2282 Py_RETURN_NONE;
2283 }
2284 return res;
2285}
2286
2287static PyObject *
2288TextIOWrapper_chunk_size_get(PyTextIOWrapperObject *self, void *context)
2289{
2290 CHECK_INITIALIZED(self);
2291 return PyLong_FromSsize_t(self->chunk_size);
2292}
2293
2294static int
2295TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
2296 PyObject *arg, void *context)
2297{
2298 Py_ssize_t n;
2299 CHECK_INITIALIZED_INT(self);
2300 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2301 if (n == -1 && PyErr_Occurred())
2302 return -1;
2303 if (n <= 0) {
2304 PyErr_SetString(PyExc_ValueError,
2305 "a strictly positive integer is required");
2306 return -1;
2307 }
2308 self->chunk_size = n;
2309 return 0;
2310}
2311
2312static PyMethodDef TextIOWrapper_methods[] = {
2313 {"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
2314 {"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
2315 {"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
2316 {"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
2317 {"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
2318
2319 {"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
2320 {"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
2321 {"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
2322 {"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
2323 {"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
2324
2325 {"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
2326 {"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
2327 {"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
2328 {NULL, NULL}
2329};
2330
2331static PyMemberDef TextIOWrapper_members[] = {
2332 {"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
2333 {"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
2334 {"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
2335 {NULL}
2336};
2337
2338static PyGetSetDef TextIOWrapper_getset[] = {
2339 {"name", (getter)TextIOWrapper_name_get, NULL, NULL},
2340 {"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
2341/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2342*/
2343 {"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
2344 {"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
2345 (setter)TextIOWrapper_chunk_size_set, NULL},
2346 {0}
2347};
2348
2349PyTypeObject PyTextIOWrapper_Type = {
2350 PyVarObject_HEAD_INIT(NULL, 0)
2351 "_io.TextIOWrapper", /*tp_name*/
2352 sizeof(PyTextIOWrapperObject), /*tp_basicsize*/
2353 0, /*tp_itemsize*/
2354 (destructor)TextIOWrapper_dealloc, /*tp_dealloc*/
2355 0, /*tp_print*/
2356 0, /*tp_getattr*/
2357 0, /*tp_setattr*/
2358 0, /*tp_compare */
2359 0, /*tp_repr*/
2360 0, /*tp_as_number*/
2361 0, /*tp_as_sequence*/
2362 0, /*tp_as_mapping*/
2363 0, /*tp_hash */
2364 0, /*tp_call*/
2365 0, /*tp_str*/
2366 0, /*tp_getattro*/
2367 0, /*tp_setattro*/
2368 0, /*tp_as_buffer*/
2369 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2370 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2371 TextIOWrapper_doc, /* tp_doc */
2372 (traverseproc)TextIOWrapper_traverse, /* tp_traverse */
2373 (inquiry)TextIOWrapper_clear, /* tp_clear */
2374 0, /* tp_richcompare */
2375 offsetof(PyTextIOWrapperObject, weakreflist), /*tp_weaklistoffset*/
2376 0, /* tp_iter */
2377 (iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
2378 TextIOWrapper_methods, /* tp_methods */
2379 TextIOWrapper_members, /* tp_members */
2380 TextIOWrapper_getset, /* tp_getset */
2381 0, /* tp_base */
2382 0, /* tp_dict */
2383 0, /* tp_descr_get */
2384 0, /* tp_descr_set */
2385 offsetof(PyTextIOWrapperObject, dict), /*tp_dictoffset*/
2386 (initproc)TextIOWrapper_init, /* tp_init */
2387 0, /* tp_alloc */
2388 PyType_GenericNew, /* tp_new */
2389};