blob: c70b1dd55b4fa194554649b7508863ad4aeb36de [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
16PyDoc_STRVAR(TextIOBase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
31PyDoc_STRVAR(TextIOBase_read_doc,
32 "Read at most n characters from stream.\n"
33 "\n"
34 "Read from underlying buffer until we have n characters or we hit EOF.\n"
35 "If n is negative or omitted, read until EOF.\n"
36 );
37
38static PyObject *
39TextIOBase_read(PyObject *self, PyObject *args)
40{
41 return _unsupported("read");
42}
43
44PyDoc_STRVAR(TextIOBase_readline_doc,
45 "Read until newline or EOF.\n"
46 "\n"
47 "Returns an empty string if EOF is hit immediately.\n"
48 );
49
50static PyObject *
51TextIOBase_readline(PyObject *self, PyObject *args)
52{
53 return _unsupported("readline");
54}
55
56PyDoc_STRVAR(TextIOBase_write_doc,
57 "Write string to stream.\n"
58 "Returns the number of characters written (which is always equal to\n"
59 "the length of the string).\n"
60 );
61
62static PyObject *
63TextIOBase_write(PyObject *self, PyObject *args)
64{
65 return _unsupported("write");
66}
67
68PyDoc_STRVAR(TextIOBase_encoding_doc,
69 "Encoding of the text stream.\n"
70 "\n"
71 "Subclasses should override.\n"
72 );
73
74static PyObject *
75TextIOBase_encoding_get(PyObject *self, void *context)
76{
77 Py_RETURN_NONE;
78}
79
80PyDoc_STRVAR(TextIOBase_newlines_doc,
81 "Line endings translated so far.\n"
82 "\n"
83 "Only line endings translated during reading are considered.\n"
84 "\n"
85 "Subclasses should override.\n"
86 );
87
88static PyObject *
89TextIOBase_newlines_get(PyObject *self, void *context)
90{
91 Py_RETURN_NONE;
92}
93
94
95static PyMethodDef TextIOBase_methods[] = {
96 {"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
97 {"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
98 {"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
99 {NULL, NULL}
100};
101
102static PyGetSetDef TextIOBase_getset[] = {
103 {"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
104 {"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
105 {0}
106};
107
108PyTypeObject PyTextIOBase_Type = {
109 PyVarObject_HEAD_INIT(NULL, 0)
110 "_io._TextIOBase", /*tp_name*/
111 0, /*tp_basicsize*/
112 0, /*tp_itemsize*/
113 0, /*tp_dealloc*/
114 0, /*tp_print*/
115 0, /*tp_getattr*/
116 0, /*tp_setattr*/
117 0, /*tp_compare */
118 0, /*tp_repr*/
119 0, /*tp_as_number*/
120 0, /*tp_as_sequence*/
121 0, /*tp_as_mapping*/
122 0, /*tp_hash */
123 0, /*tp_call*/
124 0, /*tp_str*/
125 0, /*tp_getattro*/
126 0, /*tp_setattro*/
127 0, /*tp_as_buffer*/
128 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
129 TextIOBase_doc, /* tp_doc */
130 0, /* tp_traverse */
131 0, /* tp_clear */
132 0, /* tp_richcompare */
133 0, /* tp_weaklistoffset */
134 0, /* tp_iter */
135 0, /* tp_iternext */
136 TextIOBase_methods, /* tp_methods */
137 0, /* tp_members */
138 TextIOBase_getset, /* tp_getset */
139 &PyIOBase_Type, /* tp_base */
140 0, /* tp_dict */
141 0, /* tp_descr_get */
142 0, /* tp_descr_set */
143 0, /* tp_dictoffset */
144 0, /* tp_init */
145 0, /* tp_alloc */
146 0, /* tp_new */
147};
148
149
150/* IncrementalNewlineDecoder */
151
152PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
153 "Codec used when reading a file in universal newlines mode. It wraps\n"
154 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
155 "records the types of newlines encountered. When used with\n"
156 "translate=False, it ensures that the newline sequence is returned in\n"
157 "one piece. When used with decoder=None, it expects unicode strings as\n"
158 "decode input and translates newlines without first invoking an external\n"
159 "decoder.\n"
160 );
161
162typedef struct {
163 PyObject_HEAD
164 PyObject *decoder;
165 PyObject *errors;
166 int pendingcr:1;
167 int translate:1;
168 unsigned int seennl:3;
169} PyNewLineDecoderObject;
170
171static int
172IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
173 PyObject *args, PyObject *kwds)
174{
175 PyObject *decoder;
176 int translate;
177 PyObject *errors = NULL;
178 char *kwlist[] = {"decoder", "translate", "errors", NULL};
179
180 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
181 kwlist, &decoder, &translate, &errors))
182 return -1;
183
184 self->decoder = decoder;
185 Py_INCREF(decoder);
186
187 if (errors == NULL) {
188 self->errors = PyUnicode_FromString("strict");
189 if (self->errors == NULL)
190 return -1;
191 }
192 else {
193 Py_INCREF(errors);
194 self->errors = errors;
195 }
196
197 self->translate = translate;
198 self->seennl = 0;
199 self->pendingcr = 0;
200
201 return 0;
202}
203
204static void
205IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
206{
207 Py_CLEAR(self->decoder);
208 Py_CLEAR(self->errors);
209 Py_TYPE(self)->tp_free((PyObject *)self);
210}
211
/* Bit flags recording which newline conventions have been encountered. */
#define SEEN_CR     1
#define SEEN_LF     2
#define SEEN_CRLF   4
#define SEEN_ALL    (SEEN_CR | SEEN_LF | SEEN_CRLF)
216
217PyObject *
218_PyIncrementalNewlineDecoder_decode(PyObject *_self,
219 PyObject *input, int final)
220{
221 PyObject *output;
222 Py_ssize_t output_len;
223 PyNewLineDecoderObject *self = (PyNewLineDecoderObject *) _self;
224
225 if (self->decoder == NULL) {
226 PyErr_SetString(PyExc_ValueError,
227 "IncrementalNewlineDecoder.__init__ not called");
228 return NULL;
229 }
230
231 /* decode input (with the eventual \r from a previous pass) */
232 if (self->decoder != Py_None) {
233 output = PyObject_CallMethodObjArgs(self->decoder,
234 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
235 }
236 else {
237 output = input;
238 Py_INCREF(output);
239 }
240
241 if (output == NULL)
242 return NULL;
243
244 if (!PyUnicode_Check(output)) {
245 PyErr_SetString(PyExc_TypeError,
246 "decoder should return a string result");
247 goto error;
248 }
249
250 output_len = PyUnicode_GET_SIZE(output);
251 if (self->pendingcr && (final || output_len > 0)) {
252 Py_UNICODE *out;
253 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
254 if (modified == NULL)
255 goto error;
256 out = PyUnicode_AS_UNICODE(modified);
257 out[0] = '\r';
258 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
259 output_len * sizeof(Py_UNICODE));
260 Py_DECREF(output);
261 output = modified;
262 self->pendingcr = 0;
263 output_len++;
264 }
265
266 /* retain last \r even when not translating data:
267 * then readline() is sure to get \r\n in one pass
268 */
269 if (!final) {
270 if (output_len > 0
271 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
272
273 if (Py_REFCNT(output) == 1) {
274 if (PyUnicode_Resize(&output, output_len - 1) < 0)
275 goto error;
276 }
277 else {
278 PyObject *modified = PyUnicode_FromUnicode(
279 PyUnicode_AS_UNICODE(output),
280 output_len - 1);
281 if (modified == NULL)
282 goto error;
283 Py_DECREF(output);
284 output = modified;
285 }
286 self->pendingcr = 1;
287 }
288 }
289
290 /* Record which newlines are read and do newline translation if desired,
291 all in one pass. */
292 {
293 Py_UNICODE *in_str;
294 Py_ssize_t len;
295 int seennl = self->seennl;
296 int only_lf = 0;
297
298 in_str = PyUnicode_AS_UNICODE(output);
299 len = PyUnicode_GET_SIZE(output);
300
301 if (len == 0)
302 return output;
303
304 /* If, up to now, newlines are consistently \n, do a quick check
305 for the \r *byte* with the libc's optimized memchr.
306 */
307 if (seennl == SEEN_LF || seennl == 0) {
Antoine Pitrou0e941892009-03-06 23:57:20 +0000308 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000309 }
310
Antoine Pitrou66913e22009-03-06 23:40:56 +0000311 if (only_lf) {
312 /* If not already seen, quick scan for a possible "\n" character.
313 (there's nothing else to be done, even when in translation mode)
314 */
315 if (seennl == 0 &&
316 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
317 Py_UNICODE *s, *end;
318 s = in_str;
319 end = in_str + len;
320 for (;;) {
321 Py_UNICODE c;
322 /* Fast loop for non-control characters */
323 while (*s > '\n')
324 s++;
325 c = *s++;
326 if (c == '\n') {
327 seennl |= SEEN_LF;
328 break;
329 }
330 if (s > end)
331 break;
332 }
333 }
334 /* Finished: we have scanned for newlines, and none of them
335 need translating */
336 }
337 else if (!self->translate) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000338 Py_UNICODE *s, *end;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000339 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000340 if (seennl == SEEN_ALL)
341 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 s = in_str;
343 end = in_str + len;
344 for (;;) {
345 Py_UNICODE c;
346 /* Fast loop for non-control characters */
347 while (*s > '\r')
348 s++;
349 c = *s++;
350 if (c == '\n')
351 seennl |= SEEN_LF;
352 else if (c == '\r') {
353 if (*s == '\n') {
354 seennl |= SEEN_CRLF;
355 s++;
356 }
357 else
358 seennl |= SEEN_CR;
359 }
360 if (s > end)
361 break;
362 if (seennl == SEEN_ALL)
363 break;
364 }
365 endscan:
366 ;
367 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000368 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 PyObject *translated = NULL;
370 Py_UNICODE *out_str;
371 Py_UNICODE *in, *out, *end;
372 if (Py_REFCNT(output) != 1) {
373 /* We could try to optimize this so that we only do a copy
374 when there is something to translate. On the other hand,
375 most decoders should only output non-shared strings, i.e.
376 translation is done in place. */
377 translated = PyUnicode_FromUnicode(NULL, len);
378 if (translated == NULL)
379 goto error;
380 assert(Py_REFCNT(translated) == 1);
381 memcpy(PyUnicode_AS_UNICODE(translated),
382 PyUnicode_AS_UNICODE(output),
383 len * sizeof(Py_UNICODE));
384 }
385 else {
386 translated = output;
387 }
388 out_str = PyUnicode_AS_UNICODE(translated);
389 in = in_str;
390 out = out_str;
391 end = in_str + len;
392 for (;;) {
393 Py_UNICODE c;
394 /* Fast loop for non-control characters */
395 while ((c = *in++) > '\r')
396 *out++ = c;
397 if (c == '\n') {
398 *out++ = c;
399 seennl |= SEEN_LF;
400 continue;
401 }
402 if (c == '\r') {
403 if (*in == '\n') {
404 in++;
405 seennl |= SEEN_CRLF;
406 }
407 else
408 seennl |= SEEN_CR;
409 *out++ = '\n';
410 continue;
411 }
412 if (in > end)
413 break;
414 *out++ = c;
415 }
416 if (translated != output) {
417 Py_DECREF(output);
418 output = translated;
419 }
420 if (out - out_str != len) {
421 if (PyUnicode_Resize(&output, out - out_str) < 0)
422 goto error;
423 }
424 }
425 self->seennl |= seennl;
426 }
427
428 return output;
429
430 error:
431 Py_DECREF(output);
432 return NULL;
433}
434
435static PyObject *
436IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
437 PyObject *args, PyObject *kwds)
438{
439 char *kwlist[] = {"input", "final", NULL};
440 PyObject *input;
441 int final = 0;
442
443 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
444 kwlist, &input, &final))
445 return NULL;
446 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
447}
448
449static PyObject *
450IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
451{
452 PyObject *buffer;
453 unsigned PY_LONG_LONG flag;
454
455 if (self->decoder != Py_None) {
456 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
457 _PyIO_str_getstate, NULL);
458 if (state == NULL)
459 return NULL;
460 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
461 Py_DECREF(state);
462 return NULL;
463 }
464 Py_INCREF(buffer);
465 Py_DECREF(state);
466 }
467 else {
468 buffer = PyBytes_FromString("");
469 flag = 0;
470 }
471 flag <<= 1;
472 if (self->pendingcr)
473 flag |= 1;
474 return Py_BuildValue("NK", buffer, flag);
475}
476
477static PyObject *
478IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject *self, PyObject *state)
479{
480 PyObject *buffer;
481 unsigned PY_LONG_LONG flag;
482
483 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
484 return NULL;
485
486 self->pendingcr = (int) flag & 1;
487 flag >>= 1;
488
489 if (self->decoder != Py_None)
490 return PyObject_CallMethod(self->decoder,
491 "setstate", "((OK))", buffer, flag);
492 else
493 Py_RETURN_NONE;
494}
495
496static PyObject *
497IncrementalNewlineDecoder_reset(PyNewLineDecoderObject *self, PyObject *args)
498{
499 self->seennl = 0;
500 self->pendingcr = 0;
501 if (self->decoder != Py_None)
502 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
503 else
504 Py_RETURN_NONE;
505}
506
507static PyObject *
508IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject *self, void *context)
509{
510 switch (self->seennl) {
511 case SEEN_CR:
512 return PyUnicode_FromString("\r");
513 case SEEN_LF:
514 return PyUnicode_FromString("\n");
515 case SEEN_CRLF:
516 return PyUnicode_FromString("\r\n");
517 case SEEN_CR | SEEN_LF:
518 return Py_BuildValue("ss", "\r", "\n");
519 case SEEN_CR | SEEN_CRLF:
520 return Py_BuildValue("ss", "\r", "\r\n");
521 case SEEN_LF | SEEN_CRLF:
522 return Py_BuildValue("ss", "\n", "\r\n");
523 case SEEN_CR | SEEN_LF | SEEN_CRLF:
524 return Py_BuildValue("sss", "\r", "\n", "\r\n");
525 default:
526 Py_RETURN_NONE;
527 }
528
529}
530
531
532static PyMethodDef IncrementalNewlineDecoder_methods[] = {
533 {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS},
534 {"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
535 {"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
536 {"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
537 {0}
538};
539
540static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
541 {"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
542 {0}
543};
544
545PyTypeObject PyIncrementalNewlineDecoder_Type = {
546 PyVarObject_HEAD_INIT(NULL, 0)
547 "_io.IncrementalNewlineDecoder", /*tp_name*/
548 sizeof(PyNewLineDecoderObject), /*tp_basicsize*/
549 0, /*tp_itemsize*/
550 (destructor)IncrementalNewlineDecoder_dealloc, /*tp_dealloc*/
551 0, /*tp_print*/
552 0, /*tp_getattr*/
553 0, /*tp_setattr*/
554 0, /*tp_compare */
555 0, /*tp_repr*/
556 0, /*tp_as_number*/
557 0, /*tp_as_sequence*/
558 0, /*tp_as_mapping*/
559 0, /*tp_hash */
560 0, /*tp_call*/
561 0, /*tp_str*/
562 0, /*tp_getattro*/
563 0, /*tp_setattro*/
564 0, /*tp_as_buffer*/
565 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
566 IncrementalNewlineDecoder_doc, /* tp_doc */
567 0, /* tp_traverse */
568 0, /* tp_clear */
569 0, /* tp_richcompare */
570 0, /*tp_weaklistoffset*/
571 0, /* tp_iter */
572 0, /* tp_iternext */
573 IncrementalNewlineDecoder_methods, /* tp_methods */
574 0, /* tp_members */
575 IncrementalNewlineDecoder_getset, /* tp_getset */
576 0, /* tp_base */
577 0, /* tp_dict */
578 0, /* tp_descr_get */
579 0, /* tp_descr_set */
580 0, /* tp_dictoffset */
581 (initproc)IncrementalNewlineDecoder_init, /* tp_init */
582 0, /* tp_alloc */
583 PyType_GenericNew, /* tp_new */
584};
585
586
587/* TextIOWrapper */
588
589PyDoc_STRVAR(TextIOWrapper_doc,
590 "Character and line based layer over a BufferedIOBase object, buffer.\n"
591 "\n"
592 "encoding gives the name of the encoding that the stream will be\n"
593 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
594 "\n"
595 "errors determines the strictness of encoding and decoding (see the\n"
596 "codecs.register) and defaults to \"strict\".\n"
597 "\n"
598 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
599 "handling of line endings. If it is None, universal newlines is\n"
600 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
601 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
602 "caller. Conversely, on output, '\\n' is translated to the system\n"
603 "default line seperator, os.linesep. If newline is any other of its\n"
604 "legal values, that newline becomes the newline when the file is read\n"
605 "and it is returned untranslated. On output, '\\n' is converted to the\n"
606 "newline.\n"
607 "\n"
608 "If line_buffering is True, a call to flush is implied when a call to\n"
609 "write contains a newline character."
610 );
611
612typedef PyObject *
613 (*encodefunc_t)(PyObject *, PyObject *);
614
615typedef struct
616{
617 PyObject_HEAD
618 int ok; /* initialized? */
619 Py_ssize_t chunk_size;
620 PyObject *buffer;
621 PyObject *encoding;
622 PyObject *encoder;
623 PyObject *decoder;
624 PyObject *readnl;
625 PyObject *errors;
626 const char *writenl; /* utf-8 encoded, NULL stands for \n */
627 char line_buffering;
628 char readuniversal;
629 char readtranslate;
630 char writetranslate;
631 char seekable;
632 char telling;
633 /* Specialized encoding func (see below) */
634 encodefunc_t encodefunc;
635
636 /* Reads and writes are internally buffered in order to speed things up.
637 However, any read will first flush the write buffer if itsn't empty.
638
639 Please also note that text to be written is first encoded before being
640 buffered. This is necessary so that encoding errors are immediately
641 reported to the caller, but it unfortunately means that the
642 IncrementalEncoder (whose encode() method is always written in Python)
643 becomes a bottleneck for small writes.
644 */
645 PyObject *decoded_chars; /* buffer for text returned from decoder */
646 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
647 PyObject *pending_bytes; /* list of bytes objects waiting to be
648 written, or NULL */
649 Py_ssize_t pending_bytes_count;
650 PyObject *snapshot;
651 /* snapshot is either None, or a tuple (dec_flags, next_input) where
652 * dec_flags is the second (integer) item of the decoder state and
653 * next_input is the chunk of input bytes that comes next after the
654 * snapshot point. We use this to reconstruct decoder states in tell().
655 */
656
657 /* Cache raw object if it's a FileIO object */
658 PyObject *raw;
659
660 PyObject *weakreflist;
661 PyObject *dict;
662} PyTextIOWrapperObject;
663
664
665/* A couple of specialized cases in order to bypass the slow incremental
666 encoding methods for the most popular encodings. */
667
668static PyObject *
669ascii_encode(PyTextIOWrapperObject *self, PyObject *text)
670{
671 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
672 PyUnicode_GET_SIZE(text),
673 PyBytes_AS_STRING(self->errors));
674}
675
676static PyObject *
677utf16be_encode(PyTextIOWrapperObject *self, PyObject *text)
678{
679 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
680 PyUnicode_GET_SIZE(text),
681 PyBytes_AS_STRING(self->errors), 1);
682}
683
684static PyObject *
685utf16le_encode(PyTextIOWrapperObject *self, PyObject *text)
686{
687 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
688 PyUnicode_GET_SIZE(text),
689 PyBytes_AS_STRING(self->errors), -1);
690}
691
692static PyObject *
693utf16_encode(PyTextIOWrapperObject *self, PyObject *text)
694{
695 PyObject *res;
696 res = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
697 PyUnicode_GET_SIZE(text),
698 PyBytes_AS_STRING(self->errors), 0);
699 if (res == NULL)
700 return NULL;
701 /* Next writes will skip the BOM and use native byte ordering */
702#if defined(WORDS_BIGENDIAN)
703 self->encodefunc = (encodefunc_t) utf16be_encode;
704#else
705 self->encodefunc = (encodefunc_t) utf16le_encode;
706#endif
707 return res;
708}
709
710
711static PyObject *
712utf8_encode(PyTextIOWrapperObject *self, PyObject *text)
713{
714 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
715 PyUnicode_GET_SIZE(text),
716 PyBytes_AS_STRING(self->errors));
717}
718
719static PyObject *
720latin1_encode(PyTextIOWrapperObject *self, PyObject *text)
721{
722 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
723 PyUnicode_GET_SIZE(text),
724 PyBytes_AS_STRING(self->errors));
725}
726
727/* Map normalized encoding names onto the specialized encoding funcs */
728
729typedef struct {
730 const char *name;
731 encodefunc_t encodefunc;
732} encodefuncentry;
733
734encodefuncentry encodefuncs[] = {
735 {"ascii", (encodefunc_t) ascii_encode},
736 {"iso8859-1", (encodefunc_t) latin1_encode},
737 {"utf-16-be", (encodefunc_t) utf16be_encode},
738 {"utf-16-le", (encodefunc_t) utf16le_encode},
739 {"utf-16", (encodefunc_t) utf16_encode},
740 {"utf-8", (encodefunc_t) utf8_encode},
741 {NULL, NULL}
742};
743
744
745static int
746TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds)
747{
748 char *kwlist[] = {"buffer", "encoding", "errors",
749 "newline", "line_buffering",
750 NULL};
751 PyObject *buffer, *raw;
752 char *encoding = NULL;
753 char *errors = NULL;
754 char *newline = NULL;
755 int line_buffering = 0;
756 _PyIO_State *state = IO_STATE;
757
758 PyObject *res;
759 int r;
760
761 self->ok = 0;
762 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
763 kwlist, &buffer, &encoding, &errors,
764 &newline, &line_buffering))
765 return -1;
766
767 if (newline && newline[0] != '\0'
768 && !(newline[0] == '\n' && newline[1] == '\0')
769 && !(newline[0] == '\r' && newline[1] == '\0')
770 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
771 PyErr_Format(PyExc_ValueError,
772 "illegal newline value: %s", newline);
773 return -1;
774 }
775
776 Py_CLEAR(self->buffer);
777 Py_CLEAR(self->encoding);
778 Py_CLEAR(self->encoder);
779 Py_CLEAR(self->decoder);
780 Py_CLEAR(self->readnl);
781 Py_CLEAR(self->decoded_chars);
782 Py_CLEAR(self->pending_bytes);
783 Py_CLEAR(self->snapshot);
784 Py_CLEAR(self->errors);
785 Py_CLEAR(self->raw);
786 self->decoded_chars_used = 0;
787 self->pending_bytes_count = 0;
788 self->encodefunc = NULL;
789
790 if (encoding == NULL) {
791 /* Try os.device_encoding(fileno) */
792 PyObject *fileno;
793 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
794 /* Ignore only AttributeError and UnsupportedOperation */
795 if (fileno == NULL) {
796 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
797 PyErr_ExceptionMatches(state->unsupported_operation)) {
798 PyErr_Clear();
799 }
800 else {
801 goto error;
802 }
803 }
804 else {
805 self->encoding = PyObject_CallMethod(state->os_module,
806 "device_encoding",
807 "N", fileno);
808 if (self->encoding == NULL)
809 goto error;
810 else if (!PyUnicode_Check(self->encoding))
811 Py_CLEAR(self->encoding);
812 }
813 }
814 if (encoding == NULL && self->encoding == NULL) {
815 if (state->locale_module == NULL) {
816 state->locale_module = PyImport_ImportModule("locale");
817 if (state->locale_module == NULL)
818 goto catch_ImportError;
819 else
820 goto use_locale;
821 }
822 else {
823 use_locale:
824 self->encoding = PyObject_CallMethod(
825 state->locale_module, "getpreferredencoding", NULL);
826 if (self->encoding == NULL) {
827 catch_ImportError:
828 /*
829 Importing locale can raise a ImportError because of
830 _functools, and locale.getpreferredencoding can raise a
831 ImportError if _locale is not available. These will happen
832 during module building.
833 */
834 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
835 PyErr_Clear();
836 self->encoding = PyUnicode_FromString("ascii");
837 }
838 else
839 goto error;
840 }
841 else if (!PyUnicode_Check(self->encoding))
842 Py_CLEAR(self->encoding);
843 }
844 }
845 if (self->encoding != NULL)
846 encoding = _PyUnicode_AsString(self->encoding);
847 else if (encoding != NULL) {
848 self->encoding = PyUnicode_FromString(encoding);
849 if (self->encoding == NULL)
850 goto error;
851 }
852 else {
853 PyErr_SetString(PyExc_IOError,
854 "could not determine default encoding");
855 }
856
857 if (errors == NULL)
858 errors = "strict";
859 self->errors = PyBytes_FromString(errors);
860 if (self->errors == NULL)
861 goto error;
862
863 self->chunk_size = 8192;
864 self->readuniversal = (newline == NULL || newline[0] == '\0');
865 self->line_buffering = line_buffering;
866 self->readtranslate = (newline == NULL);
867 if (newline) {
868 self->readnl = PyUnicode_FromString(newline);
869 if (self->readnl == NULL)
870 return -1;
871 }
872 self->writetranslate = (newline == NULL || newline[0] != '\0');
873 if (!self->readuniversal && self->readnl) {
874 self->writenl = _PyUnicode_AsString(self->readnl);
875 if (!strcmp(self->writenl, "\n"))
876 self->writenl = NULL;
877 }
878#ifdef MS_WINDOWS
879 else
880 self->writenl = "\r\n";
881#endif
882
883 /* Build the decoder object */
884 res = PyObject_CallMethod(buffer, "readable", NULL);
885 if (res == NULL)
886 goto error;
887 r = PyObject_IsTrue(res);
888 Py_DECREF(res);
889 if (r == -1)
890 goto error;
891 if (r == 1) {
892 self->decoder = PyCodec_IncrementalDecoder(
893 encoding, errors);
894 if (self->decoder == NULL)
895 goto error;
896
897 if (self->readuniversal) {
898 PyObject *incrementalDecoder = PyObject_CallFunction(
899 (PyObject *)&PyIncrementalNewlineDecoder_Type,
900 "Oi", self->decoder, (int)self->readtranslate);
901 if (incrementalDecoder == NULL)
902 goto error;
903 Py_CLEAR(self->decoder);
904 self->decoder = incrementalDecoder;
905 }
906 }
907
908 /* Build the encoder object */
909 res = PyObject_CallMethod(buffer, "writable", NULL);
910 if (res == NULL)
911 goto error;
912 r = PyObject_IsTrue(res);
913 Py_DECREF(res);
914 if (r == -1)
915 goto error;
916 if (r == 1) {
917 PyObject *ci;
918 self->encoder = PyCodec_IncrementalEncoder(
919 encoding, errors);
920 if (self->encoder == NULL)
921 goto error;
922 /* Get the normalized named of the codec */
923 ci = _PyCodec_Lookup(encoding);
924 if (ci == NULL)
925 goto error;
926 res = PyObject_GetAttrString(ci, "name");
927 Py_DECREF(ci);
928 if (res == NULL)
929 PyErr_Clear();
930 else if (PyUnicode_Check(res)) {
931 encodefuncentry *e = encodefuncs;
932 while (e->name != NULL) {
933 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
934 self->encodefunc = e->encodefunc;
935 break;
936 }
937 e++;
938 }
939 }
940 Py_XDECREF(res);
941 }
942
943 self->buffer = buffer;
944 Py_INCREF(buffer);
945
946 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
947 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
948 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
949 raw = PyObject_GetAttrString(buffer, "raw");
950 /* Cache the raw FileIO object to speed up 'closed' checks */
951 if (raw == NULL)
952 PyErr_Clear();
953 else if (Py_TYPE(raw) == &PyFileIO_Type)
954 self->raw = raw;
955 else
956 Py_DECREF(raw);
957 }
958
959 res = PyObject_CallMethod(buffer, "seekable", NULL);
960 if (res == NULL)
961 goto error;
962 self->seekable = self->telling = PyObject_IsTrue(res);
963 Py_DECREF(res);
964
965 self->ok = 1;
966 return 0;
967
968 error:
969 return -1;
970}
971
972static int
973_TextIOWrapper_clear(PyTextIOWrapperObject *self)
974{
975 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
976 return -1;
977 self->ok = 0;
978 Py_CLEAR(self->buffer);
979 Py_CLEAR(self->encoding);
980 Py_CLEAR(self->encoder);
981 Py_CLEAR(self->decoder);
982 Py_CLEAR(self->readnl);
983 Py_CLEAR(self->decoded_chars);
984 Py_CLEAR(self->pending_bytes);
985 Py_CLEAR(self->snapshot);
986 Py_CLEAR(self->errors);
987 Py_CLEAR(self->raw);
988 return 0;
989}
990
991static void
992TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
993{
994 if (_TextIOWrapper_clear(self) < 0)
995 return;
996 _PyObject_GC_UNTRACK(self);
997 if (self->weakreflist != NULL)
998 PyObject_ClearWeakRefs((PyObject *)self);
999 Py_CLEAR(self->dict);
1000 Py_TYPE(self)->tp_free((PyObject *)self);
1001}
1002
1003static int
1004TextIOWrapper_traverse(PyTextIOWrapperObject *self, visitproc visit, void *arg)
1005{
1006 Py_VISIT(self->buffer);
1007 Py_VISIT(self->encoding);
1008 Py_VISIT(self->encoder);
1009 Py_VISIT(self->decoder);
1010 Py_VISIT(self->readnl);
1011 Py_VISIT(self->decoded_chars);
1012 Py_VISIT(self->pending_bytes);
1013 Py_VISIT(self->snapshot);
1014 Py_VISIT(self->errors);
1015 Py_VISIT(self->raw);
1016
1017 Py_VISIT(self->dict);
1018 return 0;
1019}
1020
1021static int
1022TextIOWrapper_clear(PyTextIOWrapperObject *self)
1023{
1024 if (_TextIOWrapper_clear(self) < 0)
1025 return -1;
1026 Py_CLEAR(self->dict);
1027 return 0;
1028}
1029
1030static PyObject *
1031TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context);
1032
1033/* This macro takes some shortcuts to make the common case faster. */
1034#define CHECK_CLOSED(self) \
1035 do { \
1036 int r; \
1037 PyObject *_res; \
1038 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1039 if (self->raw != NULL) \
1040 r = _PyFileIO_closed(self->raw); \
1041 else { \
1042 _res = TextIOWrapper_closed_get(self, NULL); \
1043 if (_res == NULL) \
1044 return NULL; \
1045 r = PyObject_IsTrue(_res); \
1046 Py_DECREF(_res); \
1047 if (r < 0) \
1048 return NULL; \
1049 } \
1050 if (r > 0) { \
1051 PyErr_SetString(PyExc_ValueError, \
1052 "I/O operation on closed file."); \
1053 return NULL; \
1054 } \
1055 } \
1056 else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
1057 return NULL; \
1058 } while (0)
1059
/* Raise ValueError and return NULL from the enclosing function when the
   object has not been successfully initialized (ok <= 0).

   Wrapped in do { } while (0) so the macro behaves as a single statement
   (a bare `if` would capture a following `else`), and the parameter is
   parenthesized.  Invoke it with a trailing semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1066
/* Raise ValueError and return -1 from the enclosing function when the
   object has not been successfully initialized (ok <= 0).

   Wrapped in do { } while (0) so the macro behaves as a single statement
   (a bare `if` would capture a following `else`), and the parameter is
   parenthesized.  Invoke it with a trailing semicolon. */
#define CHECK_INITIALIZED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
    } while (0)
1073
1074
1075Py_LOCAL_INLINE(const Py_UNICODE *)
1076findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1077{
1078 /* like wcschr, but doesn't stop at NULL characters */
1079 while (size-- > 0) {
1080 if (*s == ch)
1081 return s;
1082 s++;
1083 }
1084 return NULL;
1085}
1086
1087/* Flush the internal write buffer. This doesn't explicitly flush the
1088 underlying buffered object, though. */
1089static int
1090_TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
1091{
1092 PyObject *b, *ret;
1093
1094 if (self->pending_bytes == NULL)
1095 return 0;
1096 b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1097 if (b == NULL)
1098 return -1;
1099 ret = PyObject_CallMethodObjArgs(self->buffer,
1100 _PyIO_str_write, b, NULL);
1101 Py_DECREF(b);
1102 if (ret == NULL)
1103 return -1;
1104 Py_DECREF(ret);
1105 Py_CLEAR(self->pending_bytes);
1106 self->pending_bytes_count = 0;
1107 return 0;
1108}
1109
1110static PyObject *
1111TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
1112{
1113 PyObject *ret;
1114 PyObject *text; /* owned reference */
1115 PyObject *b;
1116 Py_ssize_t textlen;
1117 int haslf = 0;
1118 int needflush = 0;
1119
1120 CHECK_INITIALIZED(self);
1121
1122 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1123 return NULL;
1124 }
1125
1126 CHECK_CLOSED(self);
1127
1128 Py_INCREF(text);
1129
1130 textlen = PyUnicode_GetSize(text);
1131
1132 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1133 if (findchar(PyUnicode_AS_UNICODE(text),
1134 PyUnicode_GET_SIZE(text), '\n'))
1135 haslf = 1;
1136
1137 if (haslf && self->writetranslate && self->writenl != NULL) {
1138 PyObject *newtext = PyObject_CallMethod(
1139 text, "replace", "ss", "\n", self->writenl);
1140 Py_DECREF(text);
1141 if (newtext == NULL)
1142 return NULL;
1143 text = newtext;
1144 }
1145
1146 if (self->line_buffering &&
1147 (haslf ||
1148 findchar(PyUnicode_AS_UNICODE(text),
1149 PyUnicode_GET_SIZE(text), '\r')))
1150 needflush = 1;
1151
1152 /* XXX What if we were just reading? */
1153 if (self->encodefunc != NULL)
1154 b = (*self->encodefunc)((PyObject *) self, text);
1155 else
1156 b = PyObject_CallMethodObjArgs(self->encoder,
1157 _PyIO_str_encode, text, NULL);
1158 Py_DECREF(text);
1159 if (b == NULL)
1160 return NULL;
1161
1162 if (self->pending_bytes == NULL) {
1163 self->pending_bytes = PyList_New(0);
1164 if (self->pending_bytes == NULL) {
1165 Py_DECREF(b);
1166 return NULL;
1167 }
1168 self->pending_bytes_count = 0;
1169 }
1170 if (PyList_Append(self->pending_bytes, b) < 0) {
1171 Py_DECREF(b);
1172 return NULL;
1173 }
1174 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1175 Py_DECREF(b);
1176 if (self->pending_bytes_count > self->chunk_size || needflush) {
1177 if (_TextIOWrapper_writeflush(self) < 0)
1178 return NULL;
1179 }
1180
1181 if (needflush) {
1182 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1183 if (ret == NULL)
1184 return NULL;
1185 Py_DECREF(ret);
1186 }
1187
1188 Py_CLEAR(self->snapshot);
1189
1190 if (self->decoder) {
1191 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1192 if (ret == NULL)
1193 return NULL;
1194 Py_DECREF(ret);
1195 }
1196
1197 return PyLong_FromSsize_t(textlen);
1198}
1199
1200/* Steal a reference to chars and store it in the decoded_char buffer;
1201 */
1202static void
1203TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars)
1204{
1205 Py_CLEAR(self->decoded_chars);
1206 self->decoded_chars = chars;
1207 self->decoded_chars_used = 0;
1208}
1209
1210static PyObject *
1211TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
1212{
1213 PyObject *chars;
1214 Py_ssize_t avail;
1215
1216 if (self->decoded_chars == NULL)
1217 return PyUnicode_FromStringAndSize(NULL, 0);
1218
1219 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1220 - self->decoded_chars_used);
1221
1222 assert(avail >= 0);
1223
1224 if (n < 0 || n > avail)
1225 n = avail;
1226
1227 if (self->decoded_chars_used > 0 || n < avail) {
1228 chars = PyUnicode_FromUnicode(
1229 PyUnicode_AS_UNICODE(self->decoded_chars)
1230 + self->decoded_chars_used, n);
1231 if (chars == NULL)
1232 return NULL;
1233 }
1234 else {
1235 chars = self->decoded_chars;
1236 Py_INCREF(chars);
1237 }
1238
1239 self->decoded_chars_used += n;
1240 return chars;
1241}
1242
1243/* Read and decode the next chunk of data from the BufferedReader.
1244 */
1245static int
1246TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
1247{
1248 PyObject *dec_buffer = NULL;
1249 PyObject *dec_flags = NULL;
1250 PyObject *input_chunk = NULL;
1251 PyObject *decoded_chars, *chunk_size;
1252 int eof;
1253
1254 /* The return value is True unless EOF was reached. The decoded string is
1255 * placed in self._decoded_chars (replacing its previous value). The
1256 * entire input chunk is sent to the decoder, though some of it may remain
1257 * buffered in the decoder, yet to be converted.
1258 */
1259
1260 if (self->decoder == NULL) {
1261 PyErr_SetString(PyExc_ValueError, "no decoder");
1262 return -1;
1263 }
1264
1265 if (self->telling) {
1266 /* To prepare for tell(), we need to snapshot a point in the file
1267 * where the decoder's input buffer is empty.
1268 */
1269
1270 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1271 _PyIO_str_getstate, NULL);
1272 if (state == NULL)
1273 return -1;
1274 /* Given this, we know there was a valid snapshot point
1275 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1276 */
1277 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1278 Py_DECREF(state);
1279 return -1;
1280 }
1281 Py_INCREF(dec_buffer);
1282 Py_INCREF(dec_flags);
1283 Py_DECREF(state);
1284 }
1285
1286 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1287 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1288 if (chunk_size == NULL)
1289 goto fail;
1290 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1291 _PyIO_str_read1, chunk_size, NULL);
1292 Py_DECREF(chunk_size);
1293 if (input_chunk == NULL)
1294 goto fail;
1295 assert(PyBytes_Check(input_chunk));
1296
1297 eof = (PyBytes_Size(input_chunk) == 0);
1298
1299 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1300 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1301 self->decoder, input_chunk, eof);
1302 }
1303 else {
1304 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1305 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1306 }
1307
1308 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1309 if (decoded_chars == NULL)
1310 goto fail;
1311 TextIOWrapper_set_decoded_chars(self, decoded_chars);
1312 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1313 eof = 0;
1314
1315 if (self->telling) {
1316 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1317 * next input to be decoded is dec_buffer + input_chunk.
1318 */
1319 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1320 if (next_input == NULL)
1321 goto fail;
1322 assert (PyBytes_Check(next_input));
1323 Py_DECREF(dec_buffer);
1324 Py_CLEAR(self->snapshot);
1325 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1326 }
1327 Py_DECREF(input_chunk);
1328
1329 return (eof == 0);
1330
1331 fail:
1332 Py_XDECREF(dec_buffer);
1333 Py_XDECREF(dec_flags);
1334 Py_XDECREF(input_chunk);
1335 return -1;
1336}
1337
1338static PyObject *
1339TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
1340{
1341 Py_ssize_t n = -1;
1342 PyObject *result = NULL, *chunks = NULL;
1343
1344 CHECK_INITIALIZED(self);
1345
1346 if (!PyArg_ParseTuple(args, "|n:read", &n))
1347 return NULL;
1348
1349 CHECK_CLOSED(self);
1350
1351 if (_TextIOWrapper_writeflush(self) < 0)
1352 return NULL;
1353
1354 if (n < 0) {
1355 /* Read everything */
1356 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1357 PyObject *decoded;
1358 if (bytes == NULL)
1359 goto fail;
1360 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1361 bytes, Py_True, NULL);
1362 Py_DECREF(bytes);
1363 if (decoded == NULL)
1364 goto fail;
1365
1366 result = TextIOWrapper_get_decoded_chars(self, -1);
1367
1368 if (result == NULL) {
1369 Py_DECREF(decoded);
1370 return NULL;
1371 }
1372
1373 PyUnicode_AppendAndDel(&result, decoded);
1374 if (result == NULL)
1375 goto fail;
1376
1377 Py_CLEAR(self->snapshot);
1378 return result;
1379 }
1380 else {
1381 int res = 1;
1382 Py_ssize_t remaining = n;
1383
1384 result = TextIOWrapper_get_decoded_chars(self, n);
1385 if (result == NULL)
1386 goto fail;
1387 remaining -= PyUnicode_GET_SIZE(result);
1388
1389 /* Keep reading chunks until we have n characters to return */
1390 while (remaining > 0) {
1391 res = TextIOWrapper_read_chunk(self);
1392 if (res < 0)
1393 goto fail;
1394 if (res == 0) /* EOF */
1395 break;
1396 if (chunks == NULL) {
1397 chunks = PyList_New(0);
1398 if (chunks == NULL)
1399 goto fail;
1400 }
1401 if (PyList_Append(chunks, result) < 0)
1402 goto fail;
1403 Py_DECREF(result);
1404 result = TextIOWrapper_get_decoded_chars(self, remaining);
1405 if (result == NULL)
1406 goto fail;
1407 remaining -= PyUnicode_GET_SIZE(result);
1408 }
1409 if (chunks != NULL) {
1410 if (result != NULL && PyList_Append(chunks, result) < 0)
1411 goto fail;
1412 Py_CLEAR(result);
1413 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1414 if (result == NULL)
1415 goto fail;
1416 Py_CLEAR(chunks);
1417 }
1418 return result;
1419 }
1420 fail:
1421 Py_XDECREF(result);
1422 Py_XDECREF(chunks);
1423 return NULL;
1424}
1425
1426
1427/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1428 that is to the NUL character. Otherwise the function will produce
1429 incorrect results. */
1430static Py_UNICODE *
1431find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1432{
1433 Py_UNICODE *s = start;
1434 for (;;) {
1435 while (*s > ch)
1436 s++;
1437 if (*s == ch)
1438 return s;
1439 if (s == end)
1440 return NULL;
1441 s++;
1442 }
1443}
1444
1445Py_ssize_t
1446_PyIO_find_line_ending(
1447 int translated, int universal, PyObject *readnl,
1448 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1449{
1450 Py_ssize_t len = end - start;
1451
1452 if (translated) {
1453 /* Newlines are already translated, only search for \n */
1454 Py_UNICODE *pos = find_control_char(start, end, '\n');
1455 if (pos != NULL)
1456 return pos - start + 1;
1457 else {
1458 *consumed = len;
1459 return -1;
1460 }
1461 }
1462 else if (universal) {
1463 /* Universal newline search. Find any of \r, \r\n, \n
1464 * The decoder ensures that \r\n are not split in two pieces
1465 */
1466 Py_UNICODE *s = start;
1467 for (;;) {
1468 Py_UNICODE ch;
1469 /* Fast path for non-control chars. The loop always ends
1470 since the Py_UNICODE storage is NUL-terminated. */
1471 while (*s > '\r')
1472 s++;
1473 if (s >= end) {
1474 *consumed = len;
1475 return -1;
1476 }
1477 ch = *s++;
1478 if (ch == '\n')
1479 return s - start;
1480 if (ch == '\r') {
1481 if (*s == '\n')
1482 return s - start + 1;
1483 else
1484 return s - start;
1485 }
1486 }
1487 }
1488 else {
1489 /* Non-universal mode. */
1490 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1491 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1492 if (readnl_len == 1) {
1493 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1494 if (pos != NULL)
1495 return pos - start + 1;
1496 *consumed = len;
1497 return -1;
1498 }
1499 else {
1500 Py_UNICODE *s = start;
1501 Py_UNICODE *e = end - readnl_len + 1;
1502 Py_UNICODE *pos;
1503 if (e < s)
1504 e = s;
1505 while (s < e) {
1506 Py_ssize_t i;
1507 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1508 if (pos == NULL || pos >= e)
1509 break;
1510 for (i = 1; i < readnl_len; i++) {
1511 if (pos[i] != nl[i])
1512 break;
1513 }
1514 if (i == readnl_len)
1515 return pos - start + readnl_len;
1516 s = pos + 1;
1517 }
1518 pos = find_control_char(e, end, nl[0]);
1519 if (pos == NULL)
1520 *consumed = len;
1521 else
1522 *consumed = pos - start;
1523 return -1;
1524 }
1525 }
1526}
1527
1528static PyObject *
1529_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
1530{
1531 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1532 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1533 int res;
1534
1535 CHECK_CLOSED(self);
1536
1537 if (_TextIOWrapper_writeflush(self) < 0)
1538 return NULL;
1539
1540 chunked = 0;
1541
1542 while (1) {
1543 Py_UNICODE *ptr;
1544 Py_ssize_t line_len;
1545 Py_ssize_t consumed = 0;
1546
1547 /* First, get some data if necessary */
1548 res = 1;
1549 while (!self->decoded_chars ||
1550 !PyUnicode_GET_SIZE(self->decoded_chars)) {
1551 res = TextIOWrapper_read_chunk(self);
1552 if (res < 0)
1553 goto error;
1554 if (res == 0)
1555 break;
1556 }
1557 if (res == 0) {
1558 /* end of file */
1559 TextIOWrapper_set_decoded_chars(self, NULL);
1560 Py_CLEAR(self->snapshot);
1561 start = endpos = offset_to_buffer = 0;
1562 break;
1563 }
1564
1565 if (remaining == NULL) {
1566 line = self->decoded_chars;
1567 start = self->decoded_chars_used;
1568 offset_to_buffer = 0;
1569 Py_INCREF(line);
1570 }
1571 else {
1572 assert(self->decoded_chars_used == 0);
1573 line = PyUnicode_Concat(remaining, self->decoded_chars);
1574 start = 0;
1575 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1576 Py_CLEAR(remaining);
1577 if (line == NULL)
1578 goto error;
1579 }
1580
1581 ptr = PyUnicode_AS_UNICODE(line);
1582 line_len = PyUnicode_GET_SIZE(line);
1583
1584 endpos = _PyIO_find_line_ending(
1585 self->readtranslate, self->readuniversal, self->readnl,
1586 ptr + start, ptr + line_len, &consumed);
1587 if (endpos >= 0) {
1588 endpos += start;
1589 if (limit >= 0 && (endpos - start) + chunked >= limit)
1590 endpos = start + limit - chunked;
1591 break;
1592 }
1593
1594 /* We can put aside up to `endpos` */
1595 endpos = consumed + start;
1596 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1597 /* Didn't find line ending, but reached length limit */
1598 endpos = start + limit - chunked;
1599 break;
1600 }
1601
1602 if (endpos > start) {
1603 /* No line ending seen yet - put aside current data */
1604 PyObject *s;
1605 if (chunks == NULL) {
1606 chunks = PyList_New(0);
1607 if (chunks == NULL)
1608 goto error;
1609 }
1610 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1611 if (s == NULL)
1612 goto error;
1613 if (PyList_Append(chunks, s) < 0) {
1614 Py_DECREF(s);
1615 goto error;
1616 }
1617 chunked += PyUnicode_GET_SIZE(s);
1618 Py_DECREF(s);
1619 }
1620 /* There may be some remaining bytes we'll have to prepend to the
1621 next chunk of data */
1622 if (endpos < line_len) {
1623 remaining = PyUnicode_FromUnicode(
1624 ptr + endpos, line_len - endpos);
1625 if (remaining == NULL)
1626 goto error;
1627 }
1628 Py_CLEAR(line);
1629 /* We have consumed the buffer */
1630 TextIOWrapper_set_decoded_chars(self, NULL);
1631 }
1632
1633 if (line != NULL) {
1634 /* Our line ends in the current buffer */
1635 self->decoded_chars_used = endpos - offset_to_buffer;
1636 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1637 if (start == 0 && Py_REFCNT(line) == 1) {
1638 if (PyUnicode_Resize(&line, endpos) < 0)
1639 goto error;
1640 }
1641 else {
1642 PyObject *s = PyUnicode_FromUnicode(
1643 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1644 Py_CLEAR(line);
1645 if (s == NULL)
1646 goto error;
1647 line = s;
1648 }
1649 }
1650 }
1651 if (remaining != NULL) {
1652 if (chunks == NULL) {
1653 chunks = PyList_New(0);
1654 if (chunks == NULL)
1655 goto error;
1656 }
1657 if (PyList_Append(chunks, remaining) < 0)
1658 goto error;
1659 Py_CLEAR(remaining);
1660 }
1661 if (chunks != NULL) {
1662 if (line != NULL && PyList_Append(chunks, line) < 0)
1663 goto error;
1664 Py_CLEAR(line);
1665 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1666 if (line == NULL)
1667 goto error;
1668 Py_DECREF(chunks);
1669 }
1670 if (line == NULL)
1671 line = PyUnicode_FromStringAndSize(NULL, 0);
1672
1673 return line;
1674
1675 error:
1676 Py_XDECREF(chunks);
1677 Py_XDECREF(remaining);
1678 Py_XDECREF(line);
1679 return NULL;
1680}
1681
1682static PyObject *
1683TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args)
1684{
1685 Py_ssize_t limit = -1;
1686
1687 CHECK_INITIALIZED(self);
1688 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1689 return NULL;
1690 }
1691 return _TextIOWrapper_readline(self, limit);
1692}
1693
1694/* Seek and Tell */
1695
1696typedef struct {
1697 Py_off_t start_pos;
1698 int dec_flags;
1699 int bytes_to_feed;
1700 int chars_to_skip;
1701 char need_eof;
1702} CookieStruct;
1703
1704/*
1705 To speed up cookie packing/unpacking, we store the fields in a temporary
1706 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1707 The following macros define at which offsets in the intermediary byte
1708 string the various CookieStruct fields will be stored.
1709 */
1710
1711#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1712
1713#if defined(WORDS_BIGENDIAN)
1714
1715# define IS_LITTLE_ENDIAN 0
1716
1717/* We want the least significant byte of start_pos to also be the least
1718 significant byte of the cookie, which means that in big-endian mode we
1719 must copy the fields in reverse order. */
1720
1721# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1722# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1723# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1724# define OFF_CHARS_TO_SKIP (sizeof(char))
1725# define OFF_NEED_EOF 0
1726
1727#else
1728
1729# define IS_LITTLE_ENDIAN 1
1730
1731/* Little-endian mode: the least significant byte of start_pos will
1732 naturally end up the least significant byte of the cookie. */
1733
1734# define OFF_START_POS 0
1735# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1736# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1737# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1738# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1739
1740#endif
1741
1742static int
1743TextIOWrapper_parseCookie(CookieStruct *cookie, PyObject *cookieObj)
1744{
1745 unsigned char buffer[COOKIE_BUF_LEN];
1746 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1747 if (cookieLong == NULL)
1748 return -1;
1749
1750 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1751 IS_LITTLE_ENDIAN, 0) < 0) {
1752 Py_DECREF(cookieLong);
1753 return -1;
1754 }
1755 Py_DECREF(cookieLong);
1756
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001757 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1758 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1759 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1760 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1761 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762
1763 return 0;
1764}
1765
1766static PyObject *
1767TextIOWrapper_buildCookie(CookieStruct *cookie)
1768{
1769 unsigned char buffer[COOKIE_BUF_LEN];
1770
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001771 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1772 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1773 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1774 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1775 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001776
1777 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1778}
1779#undef IS_LITTLE_ENDIAN
1780
1781static int
1782_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
1783 CookieStruct *cookie)
1784{
1785 PyObject *res;
1786 /* When seeking to the start of the stream, we call decoder.reset()
1787 rather than decoder.getstate().
1788 This is for a few decoders such as utf-16 for which the state value
1789 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1790 utf-16, that we are expecting a BOM).
1791 */
1792 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1793 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1794 else
1795 res = PyObject_CallMethod(self->decoder, "setstate",
1796 "((yi))", "", cookie->dec_flags);
1797 if (res == NULL)
1798 return -1;
1799 Py_DECREF(res);
1800 return 0;
1801}
1802
1803static PyObject *
1804TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
1805{
1806 PyObject *cookieObj, *posobj;
1807 CookieStruct cookie;
1808 int whence = 0;
1809 static PyObject *zero = NULL;
1810 PyObject *res;
1811 int cmp;
1812
1813 CHECK_INITIALIZED(self);
1814
1815 if (zero == NULL) {
1816 zero = PyLong_FromLong(0L);
1817 if (zero == NULL)
1818 return NULL;
1819 }
1820
1821 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1822 return NULL;
1823 CHECK_CLOSED(self);
1824
1825 Py_INCREF(cookieObj);
1826
1827 if (!self->seekable) {
1828 PyErr_SetString(PyExc_IOError,
1829 "underlying stream is not seekable");
1830 goto fail;
1831 }
1832
1833 if (whence == 1) {
1834 /* seek relative to current position */
1835 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1836 if (cmp < 0)
1837 goto fail;
1838
1839 if (cmp == 0) {
1840 PyErr_SetString(PyExc_IOError,
1841 "can't do nonzero cur-relative seeks");
1842 goto fail;
1843 }
1844
1845 /* Seeking to the current position should attempt to
1846 * sync the underlying buffer with the current position.
1847 */
1848 Py_DECREF(cookieObj);
1849 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1850 if (cookieObj == NULL)
1851 goto fail;
1852 }
1853 else if (whence == 2) {
1854 /* seek relative to end of file */
1855
1856 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1857 if (cmp < 0)
1858 goto fail;
1859
1860 if (cmp == 0) {
1861 PyErr_SetString(PyExc_IOError,
1862 "can't do nonzero end-relative seeks");
1863 goto fail;
1864 }
1865
1866 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
1867 if (res == NULL)
1868 goto fail;
1869 Py_DECREF(res);
1870
1871 TextIOWrapper_set_decoded_chars(self, NULL);
1872 Py_CLEAR(self->snapshot);
1873 if (self->decoder) {
1874 res = PyObject_CallMethod(self->decoder, "reset", NULL);
1875 if (res == NULL)
1876 goto fail;
1877 Py_DECREF(res);
1878 }
1879
1880 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
1881 Py_XDECREF(cookieObj);
1882 return res;
1883 }
1884 else if (whence != 0) {
1885 PyErr_Format(PyExc_ValueError,
1886 "invalid whence (%d, should be 0, 1 or 2)", whence);
1887 goto fail;
1888 }
1889
1890 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
1891 if (cmp < 0)
1892 goto fail;
1893
1894 if (cmp == 1) {
1895 PyErr_Format(PyExc_ValueError,
1896 "negative seek position %R", cookieObj);
1897 goto fail;
1898 }
1899
1900 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1901 if (res == NULL)
1902 goto fail;
1903 Py_DECREF(res);
1904
1905 /* The strategy of seek() is to go back to the safe start point
1906 * and replay the effect of read(chars_to_skip) from there.
1907 */
1908 if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
1909 goto fail;
1910
1911 /* Seek back to the safe start point. */
1912 posobj = PyLong_FromOff_t(cookie.start_pos);
1913 if (posobj == NULL)
1914 goto fail;
1915 res = PyObject_CallMethodObjArgs(self->buffer,
1916 _PyIO_str_seek, posobj, NULL);
1917 Py_DECREF(posobj);
1918 if (res == NULL)
1919 goto fail;
1920 Py_DECREF(res);
1921
1922 TextIOWrapper_set_decoded_chars(self, NULL);
1923 Py_CLEAR(self->snapshot);
1924
1925 /* Restore the decoder to its state from the safe start point. */
1926 if (self->decoder) {
1927 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
1928 goto fail;
1929 }
1930
1931 if (cookie.chars_to_skip) {
1932 /* Just like _read_chunk, feed the decoder and save a snapshot. */
1933 PyObject *input_chunk = PyObject_CallMethod(
1934 self->buffer, "read", "i", cookie.bytes_to_feed);
1935 PyObject *decoded;
1936
1937 if (input_chunk == NULL)
1938 goto fail;
1939
1940 assert (PyBytes_Check(input_chunk));
1941
1942 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
1943 if (self->snapshot == NULL) {
1944 Py_DECREF(input_chunk);
1945 goto fail;
1946 }
1947
1948 decoded = PyObject_CallMethod(self->decoder, "decode",
1949 "Oi", input_chunk, (int)cookie.need_eof);
1950
1951 if (decoded == NULL)
1952 goto fail;
1953
1954 TextIOWrapper_set_decoded_chars(self, decoded);
1955
1956 /* Skip chars_to_skip of the decoded characters. */
1957 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
1958 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
1959 goto fail;
1960 }
1961 self->decoded_chars_used = cookie.chars_to_skip;
1962 }
1963 else {
1964 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
1965 if (self->snapshot == NULL)
1966 goto fail;
1967 }
1968
1969 return cookieObj;
1970 fail:
1971 Py_XDECREF(cookieObj);
1972 return NULL;
1973
1974}
1975
1976static PyObject *
1977TextIOWrapper_tell(PyTextIOWrapperObject *self, PyObject *args)
1978{
1979 PyObject *res;
1980 PyObject *posobj = NULL;
1981 CookieStruct cookie = {0,0,0,0,0};
1982 PyObject *next_input;
1983 Py_ssize_t chars_to_skip, chars_decoded;
1984 PyObject *saved_state = NULL;
1985 char *input, *input_end;
1986
1987 CHECK_INITIALIZED(self);
1988 CHECK_CLOSED(self);
1989
1990 if (!self->seekable) {
1991 PyErr_SetString(PyExc_IOError,
1992 "underlying stream is not seekable");
1993 goto fail;
1994 }
1995 if (!self->telling) {
1996 PyErr_SetString(PyExc_IOError,
1997 "telling position disabled by next() call");
1998 goto fail;
1999 }
2000
2001 if (_TextIOWrapper_writeflush(self) < 0)
2002 return NULL;
2003 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2004 if (res == NULL)
2005 goto fail;
2006 Py_DECREF(res);
2007
2008 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2009 if (posobj == NULL)
2010 goto fail;
2011
2012 if (self->decoder == NULL || self->snapshot == NULL) {
2013 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2014 return posobj;
2015 }
2016
2017#if defined(HAVE_LARGEFILE_SUPPORT)
2018 cookie.start_pos = PyLong_AsLongLong(posobj);
2019#else
2020 cookie.start_pos = PyLong_AsLong(posobj);
2021#endif
2022 if (PyErr_Occurred())
2023 goto fail;
2024
2025 /* Skip backward to the snapshot point (see _read_chunk). */
2026 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2027 goto fail;
2028
2029 assert (PyBytes_Check(next_input));
2030
2031 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2032
2033 /* How many decoded characters have been used up since the snapshot? */
2034 if (self->decoded_chars_used == 0) {
2035 /* We haven't moved from the snapshot point. */
2036 Py_DECREF(posobj);
2037 return TextIOWrapper_buildCookie(&cookie);
2038 }
2039
2040 chars_to_skip = self->decoded_chars_used;
2041
2042 /* Starting from the snapshot position, we will walk the decoder
2043 * forward until it gives us enough decoded characters.
2044 */
2045 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2046 _PyIO_str_getstate, NULL);
2047 if (saved_state == NULL)
2048 goto fail;
2049
2050 /* Note our initial start point. */
2051 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
2052 goto fail;
2053
2054 /* Feed the decoder one byte at a time. As we go, note the
2055 * nearest "safe start point" before the current location
2056 * (a point where the decoder has nothing buffered, so seek()
2057 * can safely start from there and advance to this location).
2058 */
2059 chars_decoded = 0;
2060 input = PyBytes_AS_STRING(next_input);
2061 input_end = input + PyBytes_GET_SIZE(next_input);
2062 while (input < input_end) {
2063 PyObject *state;
2064 char *dec_buffer;
2065 Py_ssize_t dec_buffer_len;
2066 int dec_flags;
2067
2068 PyObject *decoded = PyObject_CallMethod(
2069 self->decoder, "decode", "y#", input, 1);
2070 if (decoded == NULL)
2071 goto fail;
2072 assert (PyUnicode_Check(decoded));
2073 chars_decoded += PyUnicode_GET_SIZE(decoded);
2074 Py_DECREF(decoded);
2075
2076 cookie.bytes_to_feed += 1;
2077
2078 state = PyObject_CallMethodObjArgs(self->decoder,
2079 _PyIO_str_getstate, NULL);
2080 if (state == NULL)
2081 goto fail;
2082 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2083 Py_DECREF(state);
2084 goto fail;
2085 }
2086 Py_DECREF(state);
2087
2088 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2089 /* Decoder buffer is empty, so this is a safe start point. */
2090 cookie.start_pos += cookie.bytes_to_feed;
2091 chars_to_skip -= chars_decoded;
2092 cookie.dec_flags = dec_flags;
2093 cookie.bytes_to_feed = 0;
2094 chars_decoded = 0;
2095 }
2096 if (chars_decoded >= chars_to_skip)
2097 break;
2098 input++;
2099 }
2100 if (input == input_end) {
2101 /* We didn't get enough decoded data; signal EOF to get more. */
2102 PyObject *decoded = PyObject_CallMethod(
2103 self->decoder, "decode", "yi", "", /* final = */ 1);
2104 if (decoded == NULL)
2105 goto fail;
2106 assert (PyUnicode_Check(decoded));
2107 chars_decoded += PyUnicode_GET_SIZE(decoded);
2108 Py_DECREF(decoded);
2109 cookie.need_eof = 1;
2110
2111 if (chars_decoded < chars_to_skip) {
2112 PyErr_SetString(PyExc_IOError,
2113 "can't reconstruct logical file position");
2114 goto fail;
2115 }
2116 }
2117
2118 /* finally */
2119 Py_XDECREF(posobj);
2120 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2121 Py_DECREF(saved_state);
2122 if (res == NULL)
2123 return NULL;
2124 Py_DECREF(res);
2125
2126 /* The returned cookie corresponds to the last safe start point. */
2127 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2128 return TextIOWrapper_buildCookie(&cookie);
2129
2130 fail:
2131 Py_XDECREF(posobj);
2132 if (saved_state) {
2133 PyObject *type, *value, *traceback;
2134 PyErr_Fetch(&type, &value, &traceback);
2135
2136 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2137 Py_DECREF(saved_state);
2138 if (res == NULL)
2139 return NULL;
2140 Py_DECREF(res);
2141
2142 PyErr_Restore(type, value, traceback);
2143 }
2144 return NULL;
2145}
2146
2147static PyObject *
2148TextIOWrapper_truncate(PyTextIOWrapperObject *self, PyObject *args)
2149{
2150 PyObject *pos = Py_None;
2151 PyObject *res;
2152
2153 CHECK_INITIALIZED(self)
2154 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2155 return NULL;
2156 }
2157
2158 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2159 if (res == NULL)
2160 return NULL;
2161 Py_DECREF(res);
2162
2163 if (pos != Py_None) {
2164 res = PyObject_CallMethodObjArgs((PyObject *) self,
2165 _PyIO_str_seek, pos, NULL);
2166 if (res == NULL)
2167 return NULL;
2168 Py_DECREF(res);
2169 }
2170
2171 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2172}
2173
2174/* Inquiries */
2175
2176static PyObject *
2177TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args)
2178{
2179 CHECK_INITIALIZED(self);
2180 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2181}
2182
2183static PyObject *
2184TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args)
2185{
2186 CHECK_INITIALIZED(self);
2187 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2188}
2189
2190static PyObject *
2191TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args)
2192{
2193 CHECK_INITIALIZED(self);
2194 return PyObject_CallMethod(self->buffer, "readable", NULL);
2195}
2196
2197static PyObject *
2198TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args)
2199{
2200 CHECK_INITIALIZED(self);
2201 return PyObject_CallMethod(self->buffer, "writable", NULL);
2202}
2203
2204static PyObject *
2205TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args)
2206{
2207 CHECK_INITIALIZED(self);
2208 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2209}
2210
2211static PyObject *
2212TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args)
2213{
2214 CHECK_INITIALIZED(self);
2215 CHECK_CLOSED(self);
2216 self->telling = self->seekable;
2217 if (_TextIOWrapper_writeflush(self) < 0)
2218 return NULL;
2219 return PyObject_CallMethod(self->buffer, "flush", NULL);
2220}
2221
2222static PyObject *
2223TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args)
2224{
2225 PyObject *res;
2226 CHECK_INITIALIZED(self);
2227 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2228 if (res == NULL) {
2229 /* If flush() fails, just give up */
2230 PyErr_Clear();
2231 }
2232 else
2233 Py_DECREF(res);
2234
2235 return PyObject_CallMethod(self->buffer, "close", NULL);
2236}
2237
2238static PyObject *
2239TextIOWrapper_iternext(PyTextIOWrapperObject *self)
2240{
2241 PyObject *line;
2242
2243 CHECK_INITIALIZED(self);
2244
2245 self->telling = 0;
2246 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2247 /* Skip method call overhead for speed */
2248 line = _TextIOWrapper_readline(self, -1);
2249 }
2250 else {
2251 line = PyObject_CallMethodObjArgs((PyObject *)self,
2252 _PyIO_str_readline, NULL);
2253 if (line && !PyUnicode_Check(line)) {
2254 PyErr_Format(PyExc_IOError,
2255 "readline() should have returned an str object, "
2256 "not '%.200s'", Py_TYPE(line)->tp_name);
2257 Py_DECREF(line);
2258 return NULL;
2259 }
2260 }
2261
2262 if (line == NULL)
2263 return NULL;
2264
2265 if (PyUnicode_GET_SIZE(line) == 0) {
2266 /* Reached EOF or would have blocked */
2267 Py_DECREF(line);
2268 Py_CLEAR(self->snapshot);
2269 self->telling = self->seekable;
2270 return NULL;
2271 }
2272
2273 return line;
2274}
2275
2276static PyObject *
2277TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context)
2278{
2279 CHECK_INITIALIZED(self);
2280 return PyObject_GetAttrString(self->buffer, "name");
2281}
2282
2283static PyObject *
2284TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context)
2285{
2286 CHECK_INITIALIZED(self);
2287 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2288}
2289
2290static PyObject *
2291TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context)
2292{
2293 PyObject *res;
2294 CHECK_INITIALIZED(self);
2295 if (self->decoder == NULL)
2296 Py_RETURN_NONE;
2297 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2298 if (res == NULL) {
2299 PyErr_Clear();
2300 Py_RETURN_NONE;
2301 }
2302 return res;
2303}
2304
2305static PyObject *
2306TextIOWrapper_chunk_size_get(PyTextIOWrapperObject *self, void *context)
2307{
2308 CHECK_INITIALIZED(self);
2309 return PyLong_FromSsize_t(self->chunk_size);
2310}
2311
2312static int
2313TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
2314 PyObject *arg, void *context)
2315{
2316 Py_ssize_t n;
2317 CHECK_INITIALIZED_INT(self);
2318 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2319 if (n == -1 && PyErr_Occurred())
2320 return -1;
2321 if (n <= 0) {
2322 PyErr_SetString(PyExc_ValueError,
2323 "a strictly positive integer is required");
2324 return -1;
2325 }
2326 self->chunk_size = n;
2327 return 0;
2328}
2329
2330static PyMethodDef TextIOWrapper_methods[] = {
2331 {"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
2332 {"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
2333 {"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
2334 {"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
2335 {"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
2336
2337 {"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
2338 {"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
2339 {"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
2340 {"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
2341 {"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
2342
2343 {"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
2344 {"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
2345 {"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
2346 {NULL, NULL}
2347};
2348
2349static PyMemberDef TextIOWrapper_members[] = {
2350 {"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
2351 {"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
2352 {"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
2353 {NULL}
2354};
2355
2356static PyGetSetDef TextIOWrapper_getset[] = {
2357 {"name", (getter)TextIOWrapper_name_get, NULL, NULL},
2358 {"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
2359/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2360*/
2361 {"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
2362 {"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
2363 (setter)TextIOWrapper_chunk_size_set, NULL},
2364 {0}
2365};
2366
2367PyTypeObject PyTextIOWrapper_Type = {
2368 PyVarObject_HEAD_INIT(NULL, 0)
2369 "_io.TextIOWrapper", /*tp_name*/
2370 sizeof(PyTextIOWrapperObject), /*tp_basicsize*/
2371 0, /*tp_itemsize*/
2372 (destructor)TextIOWrapper_dealloc, /*tp_dealloc*/
2373 0, /*tp_print*/
2374 0, /*tp_getattr*/
2375 0, /*tp_setattr*/
2376 0, /*tp_compare */
2377 0, /*tp_repr*/
2378 0, /*tp_as_number*/
2379 0, /*tp_as_sequence*/
2380 0, /*tp_as_mapping*/
2381 0, /*tp_hash */
2382 0, /*tp_call*/
2383 0, /*tp_str*/
2384 0, /*tp_getattro*/
2385 0, /*tp_setattro*/
2386 0, /*tp_as_buffer*/
2387 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2388 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2389 TextIOWrapper_doc, /* tp_doc */
2390 (traverseproc)TextIOWrapper_traverse, /* tp_traverse */
2391 (inquiry)TextIOWrapper_clear, /* tp_clear */
2392 0, /* tp_richcompare */
2393 offsetof(PyTextIOWrapperObject, weakreflist), /*tp_weaklistoffset*/
2394 0, /* tp_iter */
2395 (iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
2396 TextIOWrapper_methods, /* tp_methods */
2397 TextIOWrapper_members, /* tp_members */
2398 TextIOWrapper_getset, /* tp_getset */
2399 0, /* tp_base */
2400 0, /* tp_dict */
2401 0, /* tp_descr_get */
2402 0, /* tp_descr_set */
2403 offsetof(PyTextIOWrapperObject, dict), /*tp_dictoffset*/
2404 (initproc)TextIOWrapper_init, /* tp_init */
2405 0, /* tp_alloc */
2406 PyType_GenericNew, /* tp_new */
2407};