blob: bb4c5efae8b5c353444befbf356cd7944cce4974 [file] [log] [blame]
Antoine Pitrou19690592009-06-12 20:14:08 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
16PyDoc_STRVAR(textiobase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
29}
30
31PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
39textiobase_detach(PyObject *self)
40{
41 return _unsupported("detach");
42}
43
44PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
52textiobase_read(PyObject *self, PyObject *args)
53{
54 return _unsupported("read");
55}
56
57PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
64textiobase_readline(PyObject *self, PyObject *args)
65{
66 return _unsupported("readline");
67}
68
69PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
76textiobase_write(PyObject *self, PyObject *args)
77{
78 return _unsupported("write");
79}
80
81PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
88textiobase_encoding_get(PyObject *self, void *context)
89{
90 Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
102textiobase_newlines_get(PyObject *self, void *context)
103{
104 Py_RETURN_NONE;
105}
106
107PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
114textiobase_errors_get(PyObject *self, void *context)
115{
116 Py_RETURN_NONE;
117}
118
119
120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 {NULL, NULL}
126};
127
128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 {NULL}
133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
156 textiobase_doc, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
163 textiobase_methods, /* tp_methods */
164 0, /* tp_members */
165 textiobase_getset, /* tp_getset */
166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrou2a466582009-09-21 21:17:48 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Antoine Pitrou19690592009-06-12 20:14:08 +0000196} nldecoder_object;
197
198static int
199incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
Serhiy Storchaka354d50e2013-02-03 17:10:42 +0200239static int
240check_decoded(PyObject *decoded)
241{
242 if (decoded == NULL)
243 return -1;
244 if (!PyUnicode_Check(decoded)) {
245 PyErr_Format(PyExc_TypeError,
246 "decoder should return a string result, not '%.200s'",
247 Py_TYPE(decoded)->tp_name);
248 Py_DECREF(decoded);
249 return -1;
250 }
251 return 0;
252}
253
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in nldecoder_object.seennl (a 3-bit field). */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
258
/* Decode one chunk of `input` through the newline decoder `_self`.

   Steps, in order:
     1. run `input` through self->decoder's decode method, or use `input`
        itself when the decoder is Py_None;
     2. prepend a '\r' that an earlier call held back (self->pendingcr);
     3. when `final` is false, strip a trailing '\r' and remember it in
        self->pendingcr so that a "\r\n" pair is never split across calls;
     4. scan the text, recording the newline kinds seen in self->seennl
        and, when self->translate is set, rewriting "\r\n" and "\r" to "\n".

   Returns a new reference to the resulting unicode object, or NULL with
   an exception set.  A ValueError is raised when self->decoder is NULL
   (i.e. __init__ has not run). */
259PyObject *
260_PyIncrementalNewlineDecoder_decode(PyObject *_self,
261 PyObject *input, int final)
262{
263 PyObject *output;
264 Py_ssize_t output_len;
265 nldecoder_object *self = (nldecoder_object *) _self;
266
267 if (self->decoder == NULL) {
268 PyErr_SetString(PyExc_ValueError,
269 "IncrementalNewlineDecoder.__init__ not called");
270 return NULL;
271 }
272
273 /* decode input (with the eventual \r from a previous pass) */
274 if (self->decoder != Py_None) {
275 output = PyObject_CallMethodObjArgs(self->decoder,
276 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
277 }
278 else {
279 output = input;
280 Py_INCREF(output);
281 }
282
/* check_decoded() handles both a NULL result and a non-unicode result;
   in the latter case it has already released `output`. */
Serhiy Storchaka354d50e2013-02-03 17:10:42 +0200283 if (check_decoded(output) < 0)
Antoine Pitrou19690592009-06-12 20:14:08 +0000284 return NULL;
285
/* Re-attach the '\r' withheld by a previous call: build a string one
   code unit longer, with '\r' in front of the freshly decoded text. */
Antoine Pitrou19690592009-06-12 20:14:08 +0000286 output_len = PyUnicode_GET_SIZE(output);
287 if (self->pendingcr && (final || output_len > 0)) {
288 Py_UNICODE *out;
289 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
290 if (modified == NULL)
291 goto error;
292 out = PyUnicode_AS_UNICODE(modified);
293 out[0] = '\r';
294 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
295 output_len * sizeof(Py_UNICODE));
296 Py_DECREF(output);
297 output = modified;
298 self->pendingcr = 0;
299 output_len++;
300 }
301
302 /* retain last \r even when not translating data:
303 * then readline() is sure to get \r\n in one pass
304 */
305 if (!final) {
306 if (output_len > 0
307 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
308
/* The string is shrunk in place only when this function holds the sole
   reference; otherwise a shortened copy is made. */
309 if (Py_REFCNT(output) == 1) {
310 if (PyUnicode_Resize(&output, output_len - 1) < 0)
311 goto error;
312 }
313 else {
314 PyObject *modified = PyUnicode_FromUnicode(
315 PyUnicode_AS_UNICODE(output),
316 output_len - 1);
317 if (modified == NULL)
318 goto error;
319 Py_DECREF(output);
320 output = modified;
321 }
322 self->pendingcr = 1;
323 }
324 }
325
326 /* Record which newlines are read and do newline translation if desired,
327 all in one pass. */
328 {
329 Py_UNICODE *in_str;
330 Py_ssize_t len;
331 int seennl = self->seennl;
332 int only_lf = 0;
333
334 in_str = PyUnicode_AS_UNICODE(output);
335 len = PyUnicode_GET_SIZE(output);
336
337 if (len == 0)
338 return output;
339
340 /* If, up to now, newlines are consistently \n, do a quick check
341 for the \r *byte* with the libc's optimized memchr.
342 */
/* The memchr runs over raw bytes, so it can also match a 0x0D byte that
   is part of a wider code unit; that only sends us down one of the
   slower branches below, which compare whole code units. */
343 if (seennl == SEEN_LF || seennl == 0) {
344 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
345 }
346
347 if (only_lf) {
348 /* If not already seen, quick scan for a possible "\n" character.
349 (there's nothing else to be done, even when in translation mode)
350 */
351 if (seennl == 0 &&
352 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
353 Py_UNICODE *s, *end;
354 s = in_str;
355 end = in_str + len;
/* NOTE(review): the inner `while` has no bounds check of its own; it
   stops at the first code unit <= '\n'.  This presumably relies on the
   unicode buffer having a terminating 0 at index `len` - confirm. */
356 for (;;) {
357 Py_UNICODE c;
358 /* Fast loop for non-control characters */
359 while (*s > '\n')
360 s++;
361 c = *s++;
362 if (c == '\n') {
363 seennl |= SEEN_LF;
364 break;
365 }
366 if (s > end)
367 break;
368 }
369 }
370 /* Finished: we have scanned for newlines, and none of them
371 need translating */
372 }
/* Not translating: the text is returned unchanged, only the newline
   kinds are recorded. */
373 else if (!self->translate) {
374 Py_UNICODE *s, *end;
375 /* We have already seen all newline types, no need to scan again */
376 if (seennl == SEEN_ALL)
377 goto endscan;
378 s = in_str;
379 end = in_str + len;
380 for (;;) {
381 Py_UNICODE c;
382 /* Fast loop for non-control characters */
383 while (*s > '\r')
384 s++;
385 c = *s++;
386 if (c == '\n')
387 seennl |= SEEN_LF;
388 else if (c == '\r') {
389 if (*s == '\n') {
390 seennl |= SEEN_CRLF;
391 s++;
392 }
393 else
394 seennl |= SEEN_CR;
395 }
396 if (s > end)
397 break;
398 if (seennl == SEEN_ALL)
399 break;
400 }
401 endscan:
402 ;
403 }
/* Translating: copy from `in` to `out`, collapsing "\r\n" and "\r" to
   "\n".  `out` never gets ahead of `in`, so when `translated` is the
   same object as `output` the rewrite happens in place. */
404 else {
405 PyObject *translated = NULL;
406 Py_UNICODE *out_str;
407 Py_UNICODE *in, *out, *end;
408 if (Py_REFCNT(output) != 1) {
409 /* We could try to optimize this so that we only do a copy
410 when there is something to translate. On the other hand,
411 most decoders should only output non-shared strings, i.e.
412 translation is done in place. */
413 translated = PyUnicode_FromUnicode(NULL, len);
414 if (translated == NULL)
415 goto error;
416 assert(Py_REFCNT(translated) == 1);
417 memcpy(PyUnicode_AS_UNICODE(translated),
418 PyUnicode_AS_UNICODE(output),
419 len * sizeof(Py_UNICODE));
420 }
421 else {
422 translated = output;
423 }
424 out_str = PyUnicode_AS_UNICODE(translated);
425 in = in_str;
426 out = out_str;
427 end = in_str + len;
428 for (;;) {
429 Py_UNICODE c;
430 /* Fast loop for non-control characters */
431 while ((c = *in++) > '\r')
432 *out++ = c;
433 if (c == '\n') {
434 *out++ = c;
435 seennl |= SEEN_LF;
436 continue;
437 }
438 if (c == '\r') {
439 if (*in == '\n') {
440 in++;
441 seennl |= SEEN_CRLF;
442 }
443 else
444 seennl |= SEEN_CR;
445 *out++ = '\n';
446 continue;
447 }
/* Any other code unit <= '\r' lands here: once `in` has moved past `end`
   the scan is over, otherwise the code unit is copied through. */
448 if (in > end)
449 break;
450 *out++ = c;
451 }
452 if (translated != output) {
453 Py_DECREF(output);
454 output = translated;
455 }
/* Each collapsed "\r\n" shortened the text by one code unit. */
456 if (out - out_str != len) {
457 if (PyUnicode_Resize(&output, out - out_str) < 0)
458 goto error;
459 }
460 }
461 self->seennl |= seennl;
462 }
463
464 return output;
465
/* Common failure exit: drop our reference to the partial result. */
466 error:
467 Py_DECREF(output);
468 return NULL;
469}
470
471static PyObject *
472incrementalnewlinedecoder_decode(nldecoder_object *self,
473 PyObject *args, PyObject *kwds)
474{
475 char *kwlist[] = {"input", "final", NULL};
476 PyObject *input;
477 int final = 0;
478
479 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
480 kwlist, &input, &final))
481 return NULL;
482 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
483}
484
485static PyObject *
486incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
487{
488 PyObject *buffer;
489 unsigned PY_LONG_LONG flag;
490
491 if (self->decoder != Py_None) {
492 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
493 _PyIO_str_getstate, NULL);
494 if (state == NULL)
495 return NULL;
496 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
497 Py_DECREF(state);
498 return NULL;
499 }
500 Py_INCREF(buffer);
501 Py_DECREF(state);
502 }
503 else {
504 buffer = PyBytes_FromString("");
505 flag = 0;
506 }
507 flag <<= 1;
508 if (self->pendingcr)
509 flag |= 1;
510 return Py_BuildValue("NK", buffer, flag);
511}
512
513static PyObject *
514incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
515{
516 PyObject *buffer;
517 unsigned PY_LONG_LONG flag;
518
519 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
520 return NULL;
521
522 self->pendingcr = (int) flag & 1;
523 flag >>= 1;
524
525 if (self->decoder != Py_None)
526 return PyObject_CallMethod(self->decoder,
527 "setstate", "((OK))", buffer, flag);
528 else
529 Py_RETURN_NONE;
530}
531
532static PyObject *
533incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
534{
535 self->seennl = 0;
536 self->pendingcr = 0;
537 if (self->decoder != Py_None)
538 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
539 else
540 Py_RETURN_NONE;
541}
542
543static PyObject *
544incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
545{
546 switch (self->seennl) {
547 case SEEN_CR:
548 return PyUnicode_FromString("\r");
549 case SEEN_LF:
550 return PyUnicode_FromString("\n");
551 case SEEN_CRLF:
552 return PyUnicode_FromString("\r\n");
553 case SEEN_CR | SEEN_LF:
554 return Py_BuildValue("ss", "\r", "\n");
555 case SEEN_CR | SEEN_CRLF:
556 return Py_BuildValue("ss", "\r", "\r\n");
557 case SEEN_LF | SEEN_CRLF:
558 return Py_BuildValue("ss", "\n", "\r\n");
559 case SEEN_CR | SEEN_LF | SEEN_CRLF:
560 return Py_BuildValue("sss", "\r", "\n", "\r\n");
561 default:
562 Py_RETURN_NONE;
563 }
564
565}
566
567
568static PyMethodDef incrementalnewlinedecoder_methods[] = {
569 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
570 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
571 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
572 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
573 {NULL}
574};
575
576static PyGetSetDef incrementalnewlinedecoder_getset[] = {
577 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
578 {NULL}
579};
580
581PyTypeObject PyIncrementalNewlineDecoder_Type = {
582 PyVarObject_HEAD_INIT(NULL, 0)
583 "_io.IncrementalNewlineDecoder", /*tp_name*/
584 sizeof(nldecoder_object), /*tp_basicsize*/
585 0, /*tp_itemsize*/
586 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
587 0, /*tp_print*/
588 0, /*tp_getattr*/
589 0, /*tp_setattr*/
590 0, /*tp_compare */
591 0, /*tp_repr*/
592 0, /*tp_as_number*/
593 0, /*tp_as_sequence*/
594 0, /*tp_as_mapping*/
595 0, /*tp_hash */
596 0, /*tp_call*/
597 0, /*tp_str*/
598 0, /*tp_getattro*/
599 0, /*tp_setattro*/
600 0, /*tp_as_buffer*/
601 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
602 incrementalnewlinedecoder_doc, /* tp_doc */
603 0, /* tp_traverse */
604 0, /* tp_clear */
605 0, /* tp_richcompare */
606 0, /*tp_weaklistoffset*/
607 0, /* tp_iter */
608 0, /* tp_iternext */
609 incrementalnewlinedecoder_methods, /* tp_methods */
610 0, /* tp_members */
611 incrementalnewlinedecoder_getset, /* tp_getset */
612 0, /* tp_base */
613 0, /* tp_dict */
614 0, /* tp_descr_get */
615 0, /* tp_descr_set */
616 0, /* tp_dictoffset */
617 (initproc)incrementalnewlinedecoder_init, /* tp_init */
618 0, /* tp_alloc */
619 PyType_GenericNew, /* tp_new */
620};
621
622
623/* TextIOWrapper */
624
625PyDoc_STRVAR(textiowrapper_doc,
626 "Character and line based layer over a BufferedIOBase object, buffer.\n"
627 "\n"
628 "encoding gives the name of the encoding that the stream will be\n"
629 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
630 "\n"
631 "errors determines the strictness of encoding and decoding (see the\n"
632 "codecs.register) and defaults to \"strict\".\n"
633 "\n"
Antoine Pitrou76370f42012-08-04 00:55:38 +0200634 "newline controls how line endings are handled. It can be None, '',\n"
635 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
636 "\n"
637 "* On input, if newline is None, universal newlines mode is\n"
638 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
639 " these are translated into '\\n' before being returned to the\n"
640 " caller. If it is '', universal newline mode is enabled, but line\n"
641 " endings are returned to the caller untranslated. If it has any of\n"
642 " the other legal values, input lines are only terminated by the given\n"
643 " string, and the line ending is returned to the caller untranslated.\n"
644 "\n"
645 "* On output, if newline is None, any '\\n' characters written are\n"
646 " translated to the system default line separator, os.linesep. If\n"
647 " newline is '', no translation takes place. If newline is any of the\n"
648 " other legal values, any '\\n' characters written are translated to\n"
649 " the given string.\n"
Antoine Pitrou19690592009-06-12 20:14:08 +0000650 "\n"
651 "If line_buffering is True, a call to flush is implied when a call to\n"
652 "write contains a newline character."
653 );
654
655typedef PyObject *
656 (*encodefunc_t)(PyObject *, PyObject *);
657
658typedef struct
659{
660 PyObject_HEAD
661 int ok; /* initialized? */
662 int detached;
663 Py_ssize_t chunk_size;
664 PyObject *buffer;
665 PyObject *encoding;
666 PyObject *encoder;
667 PyObject *decoder;
668 PyObject *readnl;
669 PyObject *errors;
670 const char *writenl; /* utf-8 encoded, NULL stands for \n */
671 char line_buffering;
672 char readuniversal;
673 char readtranslate;
674 char writetranslate;
675 char seekable;
676 char telling;
677 /* Specialized encoding func (see below) */
678 encodefunc_t encodefunc;
679 /* Whether or not it's the start of the stream */
680 char encoding_start_of_stream;
681
682 /* Reads and writes are internally buffered in order to speed things up.
683 However, any read will first flush the write buffer if itsn't empty.
684
685 Please also note that text to be written is first encoded before being
686 buffered. This is necessary so that encoding errors are immediately
687 reported to the caller, but it unfortunately means that the
688 IncrementalEncoder (whose encode() method is always written in Python)
689 becomes a bottleneck for small writes.
690 */
691 PyObject *decoded_chars; /* buffer for text returned from decoder */
692 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
693 PyObject *pending_bytes; /* list of bytes objects waiting to be
694 written, or NULL */
695 Py_ssize_t pending_bytes_count;
696 PyObject *snapshot;
697 /* snapshot is either None, or a tuple (dec_flags, next_input) where
698 * dec_flags is the second (integer) item of the decoder state and
699 * next_input is the chunk of input bytes that comes next after the
700 * snapshot point. We use this to reconstruct decoder states in tell().
701 */
702
703 /* Cache raw object if it's a FileIO object */
704 PyObject *raw;
705
706 PyObject *weakreflist;
707 PyObject *dict;
708} textio;
709
710
711/* A couple of specialized cases in order to bypass the slow incremental
712 encoding methods for the most popular encodings. */
713
714static PyObject *
715ascii_encode(textio *self, PyObject *text)
716{
717 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
718 PyUnicode_GET_SIZE(text),
719 PyBytes_AS_STRING(self->errors));
720}
721
722static PyObject *
723utf16be_encode(textio *self, PyObject *text)
724{
725 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
726 PyUnicode_GET_SIZE(text),
727 PyBytes_AS_STRING(self->errors), 1);
728}
729
730static PyObject *
731utf16le_encode(textio *self, PyObject *text)
732{
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), -1);
736}
737
738static PyObject *
739utf16_encode(textio *self, PyObject *text)
740{
741 if (!self->encoding_start_of_stream) {
742 /* Skip the BOM and use native byte ordering */
743#if defined(WORDS_BIGENDIAN)
744 return utf16be_encode(self, text);
745#else
746 return utf16le_encode(self, text);
747#endif
748 }
749 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
750 PyUnicode_GET_SIZE(text),
751 PyBytes_AS_STRING(self->errors), 0);
752}
753
754static PyObject *
755utf32be_encode(textio *self, PyObject *text)
756{
757 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
758 PyUnicode_GET_SIZE(text),
759 PyBytes_AS_STRING(self->errors), 1);
760}
761
762static PyObject *
763utf32le_encode(textio *self, PyObject *text)
764{
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 PyUnicode_GET_SIZE(text),
767 PyBytes_AS_STRING(self->errors), -1);
768}
769
770static PyObject *
771utf32_encode(textio *self, PyObject *text)
772{
773 if (!self->encoding_start_of_stream) {
774 /* Skip the BOM and use native byte ordering */
775#if defined(WORDS_BIGENDIAN)
776 return utf32be_encode(self, text);
777#else
778 return utf32le_encode(self, text);
779#endif
780 }
781 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
782 PyUnicode_GET_SIZE(text),
783 PyBytes_AS_STRING(self->errors), 0);
784}
785
786static PyObject *
787utf8_encode(textio *self, PyObject *text)
788{
789 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
790 PyUnicode_GET_SIZE(text),
791 PyBytes_AS_STRING(self->errors));
792}
793
794static PyObject *
795latin1_encode(textio *self, PyObject *text)
796{
797 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
798 PyUnicode_GET_SIZE(text),
799 PyBytes_AS_STRING(self->errors));
800}
801
802/* Map normalized encoding names onto the specialized encoding funcs */
803
804typedef struct {
805 const char *name;
806 encodefunc_t encodefunc;
807} encodefuncentry;
808
809static encodefuncentry encodefuncs[] = {
810 {"ascii", (encodefunc_t) ascii_encode},
811 {"iso8859-1", (encodefunc_t) latin1_encode},
812 {"utf-8", (encodefunc_t) utf8_encode},
813 {"utf-16-be", (encodefunc_t) utf16be_encode},
814 {"utf-16-le", (encodefunc_t) utf16le_encode},
815 {"utf-16", (encodefunc_t) utf16_encode},
816 {"utf-32-be", (encodefunc_t) utf32be_encode},
817 {"utf-32-le", (encodefunc_t) utf32le_encode},
818 {"utf-32", (encodefunc_t) utf32_encode},
819 {NULL, NULL}
820};
821
822
823static int
824textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
825{
826 char *kwlist[] = {"buffer", "encoding", "errors",
827 "newline", "line_buffering",
828 NULL};
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300829 PyObject *buffer, *raw, *codec_info = NULL;
Antoine Pitrou19690592009-06-12 20:14:08 +0000830 char *encoding = NULL;
831 char *errors = NULL;
832 char *newline = NULL;
833 int line_buffering = 0;
834
835 PyObject *res;
836 int r;
837
838 self->ok = 0;
839 self->detached = 0;
840 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
841 kwlist, &buffer, &encoding, &errors,
842 &newline, &line_buffering))
843 return -1;
844
845 if (newline && newline[0] != '\0'
846 && !(newline[0] == '\n' && newline[1] == '\0')
847 && !(newline[0] == '\r' && newline[1] == '\0')
848 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 PyErr_Format(PyExc_ValueError,
850 "illegal newline value: %s", newline);
851 return -1;
852 }
853
854 Py_CLEAR(self->buffer);
855 Py_CLEAR(self->encoding);
856 Py_CLEAR(self->encoder);
857 Py_CLEAR(self->decoder);
858 Py_CLEAR(self->readnl);
859 Py_CLEAR(self->decoded_chars);
860 Py_CLEAR(self->pending_bytes);
861 Py_CLEAR(self->snapshot);
862 Py_CLEAR(self->errors);
863 Py_CLEAR(self->raw);
864 self->decoded_chars_used = 0;
865 self->pending_bytes_count = 0;
866 self->encodefunc = NULL;
867 self->writenl = NULL;
868
869 if (encoding == NULL && self->encoding == NULL) {
870 if (_PyIO_locale_module == NULL) {
871 _PyIO_locale_module = PyImport_ImportModule("locale");
872 if (_PyIO_locale_module == NULL)
873 goto catch_ImportError;
874 else
875 goto use_locale;
876 }
877 else {
878 use_locale:
879 self->encoding = PyObject_CallMethod(
880 _PyIO_locale_module, "getpreferredencoding", NULL);
881 if (self->encoding == NULL) {
882 catch_ImportError:
883 /*
884 Importing locale can raise a ImportError because of
885 _functools, and locale.getpreferredencoding can raise a
886 ImportError if _locale is not available. These will happen
887 during module building.
888 */
889 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
890 PyErr_Clear();
891 self->encoding = PyString_FromString("ascii");
892 }
893 else
894 goto error;
895 }
896 else if (!PyString_Check(self->encoding))
897 Py_CLEAR(self->encoding);
898 }
899 }
900 if (self->encoding != NULL)
901 encoding = PyString_AsString(self->encoding);
902 else if (encoding != NULL) {
903 self->encoding = PyString_FromString(encoding);
904 if (self->encoding == NULL)
905 goto error;
906 }
907 else {
908 PyErr_SetString(PyExc_IOError,
909 "could not determine default encoding");
910 }
911
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300912 /* Check we have been asked for a real text encoding */
913 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
914 if (codec_info == NULL) {
915 Py_CLEAR(self->encoding);
916 goto error;
917 }
918
919 /* XXX: Failures beyond this point have the potential to leak elements
920 * of the partially constructed object (like self->encoding)
921 */
922
Antoine Pitrou19690592009-06-12 20:14:08 +0000923 if (errors == NULL)
924 errors = "strict";
925 self->errors = PyBytes_FromString(errors);
926 if (self->errors == NULL)
927 goto error;
928
929 self->chunk_size = 8192;
930 self->readuniversal = (newline == NULL || newline[0] == '\0');
931 self->line_buffering = line_buffering;
932 self->readtranslate = (newline == NULL);
933 if (newline) {
934 self->readnl = PyString_FromString(newline);
935 if (self->readnl == NULL)
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300936 goto error;
Antoine Pitrou19690592009-06-12 20:14:08 +0000937 }
938 self->writetranslate = (newline == NULL || newline[0] != '\0');
939 if (!self->readuniversal && self->writetranslate) {
940 self->writenl = PyString_AsString(self->readnl);
941 if (!strcmp(self->writenl, "\n"))
942 self->writenl = NULL;
943 }
944#ifdef MS_WINDOWS
945 else
946 self->writenl = "\r\n";
947#endif
948
949 /* Build the decoder object */
950 res = PyObject_CallMethod(buffer, "readable", NULL);
951 if (res == NULL)
952 goto error;
953 r = PyObject_IsTrue(res);
954 Py_DECREF(res);
955 if (r == -1)
956 goto error;
957 if (r == 1) {
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300958 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
959 errors);
Antoine Pitrou19690592009-06-12 20:14:08 +0000960 if (self->decoder == NULL)
961 goto error;
962
963 if (self->readuniversal) {
964 PyObject *incrementalDecoder = PyObject_CallFunction(
965 (PyObject *)&PyIncrementalNewlineDecoder_Type,
966 "Oi", self->decoder, (int)self->readtranslate);
967 if (incrementalDecoder == NULL)
968 goto error;
Serhiy Storchaka8688aca2015-12-27 12:38:48 +0200969 Py_SETREF(self->decoder, incrementalDecoder);
Antoine Pitrou19690592009-06-12 20:14:08 +0000970 }
971 }
972
973 /* Build the encoder object */
974 res = PyObject_CallMethod(buffer, "writable", NULL);
975 if (res == NULL)
976 goto error;
977 r = PyObject_IsTrue(res);
978 Py_DECREF(res);
979 if (r == -1)
980 goto error;
981 if (r == 1) {
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300982 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
983 errors);
Antoine Pitrou19690592009-06-12 20:14:08 +0000984 if (self->encoder == NULL)
985 goto error;
986 /* Get the normalized named of the codec */
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300987 res = PyObject_GetAttrString(codec_info, "name");
Antoine Pitrou19690592009-06-12 20:14:08 +0000988 if (res == NULL) {
989 if (PyErr_ExceptionMatches(PyExc_AttributeError))
990 PyErr_Clear();
991 else
992 goto error;
993 }
994 else if (PyString_Check(res)) {
995 encodefuncentry *e = encodefuncs;
996 while (e->name != NULL) {
997 if (!strcmp(PyString_AS_STRING(res), e->name)) {
998 self->encodefunc = e->encodefunc;
999 break;
1000 }
1001 e++;
1002 }
1003 }
1004 Py_XDECREF(res);
1005 }
1006
Serhiy Storchakac7797dc2015-05-31 20:21:00 +03001007 /* Finished sorting out the codec details */
1008 Py_DECREF(codec_info);
1009
Antoine Pitrou19690592009-06-12 20:14:08 +00001010 self->buffer = buffer;
1011 Py_INCREF(buffer);
1012
1013 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1014 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1015 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1016 raw = PyObject_GetAttrString(buffer, "raw");
1017 /* Cache the raw FileIO object to speed up 'closed' checks */
1018 if (raw == NULL) {
1019 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1020 PyErr_Clear();
1021 else
1022 goto error;
1023 }
1024 else if (Py_TYPE(raw) == &PyFileIO_Type)
1025 self->raw = raw;
1026 else
1027 Py_DECREF(raw);
1028 }
1029
1030 res = PyObject_CallMethod(buffer, "seekable", NULL);
1031 if (res == NULL)
1032 goto error;
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001033 r = PyObject_IsTrue(res);
Antoine Pitrou19690592009-06-12 20:14:08 +00001034 Py_DECREF(res);
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001035 if (r < 0)
1036 goto error;
1037 self->seekable = self->telling = r;
Antoine Pitrou19690592009-06-12 20:14:08 +00001038
1039 self->encoding_start_of_stream = 0;
1040 if (self->seekable && self->encoder) {
1041 PyObject *cookieObj;
1042 int cmp;
1043
1044 self->encoding_start_of_stream = 1;
1045
1046 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1047 if (cookieObj == NULL)
1048 goto error;
1049
1050 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1051 Py_DECREF(cookieObj);
1052 if (cmp < 0) {
1053 goto error;
1054 }
1055
1056 if (cmp == 0) {
1057 self->encoding_start_of_stream = 0;
1058 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1059 _PyIO_zero, NULL);
1060 if (res == NULL)
1061 goto error;
1062 Py_DECREF(res);
1063 }
1064 }
1065
1066 self->ok = 1;
1067 return 0;
1068
1069 error:
Serhiy Storchakac7797dc2015-05-31 20:21:00 +03001070 Py_XDECREF(codec_info);
Antoine Pitrou19690592009-06-12 20:14:08 +00001071 return -1;
1072}
1073
1074static int
1075_textiowrapper_clear(textio *self)
1076{
1077 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1078 return -1;
1079 self->ok = 0;
1080 Py_CLEAR(self->buffer);
1081 Py_CLEAR(self->encoding);
1082 Py_CLEAR(self->encoder);
1083 Py_CLEAR(self->decoder);
1084 Py_CLEAR(self->readnl);
1085 Py_CLEAR(self->decoded_chars);
1086 Py_CLEAR(self->pending_bytes);
1087 Py_CLEAR(self->snapshot);
1088 Py_CLEAR(self->errors);
1089 Py_CLEAR(self->raw);
1090 return 0;
1091}
1092
1093static void
1094textiowrapper_dealloc(textio *self)
1095{
1096 if (_textiowrapper_clear(self) < 0)
1097 return;
1098 _PyObject_GC_UNTRACK(self);
1099 if (self->weakreflist != NULL)
1100 PyObject_ClearWeakRefs((PyObject *)self);
1101 Py_CLEAR(self->dict);
1102 Py_TYPE(self)->tp_free((PyObject *)self);
1103}
1104
/* Report every object reference held by the wrapper to `visit`.

   Py_VISIT() skips NULL members and makes this function return early with
   the callback's result as soon as the callback returns non-zero.
   Returns 0 when every member has been visited.
   NOTE(review): presumably installed as the type's tp_traverse slot; the
   slot table is outside this section. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    /* Same members, in the same order, as released by
       _textiowrapper_clear(). */
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    /* The instance dictionary is handled separately from the members
       above, mirroring textiowrapper_clear(). */
    Py_VISIT(self->dict);
    return 0;
}
1122
1123static int
1124textiowrapper_clear(textio *self)
1125{
1126 if (_textiowrapper_clear(self) < 0)
1127 return -1;
1128 Py_CLEAR(self->dict);
1129 return 0;
1130}
1131
1132static PyObject *
1133textiowrapper_closed_get(textio *self, void *context);
1134
/* Make the enclosing function return NULL (with an exception set) when the
   stream is closed.

   For exact TextIOWrapper instances a shortcut is taken: if self->raw is
   set, the closed state is read through _PyFileIO_closed(); otherwise the
   `closed` getter is evaluated.  Any other type goes through the generic
   _PyIOBase_check_closed() helper.  Must only be used in functions that
   return a pointer. */
#define CHECK_CLOSED(self) \
    do { \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _closed; \
            if (self->raw != NULL) { \
                _closed = _PyFileIO_closed(self->raw); \
            } \
            else { \
                PyObject *_res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                _closed = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (_closed < 0) \
                    return NULL; \
            } \
            if (_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1161
/* Make the enclosing function return NULL with a ValueError when the object
   has not been (successfully) initialized, i.e. self->ok <= 0.

   The body is wrapped in do { } while (0) so that `CHECK_INITIALIZED(x);`
   is exactly one statement and stays correct inside an unbraced if/else.
   Must only be used in functions that return a pointer. */
#define CHECK_INITIALIZED(self) \
    do { \
        if (self->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1168
/* Make the enclosing function return NULL with a ValueError when the object
   is uninitialized (see CHECK_INITIALIZED) or when its underlying buffer
   has been detached.

   The two checks are wrapped in do { } while (0) so that the macro expands
   to exactly one statement; previously it expanded to two statements, so
   only the first one was governed by an enclosing unbraced `if`.
   Must only be used in functions that return a pointer. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)
1176
/* Variant of CHECK_ATTACHED for functions that return an int: makes the
   enclosing function return -1 with a ValueError when the object is
   uninitialized (self->ok <= 0) or its underlying buffer has been detached.

   Wrapped in do { } while (0) so that the expansion is a single statement
   and cannot capture or be broken by a surrounding if/else. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if (self->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1187
1188
1189static PyObject *
1190textiowrapper_detach(textio *self)
1191{
1192 PyObject *buffer, *res;
Benjamin Peterson53ae6142014-12-21 20:51:50 -06001193 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00001194 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1195 if (res == NULL)
1196 return NULL;
1197 Py_DECREF(res);
1198 buffer = self->buffer;
1199 self->buffer = NULL;
1200 self->detached = 1;
Antoine Pitrou19690592009-06-12 20:14:08 +00001201 return buffer;
1202}
1203
1204Py_LOCAL_INLINE(const Py_UNICODE *)
1205findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1206{
1207 /* like wcschr, but doesn't stop at NULL characters */
1208 while (size-- > 0) {
1209 if (*s == ch)
1210 return s;
1211 s++;
1212 }
1213 return NULL;
1214}
1215
1216/* Flush the internal write buffer. This doesn't explicitly flush the
1217 underlying buffered object, though. */
1218static int
1219_textiowrapper_writeflush(textio *self)
1220{
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001221 PyObject *pending, *b, *ret;
Antoine Pitrou19690592009-06-12 20:14:08 +00001222
1223 if (self->pending_bytes == NULL)
1224 return 0;
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001225
1226 pending = self->pending_bytes;
1227 Py_INCREF(pending);
1228 self->pending_bytes_count = 0;
1229 Py_CLEAR(self->pending_bytes);
1230
1231 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1232 Py_DECREF(pending);
Antoine Pitrou19690592009-06-12 20:14:08 +00001233 if (b == NULL)
1234 return -1;
Gregory P. Smitha998ad02013-02-01 13:02:59 -08001235 ret = NULL;
1236 do {
1237 ret = PyObject_CallMethodObjArgs(self->buffer,
1238 _PyIO_str_write, b, NULL);
1239 } while (ret == NULL && _PyIO_trap_eintr());
Antoine Pitrou19690592009-06-12 20:14:08 +00001240 Py_DECREF(b);
1241 if (ret == NULL)
1242 return -1;
1243 Py_DECREF(ret);
Antoine Pitrou19690592009-06-12 20:14:08 +00001244 return 0;
1245}
1246
1247static PyObject *
1248textiowrapper_write(textio *self, PyObject *args)
1249{
1250 PyObject *ret;
1251 PyObject *text; /* owned reference */
1252 PyObject *b;
1253 Py_ssize_t textlen;
1254 int haslf = 0;
1255 int needflush = 0;
1256
Benjamin Peterson53ae6142014-12-21 20:51:50 -06001257 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00001258
1259 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1260 return NULL;
1261 }
1262
1263 CHECK_CLOSED(self);
1264
1265 if (self->encoder == NULL) {
1266 PyErr_SetString(PyExc_IOError, "not writable");
1267 return NULL;
1268 }
1269
1270 Py_INCREF(text);
1271
1272 textlen = PyUnicode_GetSize(text);
1273
1274 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1275 if (findchar(PyUnicode_AS_UNICODE(text),
1276 PyUnicode_GET_SIZE(text), '\n'))
1277 haslf = 1;
1278
1279 if (haslf && self->writetranslate && self->writenl != NULL) {
1280 PyObject *newtext = PyObject_CallMethod(
1281 text, "replace", "ss", "\n", self->writenl);
1282 Py_DECREF(text);
1283 if (newtext == NULL)
1284 return NULL;
1285 text = newtext;
1286 }
1287
1288 if (self->line_buffering &&
1289 (haslf ||
1290 findchar(PyUnicode_AS_UNICODE(text),
1291 PyUnicode_GET_SIZE(text), '\r')))
1292 needflush = 1;
1293
1294 /* XXX What if we were just reading? */
1295 if (self->encodefunc != NULL) {
1296 b = (*self->encodefunc)((PyObject *) self, text);
1297 self->encoding_start_of_stream = 0;
1298 }
1299 else
1300 b = PyObject_CallMethodObjArgs(self->encoder,
1301 _PyIO_str_encode, text, NULL);
1302 Py_DECREF(text);
1303 if (b == NULL)
1304 return NULL;
1305
1306 if (self->pending_bytes == NULL) {
1307 self->pending_bytes = PyList_New(0);
1308 if (self->pending_bytes == NULL) {
1309 Py_DECREF(b);
1310 return NULL;
1311 }
1312 self->pending_bytes_count = 0;
1313 }
1314 if (PyList_Append(self->pending_bytes, b) < 0) {
1315 Py_DECREF(b);
1316 return NULL;
1317 }
1318 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1319 Py_DECREF(b);
1320 if (self->pending_bytes_count > self->chunk_size || needflush) {
1321 if (_textiowrapper_writeflush(self) < 0)
1322 return NULL;
1323 }
1324
1325 if (needflush) {
1326 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1327 if (ret == NULL)
1328 return NULL;
1329 Py_DECREF(ret);
1330 }
1331
1332 Py_CLEAR(self->snapshot);
1333
1334 if (self->decoder) {
1335 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1336 if (ret == NULL)
1337 return NULL;
1338 Py_DECREF(ret);
1339 }
1340
1341 return PyLong_FromSsize_t(textlen);
1342}
1343
/* Install `chars` as the wrapper's buffer of decoded characters and rewind
   the "already consumed" offset to 0.

   The reference to `chars` is stolen.  Callers in this file also pass NULL
   to discard the current buffer. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
{
    /* Py_SETREF stores the new value before releasing the old one. */
    Py_SETREF(self->decoded_chars, chars);
    self->decoded_chars_used = 0;
}
1352
1353static PyObject *
1354textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1355{
1356 PyObject *chars;
1357 Py_ssize_t avail;
1358
1359 if (self->decoded_chars == NULL)
1360 return PyUnicode_FromStringAndSize(NULL, 0);
1361
1362 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1363 - self->decoded_chars_used);
1364
1365 assert(avail >= 0);
1366
1367 if (n < 0 || n > avail)
1368 n = avail;
1369
1370 if (self->decoded_chars_used > 0 || n < avail) {
1371 chars = PyUnicode_FromUnicode(
1372 PyUnicode_AS_UNICODE(self->decoded_chars)
1373 + self->decoded_chars_used, n);
1374 if (chars == NULL)
1375 return NULL;
1376 }
1377 else {
1378 chars = self->decoded_chars;
1379 Py_INCREF(chars);
1380 }
1381
1382 self->decoded_chars_used += n;
1383 return chars;
1384}
1385
1386/* Read and decode the next chunk of data from the BufferedReader.
1387 */
1388static int
1389textiowrapper_read_chunk(textio *self)
1390{
1391 PyObject *dec_buffer = NULL;
1392 PyObject *dec_flags = NULL;
1393 PyObject *input_chunk = NULL;
1394 PyObject *decoded_chars, *chunk_size;
1395 int eof;
1396
1397 /* The return value is True unless EOF was reached. The decoded string is
1398 * placed in self._decoded_chars (replacing its previous value). The
1399 * entire input chunk is sent to the decoder, though some of it may remain
1400 * buffered in the decoder, yet to be converted.
1401 */
1402
1403 if (self->decoder == NULL) {
1404 PyErr_SetString(PyExc_IOError, "not readable");
1405 return -1;
1406 }
1407
1408 if (self->telling) {
1409 /* To prepare for tell(), we need to snapshot a point in the file
1410 * where the decoder's input buffer is empty.
1411 */
1412
1413 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1414 _PyIO_str_getstate, NULL);
1415 if (state == NULL)
1416 return -1;
1417 /* Given this, we know there was a valid snapshot point
1418 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1419 */
1420 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1421 Py_DECREF(state);
1422 return -1;
1423 }
1424 Py_INCREF(dec_buffer);
1425 Py_INCREF(dec_flags);
1426 Py_DECREF(state);
1427 }
1428
1429 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1430 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1431 if (chunk_size == NULL)
1432 goto fail;
1433 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1434 _PyIO_str_read1, chunk_size, NULL);
1435 Py_DECREF(chunk_size);
1436 if (input_chunk == NULL)
1437 goto fail;
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02001438 if (!PyBytes_Check(input_chunk)) {
1439 PyErr_Format(PyExc_TypeError,
1440 "underlying read1() should have returned a bytes object, "
1441 "not '%.200s'", Py_TYPE(input_chunk)->tp_name);
1442 goto fail;
1443 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001444
1445 eof = (PyBytes_Size(input_chunk) == 0);
1446
1447 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1448 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1449 self->decoder, input_chunk, eof);
1450 }
1451 else {
1452 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1453 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1454 }
1455
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02001456 if (check_decoded(decoded_chars) < 0)
Antoine Pitrou19690592009-06-12 20:14:08 +00001457 goto fail;
1458 textiowrapper_set_decoded_chars(self, decoded_chars);
1459 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1460 eof = 0;
1461
1462 if (self->telling) {
1463 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1464 * next input to be decoded is dec_buffer + input_chunk.
1465 */
1466 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1467 if (next_input == NULL)
1468 goto fail;
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02001469 if (!PyBytes_Check(next_input)) {
1470 PyErr_Format(PyExc_TypeError,
1471 "decoder getstate() should have returned a bytes "
1472 "object, not '%.200s'",
1473 Py_TYPE(next_input)->tp_name);
1474 Py_DECREF(next_input);
1475 goto fail;
1476 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001477 Py_DECREF(dec_buffer);
Serhiy Storchaka8688aca2015-12-27 12:38:48 +02001478 Py_SETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
Antoine Pitrou19690592009-06-12 20:14:08 +00001479 }
1480 Py_DECREF(input_chunk);
1481
1482 return (eof == 0);
1483
1484 fail:
1485 Py_XDECREF(dec_buffer);
1486 Py_XDECREF(dec_flags);
1487 Py_XDECREF(input_chunk);
1488 return -1;
1489}
1490
1491static PyObject *
1492textiowrapper_read(textio *self, PyObject *args)
1493{
1494 Py_ssize_t n = -1;
1495 PyObject *result = NULL, *chunks = NULL;
1496
Benjamin Peterson53ae6142014-12-21 20:51:50 -06001497 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00001498
Benjamin Petersonddd392c2009-12-13 19:19:07 +00001499 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Antoine Pitrou19690592009-06-12 20:14:08 +00001500 return NULL;
1501
1502 CHECK_CLOSED(self);
1503
1504 if (self->decoder == NULL) {
1505 PyErr_SetString(PyExc_IOError, "not readable");
1506 return NULL;
1507 }
1508
1509 if (_textiowrapper_writeflush(self) < 0)
1510 return NULL;
1511
1512 if (n < 0) {
1513 /* Read everything */
1514 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1515 PyObject *decoded, *final;
1516 if (bytes == NULL)
1517 goto fail;
1518 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1519 bytes, Py_True, NULL);
1520 Py_DECREF(bytes);
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02001521 if (check_decoded(decoded) < 0)
Antoine Pitrou19690592009-06-12 20:14:08 +00001522 goto fail;
1523
1524 result = textiowrapper_get_decoded_chars(self, -1);
1525
1526 if (result == NULL) {
1527 Py_DECREF(decoded);
1528 return NULL;
1529 }
1530
1531 final = PyUnicode_Concat(result, decoded);
1532 Py_DECREF(result);
1533 Py_DECREF(decoded);
1534 if (final == NULL)
1535 goto fail;
1536
1537 Py_CLEAR(self->snapshot);
1538 return final;
1539 }
1540 else {
1541 int res = 1;
1542 Py_ssize_t remaining = n;
1543
1544 result = textiowrapper_get_decoded_chars(self, n);
1545 if (result == NULL)
1546 goto fail;
1547 remaining -= PyUnicode_GET_SIZE(result);
1548
1549 /* Keep reading chunks until we have n characters to return */
1550 while (remaining > 0) {
1551 res = textiowrapper_read_chunk(self);
Gregory P. Smith99716162012-10-12 13:02:06 -07001552 if (res < 0) {
1553 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1554 when EINTR occurs so we needn't do it ourselves. */
1555 if (_PyIO_trap_eintr()) {
1556 continue;
1557 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001558 goto fail;
Gregory P. Smith99716162012-10-12 13:02:06 -07001559 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001560 if (res == 0) /* EOF */
1561 break;
1562 if (chunks == NULL) {
1563 chunks = PyList_New(0);
1564 if (chunks == NULL)
1565 goto fail;
1566 }
1567 if (PyList_Append(chunks, result) < 0)
1568 goto fail;
1569 Py_DECREF(result);
1570 result = textiowrapper_get_decoded_chars(self, remaining);
1571 if (result == NULL)
1572 goto fail;
1573 remaining -= PyUnicode_GET_SIZE(result);
1574 }
1575 if (chunks != NULL) {
1576 if (result != NULL && PyList_Append(chunks, result) < 0)
1577 goto fail;
Serhiy Storchaka8688aca2015-12-27 12:38:48 +02001578 Py_SETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Antoine Pitrou19690592009-06-12 20:14:08 +00001579 if (result == NULL)
1580 goto fail;
1581 Py_CLEAR(chunks);
1582 }
1583 return result;
1584 }
1585 fail:
1586 Py_XDECREF(result);
1587 Py_XDECREF(chunks);
1588 return NULL;
1589}
1590
1591
1592/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1593 that is to the NUL character. Otherwise the function will produce
1594 incorrect results. */
1595static Py_UNICODE *
1596find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1597{
1598 Py_UNICODE *s = start;
1599 for (;;) {
1600 while (*s > ch)
1601 s++;
1602 if (*s == ch)
1603 return s;
1604 if (s == end)
1605 return NULL;
1606 s++;
1607 }
1608}
1609
1610Py_ssize_t
1611_PyIO_find_line_ending(
1612 int translated, int universal, PyObject *readnl,
1613 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1614{
1615 Py_ssize_t len = end - start;
1616
1617 if (translated) {
1618 /* Newlines are already translated, only search for \n */
1619 Py_UNICODE *pos = find_control_char(start, end, '\n');
1620 if (pos != NULL)
1621 return pos - start + 1;
1622 else {
1623 *consumed = len;
1624 return -1;
1625 }
1626 }
1627 else if (universal) {
1628 /* Universal newline search. Find any of \r, \r\n, \n
1629 * The decoder ensures that \r\n are not split in two pieces
1630 */
1631 Py_UNICODE *s = start;
1632 for (;;) {
1633 Py_UNICODE ch;
1634 /* Fast path for non-control chars. The loop always ends
1635 since the Py_UNICODE storage is NUL-terminated. */
1636 while (*s > '\r')
1637 s++;
1638 if (s >= end) {
1639 *consumed = len;
1640 return -1;
1641 }
1642 ch = *s++;
1643 if (ch == '\n')
1644 return s - start;
1645 if (ch == '\r') {
1646 if (*s == '\n')
1647 return s - start + 1;
1648 else
1649 return s - start;
1650 }
1651 }
1652 }
1653 else {
1654 /* Non-universal mode. */
1655 Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1656 unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1657 if (readnl_len == 1) {
1658 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1659 if (pos != NULL)
1660 return pos - start + 1;
1661 *consumed = len;
1662 return -1;
1663 }
1664 else {
1665 Py_UNICODE *s = start;
1666 Py_UNICODE *e = end - readnl_len + 1;
1667 Py_UNICODE *pos;
1668 if (e < s)
1669 e = s;
1670 while (s < e) {
1671 Py_ssize_t i;
1672 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1673 if (pos == NULL || pos >= e)
1674 break;
1675 for (i = 1; i < readnl_len; i++) {
1676 if (pos[i] != nl[i])
1677 break;
1678 }
1679 if (i == readnl_len)
1680 return pos - start + readnl_len;
1681 s = pos + 1;
1682 }
1683 pos = find_control_char(e, end, nl[0]);
1684 if (pos == NULL)
1685 *consumed = len;
1686 else
1687 *consumed = pos - start;
1688 return -1;
1689 }
1690 }
1691}
1692
1693static PyObject *
1694_textiowrapper_readline(textio *self, Py_ssize_t limit)
1695{
1696 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1697 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1698 int res;
1699
1700 CHECK_CLOSED(self);
1701
1702 if (_textiowrapper_writeflush(self) < 0)
1703 return NULL;
1704
1705 chunked = 0;
1706
1707 while (1) {
1708 Py_UNICODE *ptr;
1709 Py_ssize_t line_len;
1710 Py_ssize_t consumed = 0;
1711
1712 /* First, get some data if necessary */
1713 res = 1;
1714 while (!self->decoded_chars ||
1715 !PyUnicode_GET_SIZE(self->decoded_chars)) {
1716 res = textiowrapper_read_chunk(self);
Gregory P. Smith99716162012-10-12 13:02:06 -07001717 if (res < 0) {
1718 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1719 when EINTR occurs so we needn't do it ourselves. */
1720 if (_PyIO_trap_eintr()) {
1721 continue;
1722 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001723 goto error;
Gregory P. Smith99716162012-10-12 13:02:06 -07001724 }
Antoine Pitrou19690592009-06-12 20:14:08 +00001725 if (res == 0)
1726 break;
1727 }
1728 if (res == 0) {
1729 /* end of file */
1730 textiowrapper_set_decoded_chars(self, NULL);
1731 Py_CLEAR(self->snapshot);
1732 start = endpos = offset_to_buffer = 0;
1733 break;
1734 }
1735
1736 if (remaining == NULL) {
1737 line = self->decoded_chars;
1738 start = self->decoded_chars_used;
1739 offset_to_buffer = 0;
1740 Py_INCREF(line);
1741 }
1742 else {
1743 assert(self->decoded_chars_used == 0);
1744 line = PyUnicode_Concat(remaining, self->decoded_chars);
1745 start = 0;
1746 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1747 Py_CLEAR(remaining);
1748 if (line == NULL)
1749 goto error;
1750 }
1751
1752 ptr = PyUnicode_AS_UNICODE(line);
1753 line_len = PyUnicode_GET_SIZE(line);
1754
1755 endpos = _PyIO_find_line_ending(
1756 self->readtranslate, self->readuniversal, self->readnl,
1757 ptr + start, ptr + line_len, &consumed);
1758 if (endpos >= 0) {
1759 endpos += start;
1760 if (limit >= 0 && (endpos - start) + chunked >= limit)
1761 endpos = start + limit - chunked;
1762 break;
1763 }
1764
1765 /* We can put aside up to `endpos` */
1766 endpos = consumed + start;
1767 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1768 /* Didn't find line ending, but reached length limit */
1769 endpos = start + limit - chunked;
1770 break;
1771 }
1772
1773 if (endpos > start) {
1774 /* No line ending seen yet - put aside current data */
1775 PyObject *s;
1776 if (chunks == NULL) {
1777 chunks = PyList_New(0);
1778 if (chunks == NULL)
1779 goto error;
1780 }
1781 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1782 if (s == NULL)
1783 goto error;
1784 if (PyList_Append(chunks, s) < 0) {
1785 Py_DECREF(s);
1786 goto error;
1787 }
1788 chunked += PyUnicode_GET_SIZE(s);
1789 Py_DECREF(s);
1790 }
1791 /* There may be some remaining bytes we'll have to prepend to the
1792 next chunk of data */
1793 if (endpos < line_len) {
1794 remaining = PyUnicode_FromUnicode(
1795 ptr + endpos, line_len - endpos);
1796 if (remaining == NULL)
1797 goto error;
1798 }
1799 Py_CLEAR(line);
1800 /* We have consumed the buffer */
1801 textiowrapper_set_decoded_chars(self, NULL);
1802 }
1803
1804 if (line != NULL) {
1805 /* Our line ends in the current buffer */
1806 self->decoded_chars_used = endpos - offset_to_buffer;
1807 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1808 if (start == 0 && Py_REFCNT(line) == 1) {
1809 if (PyUnicode_Resize(&line, endpos) < 0)
1810 goto error;
1811 }
1812 else {
1813 PyObject *s = PyUnicode_FromUnicode(
1814 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1815 Py_CLEAR(line);
1816 if (s == NULL)
1817 goto error;
1818 line = s;
1819 }
1820 }
1821 }
1822 if (remaining != NULL) {
1823 if (chunks == NULL) {
1824 chunks = PyList_New(0);
1825 if (chunks == NULL)
1826 goto error;
1827 }
1828 if (PyList_Append(chunks, remaining) < 0)
1829 goto error;
1830 Py_CLEAR(remaining);
1831 }
1832 if (chunks != NULL) {
1833 if (line != NULL && PyList_Append(chunks, line) < 0)
1834 goto error;
Serhiy Storchaka8688aca2015-12-27 12:38:48 +02001835 Py_SETREF(line, PyUnicode_Join(_PyIO_empty_str, chunks));
Antoine Pitrou19690592009-06-12 20:14:08 +00001836 if (line == NULL)
1837 goto error;
1838 Py_DECREF(chunks);
1839 }
1840 if (line == NULL)
1841 line = PyUnicode_FromStringAndSize(NULL, 0);
1842
1843 return line;
1844
1845 error:
1846 Py_XDECREF(chunks);
1847 Py_XDECREF(remaining);
1848 Py_XDECREF(line);
1849 return NULL;
1850}
1851
1852static PyObject *
1853textiowrapper_readline(textio *self, PyObject *args)
1854{
1855 PyObject *limitobj = NULL;
1856 Py_ssize_t limit = -1;
1857
Benjamin Peterson53ae6142014-12-21 20:51:50 -06001858 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00001859 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1860 return NULL;
1861 }
1862 if (limitobj) {
1863 if (!PyNumber_Check(limitobj)) {
1864 PyErr_Format(PyExc_TypeError,
1865 "integer argument expected, got '%.200s'",
1866 Py_TYPE(limitobj)->tp_name);
1867 return NULL;
1868 }
1869 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1870 if (limit == -1 && PyErr_Occurred())
1871 return NULL;
1872 }
1873 return _textiowrapper_readline(self, limit);
1874}
1875
1876/* Seek and Tell */
1877
/* Unpacked form of the integer "cookie" accepted by seek().  The field
   descriptions below reflect how seek() uses each value. */
typedef struct {
    Py_off_t start_pos;   /* position in the underlying buffer to seek back
                             to (the "safe start point") */
    int dec_flags;        /* decoder state flags restored at start_pos */
    int bytes_to_feed;    /* number of bytes to read from the buffer and
                             feed to the decoder */
    int chars_to_skip;    /* number of decoded characters to skip after
                             feeding */
    char need_eof;        /* passed as the `final` argument of decode()
                             when feeding */
} cookie_type;
1885
/*
   Cookies are packed into / unpacked from a Python long through an
   intermediary byte string handled by _PyLong_FromByteArray() and
   _PyLong_AsByteArray().  The OFF_* macros below give the offset of every
   cookie_type field inside that byte string.
 */

/* Total size of the intermediary byte string. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

/* Big-endian: the fields are laid out in reverse order, so that the least
   significant byte of start_pos is also the least significant byte of the
   cookie. */

# define IS_LITTLE_ENDIAN   0

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))

#else

/* Little-endian: fields are laid out in declaration order; the least
   significant byte of start_pos naturally ends up as the least significant
   byte of the cookie. */

# define IS_LITTLE_ENDIAN   1

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1923
1924static int
1925textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1926{
1927 unsigned char buffer[COOKIE_BUF_LEN];
1928 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1929 if (cookieLong == NULL)
1930 return -1;
1931
1932 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1933 IS_LITTLE_ENDIAN, 0) < 0) {
1934 Py_DECREF(cookieLong);
1935 return -1;
1936 }
1937 Py_DECREF(cookieLong);
1938
1939 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1940 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1941 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1942 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1943 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1944
1945 return 0;
1946}
1947
1948static PyObject *
1949textiowrapper_build_cookie(cookie_type *cookie)
1950{
1951 unsigned char buffer[COOKIE_BUF_LEN];
1952
1953 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1954 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1955 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1956 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1957 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1958
1959 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1960}
1961#undef IS_LITTLE_ENDIAN
1962
1963static int
1964_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1965{
1966 PyObject *res;
1967 /* When seeking to the start of the stream, we call decoder.reset()
1968 rather than decoder.getstate().
1969 This is for a few decoders such as utf-16 for which the state value
1970 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1971 utf-16, that we are expecting a BOM).
1972 */
1973 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1974 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1975 else
1976 res = PyObject_CallMethod(self->decoder, "setstate",
1977 "((si))", "", cookie->dec_flags);
1978 if (res == NULL)
1979 return -1;
1980 Py_DECREF(res);
1981 return 0;
1982}
1983
1984static int
1985_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1986{
1987 PyObject *res;
1988 /* Same as _textiowrapper_decoder_setstate() above. */
1989 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1990 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1991 self->encoding_start_of_stream = 1;
1992 }
1993 else {
1994 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1995 _PyIO_zero, NULL);
1996 self->encoding_start_of_stream = 0;
1997 }
1998 if (res == NULL)
1999 return -1;
2000 Py_DECREF(res);
2001 return 0;
2002}
2003
2004static PyObject *
2005textiowrapper_seek(textio *self, PyObject *args)
2006{
2007 PyObject *cookieObj, *posobj;
2008 cookie_type cookie;
2009 int whence = 0;
2010 PyObject *res;
2011 int cmp;
2012
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002013 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002014
2015 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2016 return NULL;
2017 CHECK_CLOSED(self);
2018
2019 Py_INCREF(cookieObj);
2020
2021 if (!self->seekable) {
2022 PyErr_SetString(PyExc_IOError,
2023 "underlying stream is not seekable");
2024 goto fail;
2025 }
2026
2027 if (whence == 1) {
2028 /* seek relative to current position */
2029 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2030 if (cmp < 0)
2031 goto fail;
2032
2033 if (cmp == 0) {
2034 PyErr_SetString(PyExc_IOError,
2035 "can't do nonzero cur-relative seeks");
2036 goto fail;
2037 }
2038
2039 /* Seeking to the current position should attempt to
2040 * sync the underlying buffer with the current position.
2041 */
2042 Py_DECREF(cookieObj);
2043 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2044 if (cookieObj == NULL)
2045 goto fail;
2046 }
2047 else if (whence == 2) {
2048 /* seek relative to end of file */
2049
2050 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2051 if (cmp < 0)
2052 goto fail;
2053
2054 if (cmp == 0) {
2055 PyErr_SetString(PyExc_IOError,
2056 "can't do nonzero end-relative seeks");
2057 goto fail;
2058 }
2059
2060 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2061 if (res == NULL)
2062 goto fail;
2063 Py_DECREF(res);
2064
2065 textiowrapper_set_decoded_chars(self, NULL);
2066 Py_CLEAR(self->snapshot);
2067 if (self->decoder) {
2068 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2069 if (res == NULL)
2070 goto fail;
2071 Py_DECREF(res);
2072 }
2073
2074 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2075 Py_XDECREF(cookieObj);
2076 return res;
2077 }
2078 else if (whence != 0) {
2079 PyErr_Format(PyExc_ValueError,
2080 "invalid whence (%d, should be 0, 1 or 2)", whence);
2081 goto fail;
2082 }
2083
2084 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2085 if (cmp < 0)
2086 goto fail;
2087
2088 if (cmp == 1) {
2089 PyObject *repr = PyObject_Repr(cookieObj);
2090 if (repr != NULL) {
2091 PyErr_Format(PyExc_ValueError,
2092 "negative seek position %s",
2093 PyString_AS_STRING(repr));
2094 Py_DECREF(repr);
2095 }
2096 goto fail;
2097 }
2098
2099 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2100 if (res == NULL)
2101 goto fail;
2102 Py_DECREF(res);
2103
2104 /* The strategy of seek() is to go back to the safe start point
2105 * and replay the effect of read(chars_to_skip) from there.
2106 */
2107 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2108 goto fail;
2109
2110 /* Seek back to the safe start point. */
2111 posobj = PyLong_FromOff_t(cookie.start_pos);
2112 if (posobj == NULL)
2113 goto fail;
2114 res = PyObject_CallMethodObjArgs(self->buffer,
2115 _PyIO_str_seek, posobj, NULL);
2116 Py_DECREF(posobj);
2117 if (res == NULL)
2118 goto fail;
2119 Py_DECREF(res);
2120
2121 textiowrapper_set_decoded_chars(self, NULL);
2122 Py_CLEAR(self->snapshot);
2123
2124 /* Restore the decoder to its state from the safe start point. */
2125 if (self->decoder) {
2126 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2127 goto fail;
2128 }
2129
2130 if (cookie.chars_to_skip) {
2131 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2132 PyObject *input_chunk = PyObject_CallMethod(
2133 self->buffer, "read", "i", cookie.bytes_to_feed);
2134 PyObject *decoded;
2135
2136 if (input_chunk == NULL)
2137 goto fail;
2138
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02002139 if (!PyBytes_Check(input_chunk)) {
2140 PyErr_Format(PyExc_TypeError,
2141 "underlying read() should have returned a bytes "
2142 "object, not '%.200s'",
2143 Py_TYPE(input_chunk)->tp_name);
2144 Py_DECREF(input_chunk);
2145 goto fail;
2146 }
Antoine Pitrou19690592009-06-12 20:14:08 +00002147
2148 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2149 if (self->snapshot == NULL) {
2150 Py_DECREF(input_chunk);
2151 goto fail;
2152 }
2153
2154 decoded = PyObject_CallMethod(self->decoder, "decode",
2155 "Oi", input_chunk, (int)cookie.need_eof);
2156
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02002157 if (check_decoded(decoded) < 0)
Antoine Pitrou19690592009-06-12 20:14:08 +00002158 goto fail;
2159
2160 textiowrapper_set_decoded_chars(self, decoded);
2161
2162 /* Skip chars_to_skip of the decoded characters. */
2163 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2164 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2165 goto fail;
2166 }
2167 self->decoded_chars_used = cookie.chars_to_skip;
2168 }
2169 else {
2170 self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2171 if (self->snapshot == NULL)
2172 goto fail;
2173 }
2174
2175 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2176 if (self->encoder) {
2177 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2178 goto fail;
2179 }
2180 return cookieObj;
2181 fail:
2182 Py_XDECREF(cookieObj);
2183 return NULL;
2184
2185}
2186
2187static PyObject *
2188textiowrapper_tell(textio *self, PyObject *args)
2189{
2190 PyObject *res;
2191 PyObject *posobj = NULL;
2192 cookie_type cookie = {0,0,0,0,0};
2193 PyObject *next_input;
2194 Py_ssize_t chars_to_skip, chars_decoded;
2195 PyObject *saved_state = NULL;
2196 char *input, *input_end;
2197
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002198 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002199 CHECK_CLOSED(self);
2200
2201 if (!self->seekable) {
2202 PyErr_SetString(PyExc_IOError,
2203 "underlying stream is not seekable");
2204 goto fail;
2205 }
2206 if (!self->telling) {
2207 PyErr_SetString(PyExc_IOError,
2208 "telling position disabled by next() call");
2209 goto fail;
2210 }
2211
2212 if (_textiowrapper_writeflush(self) < 0)
2213 return NULL;
2214 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2215 if (res == NULL)
2216 goto fail;
2217 Py_DECREF(res);
2218
2219 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2220 if (posobj == NULL)
2221 goto fail;
2222
2223 if (self->decoder == NULL || self->snapshot == NULL) {
2224 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2225 return posobj;
2226 }
2227
2228#if defined(HAVE_LARGEFILE_SUPPORT)
2229 cookie.start_pos = PyLong_AsLongLong(posobj);
2230#else
2231 cookie.start_pos = PyLong_AsLong(posobj);
2232#endif
2233 if (PyErr_Occurred())
2234 goto fail;
2235
2236 /* Skip backward to the snapshot point (see _read_chunk). */
2237 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2238 goto fail;
2239
2240 assert (PyBytes_Check(next_input));
2241
2242 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2243
2244 /* How many decoded characters have been used up since the snapshot? */
2245 if (self->decoded_chars_used == 0) {
2246 /* We haven't moved from the snapshot point. */
2247 Py_DECREF(posobj);
2248 return textiowrapper_build_cookie(&cookie);
2249 }
2250
2251 chars_to_skip = self->decoded_chars_used;
2252
2253 /* Starting from the snapshot position, we will walk the decoder
2254 * forward until it gives us enough decoded characters.
2255 */
2256 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2257 _PyIO_str_getstate, NULL);
2258 if (saved_state == NULL)
2259 goto fail;
2260
2261 /* Note our initial start point. */
2262 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2263 goto fail;
2264
2265 /* Feed the decoder one byte at a time. As we go, note the
2266 * nearest "safe start point" before the current location
2267 * (a point where the decoder has nothing buffered, so seek()
2268 * can safely start from there and advance to this location).
2269 */
2270 chars_decoded = 0;
2271 input = PyBytes_AS_STRING(next_input);
2272 input_end = input + PyBytes_GET_SIZE(next_input);
2273 while (input < input_end) {
2274 PyObject *state;
2275 char *dec_buffer;
2276 Py_ssize_t dec_buffer_len;
2277 int dec_flags;
2278
2279 PyObject *decoded = PyObject_CallMethod(
Serhiy Storchakaa9885e92013-08-20 20:08:53 +03002280 self->decoder, "decode", "s#", input, (Py_ssize_t)1);
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02002281 if (check_decoded(decoded) < 0)
Antoine Pitrou19690592009-06-12 20:14:08 +00002282 goto fail;
Antoine Pitrou19690592009-06-12 20:14:08 +00002283 chars_decoded += PyUnicode_GET_SIZE(decoded);
2284 Py_DECREF(decoded);
2285
2286 cookie.bytes_to_feed += 1;
2287
2288 state = PyObject_CallMethodObjArgs(self->decoder,
2289 _PyIO_str_getstate, NULL);
2290 if (state == NULL)
2291 goto fail;
2292 if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2293 Py_DECREF(state);
2294 goto fail;
2295 }
2296 Py_DECREF(state);
2297
2298 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2299 /* Decoder buffer is empty, so this is a safe start point. */
2300 cookie.start_pos += cookie.bytes_to_feed;
2301 chars_to_skip -= chars_decoded;
2302 cookie.dec_flags = dec_flags;
2303 cookie.bytes_to_feed = 0;
2304 chars_decoded = 0;
2305 }
2306 if (chars_decoded >= chars_to_skip)
2307 break;
2308 input++;
2309 }
2310 if (input == input_end) {
2311 /* We didn't get enough decoded data; signal EOF to get more. */
2312 PyObject *decoded = PyObject_CallMethod(
2313 self->decoder, "decode", "si", "", /* final = */ 1);
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02002314 if (check_decoded(decoded) < 0)
Antoine Pitrou19690592009-06-12 20:14:08 +00002315 goto fail;
Antoine Pitrou19690592009-06-12 20:14:08 +00002316 chars_decoded += PyUnicode_GET_SIZE(decoded);
2317 Py_DECREF(decoded);
2318 cookie.need_eof = 1;
2319
2320 if (chars_decoded < chars_to_skip) {
2321 PyErr_SetString(PyExc_IOError,
2322 "can't reconstruct logical file position");
2323 goto fail;
2324 }
2325 }
2326
2327 /* finally */
2328 Py_XDECREF(posobj);
2329 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2330 Py_DECREF(saved_state);
2331 if (res == NULL)
2332 return NULL;
2333 Py_DECREF(res);
2334
2335 /* The returned cookie corresponds to the last safe start point. */
2336 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2337 return textiowrapper_build_cookie(&cookie);
2338
2339 fail:
2340 Py_XDECREF(posobj);
2341 if (saved_state) {
2342 PyObject *type, *value, *traceback;
2343 PyErr_Fetch(&type, &value, &traceback);
2344
2345 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
Serhiy Storchaka96d80122015-03-30 10:00:49 +03002346 _PyErr_ReplaceException(type, value, traceback);
Antoine Pitrou19690592009-06-12 20:14:08 +00002347 Py_DECREF(saved_state);
Serhiy Storchaka96d80122015-03-30 10:00:49 +03002348 Py_XDECREF(res);
Antoine Pitrou19690592009-06-12 20:14:08 +00002349 }
2350 return NULL;
2351}
2352
2353static PyObject *
2354textiowrapper_truncate(textio *self, PyObject *args)
2355{
2356 PyObject *pos = Py_None;
2357 PyObject *res;
2358
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002359 CHECK_ATTACHED(self)
Antoine Pitrou19690592009-06-12 20:14:08 +00002360 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2361 return NULL;
2362 }
2363
2364 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2365 if (res == NULL)
2366 return NULL;
2367 Py_DECREF(res);
2368
Antoine Pitrouf3fa0742010-01-31 22:26:04 +00002369 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Antoine Pitrou19690592009-06-12 20:14:08 +00002370}
2371
2372static PyObject *
2373textiowrapper_repr(textio *self)
2374{
2375 PyObject *nameobj, *res;
2376 PyObject *namerepr = NULL, *encrepr = NULL;
2377
2378 CHECK_INITIALIZED(self);
2379
2380 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2381 if (nameobj == NULL) {
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002382 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou19690592009-06-12 20:14:08 +00002383 PyErr_Clear();
2384 else
2385 goto error;
2386 encrepr = PyObject_Repr(self->encoding);
2387 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2388 PyString_AS_STRING(encrepr));
2389 }
2390 else {
2391 encrepr = PyObject_Repr(self->encoding);
2392 namerepr = PyObject_Repr(nameobj);
2393 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2394 PyString_AS_STRING(namerepr),
2395 PyString_AS_STRING(encrepr));
2396 Py_DECREF(nameobj);
2397 }
2398 Py_XDECREF(namerepr);
2399 Py_XDECREF(encrepr);
2400 return res;
2401
2402error:
2403 Py_XDECREF(namerepr);
2404 Py_XDECREF(encrepr);
2405 return NULL;
2406}
2407
2408
2409/* Inquiries */
2410
2411static PyObject *
2412textiowrapper_fileno(textio *self, PyObject *args)
2413{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002414 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002415 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2416}
2417
2418static PyObject *
2419textiowrapper_seekable(textio *self, PyObject *args)
2420{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002421 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002422 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2423}
2424
2425static PyObject *
2426textiowrapper_readable(textio *self, PyObject *args)
2427{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002428 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002429 return PyObject_CallMethod(self->buffer, "readable", NULL);
2430}
2431
2432static PyObject *
2433textiowrapper_writable(textio *self, PyObject *args)
2434{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002435 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002436 return PyObject_CallMethod(self->buffer, "writable", NULL);
2437}
2438
2439static PyObject *
2440textiowrapper_isatty(textio *self, PyObject *args)
2441{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002442 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002443 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2444}
2445
2446static PyObject *
2447textiowrapper_flush(textio *self, PyObject *args)
2448{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002449 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002450 CHECK_CLOSED(self);
2451 self->telling = self->seekable;
2452 if (_textiowrapper_writeflush(self) < 0)
2453 return NULL;
2454 return PyObject_CallMethod(self->buffer, "flush", NULL);
2455}
2456
2457static PyObject *
2458textiowrapper_close(textio *self, PyObject *args)
2459{
2460 PyObject *res;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002461 int r;
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002462 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002463
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002464 res = textiowrapper_closed_get(self, NULL);
2465 if (res == NULL)
2466 return NULL;
2467 r = PyObject_IsTrue(res);
2468 Py_DECREF(res);
2469 if (r < 0)
2470 return NULL;
Serhiy Storchaka354d50e2013-02-03 17:10:42 +02002471
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002472 if (r > 0) {
2473 Py_RETURN_NONE; /* stream already closed */
2474 }
2475 else {
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002476 PyObject *exc = NULL, *val, *tb;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002477 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002478 if (res == NULL)
2479 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002480 else
2481 Py_DECREF(res);
2482
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002483 res = PyObject_CallMethod(self->buffer, "close", NULL);
2484 if (exc != NULL) {
Serhiy Storchakaaa64c462015-03-30 09:48:42 +03002485 _PyErr_ReplaceException(exc, val, tb);
2486 Py_CLEAR(res);
Benjamin Petersona2d6d712012-12-20 12:24:10 -06002487 }
2488 return res;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002489 }
Antoine Pitrou19690592009-06-12 20:14:08 +00002490}
2491
2492static PyObject *
2493textiowrapper_iternext(textio *self)
2494{
2495 PyObject *line;
2496
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002497 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002498
2499 self->telling = 0;
2500 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2501 /* Skip method call overhead for speed */
2502 line = _textiowrapper_readline(self, -1);
2503 }
2504 else {
2505 line = PyObject_CallMethodObjArgs((PyObject *)self,
2506 _PyIO_str_readline, NULL);
2507 if (line && !PyUnicode_Check(line)) {
2508 PyErr_Format(PyExc_IOError,
2509 "readline() should have returned an str object, "
2510 "not '%.200s'", Py_TYPE(line)->tp_name);
2511 Py_DECREF(line);
2512 return NULL;
2513 }
2514 }
2515
2516 if (line == NULL)
2517 return NULL;
2518
2519 if (PyUnicode_GET_SIZE(line) == 0) {
2520 /* Reached EOF or would have blocked */
2521 Py_DECREF(line);
2522 Py_CLEAR(self->snapshot);
2523 self->telling = self->seekable;
2524 return NULL;
2525 }
2526
2527 return line;
2528}
2529
2530static PyObject *
2531textiowrapper_name_get(textio *self, void *context)
2532{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002533 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002534 return PyObject_GetAttrString(self->buffer, "name");
2535}
2536
2537static PyObject *
2538textiowrapper_closed_get(textio *self, void *context)
2539{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002540 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002541 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2542}
2543
2544static PyObject *
2545textiowrapper_newlines_get(textio *self, void *context)
2546{
2547 PyObject *res;
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002548 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002549 if (self->decoder == NULL)
2550 Py_RETURN_NONE;
2551 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2552 if (res == NULL) {
2553 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2554 PyErr_Clear();
2555 Py_RETURN_NONE;
2556 }
2557 else {
2558 return NULL;
2559 }
2560 }
2561 return res;
2562}
2563
2564static PyObject *
2565textiowrapper_errors_get(textio *self, void *context)
2566{
2567 CHECK_INITIALIZED(self);
2568 Py_INCREF(self->errors);
2569 return self->errors;
2570}
2571
2572static PyObject *
2573textiowrapper_chunk_size_get(textio *self, void *context)
2574{
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002575 CHECK_ATTACHED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002576 return PyLong_FromSsize_t(self->chunk_size);
2577}
2578
2579static int
2580textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2581{
2582 Py_ssize_t n;
Benjamin Peterson53ae6142014-12-21 20:51:50 -06002583 CHECK_ATTACHED_INT(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002584 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2585 if (n == -1 && PyErr_Occurred())
2586 return -1;
2587 if (n <= 0) {
2588 PyErr_SetString(PyExc_ValueError,
2589 "a strictly positive integer is required");
2590 return -1;
2591 }
2592 self->chunk_size = n;
2593 return 0;
2594}
2595
2596static PyMethodDef textiowrapper_methods[] = {
2597 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2598 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2599 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2600 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2601 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2602 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2603
2604 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2605 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2606 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2607 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2608 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2609
2610 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2611 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2612 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2613 {NULL, NULL}
2614};
2615
2616static PyMemberDef textiowrapper_members[] = {
2617 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2618 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2619 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2620 {NULL}
2621};
2622
2623static PyGetSetDef textiowrapper_getset[] = {
2624 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2625 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2626/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2627*/
2628 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2629 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2630 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2631 (setter)textiowrapper_chunk_size_set, NULL},
2632 {NULL}
2633};
2634
2635PyTypeObject PyTextIOWrapper_Type = {
2636 PyVarObject_HEAD_INIT(NULL, 0)
2637 "_io.TextIOWrapper", /*tp_name*/
2638 sizeof(textio), /*tp_basicsize*/
2639 0, /*tp_itemsize*/
2640 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2641 0, /*tp_print*/
2642 0, /*tp_getattr*/
2643 0, /*tps_etattr*/
2644 0, /*tp_compare */
2645 (reprfunc)textiowrapper_repr,/*tp_repr*/
2646 0, /*tp_as_number*/
2647 0, /*tp_as_sequence*/
2648 0, /*tp_as_mapping*/
2649 0, /*tp_hash */
2650 0, /*tp_call*/
2651 0, /*tp_str*/
2652 0, /*tp_getattro*/
2653 0, /*tp_setattro*/
2654 0, /*tp_as_buffer*/
2655 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2656 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2657 textiowrapper_doc, /* tp_doc */
2658 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2659 (inquiry)textiowrapper_clear, /* tp_clear */
2660 0, /* tp_richcompare */
2661 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2662 0, /* tp_iter */
2663 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2664 textiowrapper_methods, /* tp_methods */
2665 textiowrapper_members, /* tp_members */
2666 textiowrapper_getset, /* tp_getset */
2667 0, /* tp_base */
2668 0, /* tp_dict */
2669 0, /* tp_descr_get */
2670 0, /* tp_descr_set */
2671 offsetof(textio, dict), /*tp_dictoffset*/
2672 (initproc)textiowrapper_init, /* tp_init */
2673 0, /* tp_alloc */
2674 PyType_GenericNew, /* tp_new */
2675};