blob: a08ab5b0e27ff6efeecec3c03ec66ded02625fea [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +010011#include "pycore_object.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000012#include "structmember.h"
13#include "_iomodule.h"
14
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030015/*[clinic input]
16module _io
17class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19[clinic start generated code]*/
20/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21
/* Interned attribute/method names looked up through the _Py_IDENTIFIER
   machinery (each one declares a static PyId_<name> object). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000043/* TextIOBase */
44
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000045PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046 "Base class for text I/O.\n"
47 "\n"
48 "This class provides a character and line based interface to stream\n"
49 "I/O. There is no readinto method because Python's character strings\n"
50 "are immutable. There is no public constructor.\n"
51 );
52
53static PyObject *
54_unsupported(const char *message)
55{
Antoine Pitrou712cb732013-12-21 15:51:54 +010056 _PyIO_State *state = IO_STATE();
57 if (state != NULL)
58 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000059 return NULL;
60}
61
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063 "Separate the underlying buffer from the TextIOBase and return it.\n"
64 "\n"
65 "After the underlying buffer has been detached, the TextIO is in an\n"
66 "unusable state.\n"
67 );
68
69static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053070textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000071{
72 return _unsupported("detach");
73}
74
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076 "Read at most n characters from stream.\n"
77 "\n"
78 "Read from underlying buffer until we have n characters or we hit EOF.\n"
79 "If n is negative or omitted, read until EOF.\n"
80 );
81
82static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000083textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000084{
85 return _unsupported("read");
86}
87
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089 "Read until newline or EOF.\n"
90 "\n"
91 "Returns an empty string if EOF is hit immediately.\n"
92 );
93
94static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000095textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000096{
97 return _unsupported("readline");
98}
99
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000100PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000101 "Write string to stream.\n"
102 "Returns the number of characters written (which is always equal to\n"
103 "the length of the string).\n"
104 );
105
106static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000108{
109 return _unsupported("write");
110}
111
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000112PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000113 "Encoding of the text stream.\n"
114 "\n"
115 "Subclasses should override.\n"
116 );
117
118static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000119textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120{
121 Py_RETURN_NONE;
122}
123
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 "Line endings translated so far.\n"
126 "\n"
127 "Only line endings translated during reading are considered.\n"
128 "\n"
129 "Subclasses should override.\n"
130 );
131
132static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000133textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000134{
135 Py_RETURN_NONE;
136}
137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000138PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000139 "The error setting of the decoder or encoder.\n"
140 "\n"
141 "Subclasses should override.\n"
142 );
143
144static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000145textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000146{
147 Py_RETURN_NONE;
148}
149
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530152 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000153 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156 {NULL, NULL}
157};
158
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000159static PyGetSetDef textiobase_getset[] = {
160 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000163 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164};
165
166PyTypeObject PyTextIOBase_Type = {
167 PyVarObject_HEAD_INIT(NULL, 0)
168 "_io._TextIOBase", /*tp_name*/
169 0, /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 0, /*tp_dealloc*/
172 0, /*tp_print*/
173 0, /*tp_getattr*/
174 0, /*tp_setattr*/
175 0, /*tp_compare */
176 0, /*tp_repr*/
177 0, /*tp_as_number*/
178 0, /*tp_as_sequence*/
179 0, /*tp_as_mapping*/
180 0, /*tp_hash */
181 0, /*tp_call*/
182 0, /*tp_str*/
183 0, /*tp_getattro*/
184 0, /*tp_setattro*/
185 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200186 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
187 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000188 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000189 0, /* tp_traverse */
190 0, /* tp_clear */
191 0, /* tp_richcompare */
192 0, /* tp_weaklistoffset */
193 0, /* tp_iter */
194 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000195 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000196 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 &PyIOBase_Type, /* tp_base */
199 0, /* tp_dict */
200 0, /* tp_descr_get */
201 0, /* tp_descr_set */
202 0, /* tp_dictoffset */
203 0, /* tp_init */
204 0, /* tp_alloc */
205 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200206 0, /* tp_free */
207 0, /* tp_is_gc */
208 0, /* tp_bases */
209 0, /* tp_mro */
210 0, /* tp_cache */
211 0, /* tp_subclasses */
212 0, /* tp_weaklist */
213 0, /* tp_del */
214 0, /* tp_version_tag */
215 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000216};
217
218
219/* IncrementalNewlineDecoder */
220
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221typedef struct {
222 PyObject_HEAD
223 PyObject *decoder;
224 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200225 unsigned int pendingcr: 1;
226 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000227 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000228} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000229
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300230/*[clinic input]
231_io.IncrementalNewlineDecoder.__init__
232 decoder: object
233 translate: int
234 errors: object(c_default="NULL") = "strict"
235
236Codec used when reading a file in universal newlines mode.
237
238It wraps another incremental decoder, translating \r\n and \r into \n.
239It also records the types of newlines encountered. When used with
240translate=False, it ensures that the newline sequence is returned in
241one piece. When used with decoder=None, it expects unicode strings as
242decode input and translates newlines without first invoking an external
243decoder.
244[clinic start generated code]*/
245
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300247_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
248 PyObject *decoder, int translate,
249 PyObject *errors)
250/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252 self->decoder = decoder;
253 Py_INCREF(decoder);
254
255 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900256 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000257 if (self->errors == NULL)
258 return -1;
259 }
260 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000261 self->errors = errors;
262 }
INADA Naoki507434f2017-12-21 09:59:53 +0900263 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000264
Xiang Zhangb08746b2018-10-31 19:49:16 +0800265 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266 self->seennl = 0;
267 self->pendingcr = 0;
268
269 return 0;
270}
271
272static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000273incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000274{
275 Py_CLEAR(self->decoder);
276 Py_CLEAR(self->errors);
277 Py_TYPE(self)->tp_free((PyObject *)self);
278}
279
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200280static int
281check_decoded(PyObject *decoded)
282{
283 if (decoded == NULL)
284 return -1;
285 if (!PyUnicode_Check(decoded)) {
286 PyErr_Format(PyExc_TypeError,
287 "decoder should return a string result, not '%.200s'",
288 Py_TYPE(decoded)->tp_name);
289 Py_DECREF(decoded);
290 return -1;
291 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200292 if (PyUnicode_READY(decoded) < 0) {
293 Py_DECREF(decoded);
294 return -1;
295 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200296 return 0;
297}
298
/* Bit flags recording which newline conventions have been encountered. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
303
304PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200305_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000306 PyObject *input, int final)
307{
308 PyObject *output;
309 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200310 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311
312 if (self->decoder == NULL) {
313 PyErr_SetString(PyExc_ValueError,
314 "IncrementalNewlineDecoder.__init__ not called");
315 return NULL;
316 }
317
318 /* decode input (with the eventual \r from a previous pass) */
319 if (self->decoder != Py_None) {
320 output = PyObject_CallMethodObjArgs(self->decoder,
321 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
322 }
323 else {
324 output = input;
325 Py_INCREF(output);
326 }
327
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200328 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000329 return NULL;
330
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000332 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 /* Prefix output with CR */
334 int kind;
335 PyObject *modified;
336 char *out;
337
338 modified = PyUnicode_New(output_len + 1,
339 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000340 if (modified == NULL)
341 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 kind = PyUnicode_KIND(modified);
343 out = PyUnicode_DATA(modified);
344 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200345 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000348 self->pendingcr = 0;
349 output_len++;
350 }
351
352 /* retain last \r even when not translating data:
353 * then readline() is sure to get \r\n in one pass
354 */
355 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000356 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
358 {
359 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
360 if (modified == NULL)
361 goto error;
362 Py_DECREF(output);
363 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000364 self->pendingcr = 1;
365 }
366 }
367
368 /* Record which newlines are read and do newline translation if desired,
369 all in one pass. */
370 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200371 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000372 Py_ssize_t len;
373 int seennl = self->seennl;
374 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000376
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 in_str = PyUnicode_DATA(output);
378 len = PyUnicode_GET_LENGTH(output);
379 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000380
381 if (len == 0)
382 return output;
383
384 /* If, up to now, newlines are consistently \n, do a quick check
385 for the \r *byte* with the libc's optimized memchr.
386 */
387 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200388 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000389 }
390
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 if (only_lf) {
392 /* If not already seen, quick scan for a possible "\n" character.
393 (there's nothing else to be done, even when in translation mode)
394 */
395 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200396 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100397 if (kind == PyUnicode_1BYTE_KIND)
398 seennl |= SEEN_LF;
399 else {
400 Py_ssize_t i = 0;
401 for (;;) {
402 Py_UCS4 c;
403 /* Fast loop for non-control characters */
404 while (PyUnicode_READ(kind, in_str, i) > '\n')
405 i++;
406 c = PyUnicode_READ(kind, in_str, i++);
407 if (c == '\n') {
408 seennl |= SEEN_LF;
409 break;
410 }
411 if (i >= len)
412 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000413 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 }
415 }
416 /* Finished: we have scanned for newlines, and none of them
417 need translating */
418 }
419 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000421 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000422 if (seennl == SEEN_ALL)
423 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000424 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200425 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000426 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 while (PyUnicode_READ(kind, in_str, i) > '\r')
428 i++;
429 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 if (c == '\n')
431 seennl |= SEEN_LF;
432 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 }
437 else
438 seennl |= SEEN_CR;
439 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000441 break;
442 if (seennl == SEEN_ALL)
443 break;
444 }
445 endscan:
446 ;
447 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000448 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 void *translated;
450 int kind = PyUnicode_KIND(output);
451 void *in_str = PyUnicode_DATA(output);
452 Py_ssize_t in, out;
453 /* XXX: Previous in-place translation here is disabled as
454 resizing is not possible anymore */
455 /* We could try to optimize this so that we only do a copy
456 when there is something to translate. On the other hand,
457 we already know there is a \r byte, so chances are high
458 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200459 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200460 if (translated == NULL) {
461 PyErr_NoMemory();
462 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000467 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
469 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200471 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 seennl |= SEEN_LF;
473 continue;
474 }
475 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200476 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 in++;
478 seennl |= SEEN_CRLF;
479 }
480 else
481 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000483 continue;
484 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000486 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200487 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000488 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200489 Py_DECREF(output);
490 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100491 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200492 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200493 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000494 }
495 self->seennl |= seennl;
496 }
497
498 return output;
499
500 error:
501 Py_DECREF(output);
502 return NULL;
503}
504
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300505/*[clinic input]
506_io.IncrementalNewlineDecoder.decode
507 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200508 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300509[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000510
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511static PyObject *
512_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
513 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200514/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300515{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
517}
518
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300519/*[clinic input]
520_io.IncrementalNewlineDecoder.getstate
521[clinic start generated code]*/
522
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000523static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300524_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
525/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000526{
527 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700528 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529
530 if (self->decoder != Py_None) {
531 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
532 _PyIO_str_getstate, NULL);
533 if (state == NULL)
534 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300535 if (!PyTuple_Check(state)) {
536 PyErr_SetString(PyExc_TypeError,
537 "illegal decoder state");
538 Py_DECREF(state);
539 return NULL;
540 }
541 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
542 &buffer, &flag))
543 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000544 Py_DECREF(state);
545 return NULL;
546 }
547 Py_INCREF(buffer);
548 Py_DECREF(state);
549 }
550 else {
551 buffer = PyBytes_FromString("");
552 flag = 0;
553 }
554 flag <<= 1;
555 if (self->pendingcr)
556 flag |= 1;
557 return Py_BuildValue("NK", buffer, flag);
558}
559
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300560/*[clinic input]
561_io.IncrementalNewlineDecoder.setstate
562 state: object
563 /
564[clinic start generated code]*/
565
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000566static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300567_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
568 PyObject *state)
569/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570{
571 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700572 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000573
Oren Milman1d1d3e92017-08-20 18:35:36 +0300574 if (!PyTuple_Check(state)) {
575 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300577 }
578 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
579 &buffer, &flag))
580 {
581 return NULL;
582 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583
Victor Stinner7d7e7752014-06-17 23:31:25 +0200584 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585 flag >>= 1;
586
587 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200588 return _PyObject_CallMethodId(self->decoder,
589 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 else
591 Py_RETURN_NONE;
592}
593
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300594/*[clinic input]
595_io.IncrementalNewlineDecoder.reset
596[clinic start generated code]*/
597
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000598static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300599_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
600/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601{
602 self->seennl = 0;
603 self->pendingcr = 0;
604 if (self->decoder != Py_None)
605 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
606 else
607 Py_RETURN_NONE;
608}
609
610static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612{
613 switch (self->seennl) {
614 case SEEN_CR:
615 return PyUnicode_FromString("\r");
616 case SEEN_LF:
617 return PyUnicode_FromString("\n");
618 case SEEN_CRLF:
619 return PyUnicode_FromString("\r\n");
620 case SEEN_CR | SEEN_LF:
621 return Py_BuildValue("ss", "\r", "\n");
622 case SEEN_CR | SEEN_CRLF:
623 return Py_BuildValue("ss", "\r", "\r\n");
624 case SEEN_LF | SEEN_CRLF:
625 return Py_BuildValue("ss", "\n", "\r\n");
626 case SEEN_CR | SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("sss", "\r", "\n", "\r\n");
628 default:
629 Py_RETURN_NONE;
630 }
631
632}
633
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634/* TextIOWrapper */
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636typedef PyObject *
637 (*encodefunc_t)(PyObject *, PyObject *);
638
639typedef struct
640{
641 PyObject_HEAD
642 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000643 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 Py_ssize_t chunk_size;
645 PyObject *buffer;
646 PyObject *encoding;
647 PyObject *encoder;
648 PyObject *decoder;
649 PyObject *readnl;
650 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900651 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200653 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 char readuniversal;
655 char readtranslate;
656 char writetranslate;
657 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200658 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200660 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000665
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000668
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
674 */
675 PyObject *decoded_chars; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
Inada Naokibfba8c32019-05-16 15:03:20 +0900677 PyObject *pending_bytes; // data waiting to be written.
678 // ascii unicode, bytes, or list of them.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000679 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000680
Oren Milman13614e32017-08-24 19:51:24 +0300681 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
685 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000686 PyObject *snapshot;
687 /* Bytes-to-characters ratio for the current chunk. Serves as input for
688 the heuristic in tell(). */
689 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000690
691 /* Cache raw object if it's a FileIO object */
692 PyObject *raw;
693
694 PyObject *weakreflist;
695 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000696} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000697
Zackery Spytz23db9352018-06-29 04:14:58 -0600698static void
699textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
700
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701/* A couple of specialized cases in order to bypass the slow incremental
702 encoding methods for the most popular encodings. */
703
704static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000705ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706{
INADA Naoki507434f2017-12-21 09:59:53 +0900707 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708}
709
710static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000711utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100713 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900714 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715}
716
717static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000718utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100720 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900721 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722}
723
724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000725utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726{
Antoine Pitroue4501852009-05-14 18:55:55 +0000727 if (!self->encoding_start_of_stream) {
728 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200729#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000734 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100735 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900736 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737}
738
Antoine Pitroue4501852009-05-14 18:55:55 +0000739static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000740utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000741{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100742 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900743 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100749 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900750 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000751}
752
753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000755{
756 if (!self->encoding_start_of_stream) {
757 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200758#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000759 return utf32be_encode(self, text);
760#else
761 return utf32le_encode(self, text);
762#endif
763 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100764 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900765 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000766}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767
768static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000769utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000770{
INADA Naoki507434f2017-12-21 09:59:53 +0900771 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772}
773
774static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000775latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776{
INADA Naoki507434f2017-12-21 09:59:53 +0900777 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778}
779
Inada Naokibfba8c32019-05-16 15:03:20 +0900780// Return true when encoding can be skipped when text is ascii.
781static inline int
782is_asciicompat_encoding(encodefunc_t f)
783{
784 return f == (encodefunc_t) ascii_encode
785 || f == (encodefunc_t) latin1_encode
786 || f == (encodefunc_t) utf8_encode;
787}
788
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000789/* Map normalized encoding names onto the specialized encoding funcs */
790
791typedef struct {
792 const char *name;
793 encodefunc_t encodefunc;
794} encodefuncentry;
795
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200796static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797 {"ascii", (encodefunc_t) ascii_encode},
798 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000799 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000800 {"utf-16-be", (encodefunc_t) utf16be_encode},
801 {"utf-16-le", (encodefunc_t) utf16le_encode},
802 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000803 {"utf-32-be", (encodefunc_t) utf32be_encode},
804 {"utf-32-le", (encodefunc_t) utf32le_encode},
805 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000806 {NULL, NULL}
807};
808
INADA Naoki507434f2017-12-21 09:59:53 +0900809static int
810validate_newline(const char *newline)
811{
812 if (newline && newline[0] != '\0'
813 && !(newline[0] == '\n' && newline[1] == '\0')
814 && !(newline[0] == '\r' && newline[1] == '\0')
815 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
816 PyErr_Format(PyExc_ValueError,
817 "illegal newline value: %s", newline);
818 return -1;
819 }
820 return 0;
821}
822
823static int
824set_newline(textio *self, const char *newline)
825{
826 PyObject *old = self->readnl;
827 if (newline == NULL) {
828 self->readnl = NULL;
829 }
830 else {
831 self->readnl = PyUnicode_FromString(newline);
832 if (self->readnl == NULL) {
833 self->readnl = old;
834 return -1;
835 }
836 }
837 self->readuniversal = (newline == NULL || newline[0] == '\0');
838 self->readtranslate = (newline == NULL);
839 self->writetranslate = (newline == NULL || newline[0] != '\0');
840 if (!self->readuniversal && self->readnl != NULL) {
841 // validate_newline() accepts only ASCII newlines.
842 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
843 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
844 if (strcmp(self->writenl, "\n") == 0) {
845 self->writenl = NULL;
846 }
847 }
848 else {
849#ifdef MS_WINDOWS
850 self->writenl = "\r\n";
851#else
852 self->writenl = NULL;
853#endif
854 }
855 Py_XDECREF(old);
856 return 0;
857}
858
859static int
860_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
861 const char *errors)
862{
863 PyObject *res;
864 int r;
865
866 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
867 if (res == NULL)
868 return -1;
869
870 r = PyObject_IsTrue(res);
871 Py_DECREF(res);
872 if (r == -1)
873 return -1;
874
875 if (r != 1)
876 return 0;
877
878 Py_CLEAR(self->decoder);
879 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
880 if (self->decoder == NULL)
881 return -1;
882
883 if (self->readuniversal) {
884 PyObject *incrementalDecoder = PyObject_CallFunction(
885 (PyObject *)&PyIncrementalNewlineDecoder_Type,
886 "Oi", self->decoder, (int)self->readtranslate);
887 if (incrementalDecoder == NULL)
888 return -1;
889 Py_CLEAR(self->decoder);
890 self->decoder = incrementalDecoder;
891 }
892
893 return 0;
894}
895
896static PyObject*
897_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
898{
899 PyObject *chars;
900
901 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
902 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
903 else
904 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
905 eof ? Py_True : Py_False, NULL);
906
907 if (check_decoded(chars) < 0)
908 // check_decoded already decreases refcount
909 return NULL;
910
911 return chars;
912}
913
914static int
915_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
916 const char *errors)
917{
918 PyObject *res;
919 int r;
920
921 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
922 if (res == NULL)
923 return -1;
924
925 r = PyObject_IsTrue(res);
926 Py_DECREF(res);
927 if (r == -1)
928 return -1;
929
930 if (r != 1)
931 return 0;
932
933 Py_CLEAR(self->encoder);
934 self->encodefunc = NULL;
935 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
936 if (self->encoder == NULL)
937 return -1;
938
939 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200940 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
941 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900942 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200943 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900944 const encodefuncentry *e = encodefuncs;
945 while (e->name != NULL) {
946 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
947 self->encodefunc = e->encodefunc;
948 break;
949 }
950 e++;
951 }
952 }
953 Py_XDECREF(res);
954
955 return 0;
956}
957
958static int
959_textiowrapper_fix_encoder_state(textio *self)
960{
961 if (!self->seekable || !self->encoder) {
962 return 0;
963 }
964
965 self->encoding_start_of_stream = 1;
966
967 PyObject *cookieObj = PyObject_CallMethodObjArgs(
968 self->buffer, _PyIO_str_tell, NULL);
969 if (cookieObj == NULL) {
970 return -1;
971 }
972
973 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
974 Py_DECREF(cookieObj);
975 if (cmp < 0) {
976 return -1;
977 }
978
979 if (cmp == 0) {
980 self->encoding_start_of_stream = 0;
981 PyObject *res = PyObject_CallMethodObjArgs(
982 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
983 if (res == NULL) {
984 return -1;
985 }
986 Py_DECREF(res);
987 }
988
989 return 0;
990}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000991
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300992/*[clinic input]
993_io.TextIOWrapper.__init__
994 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700995 encoding: str(accept={str, NoneType}) = NULL
INADA Naoki507434f2017-12-21 09:59:53 +0900996 errors: object = None
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700997 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200998 line_buffering: bool(accept={int}) = False
999 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001000
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001001Character and line based layer over a BufferedIOBase object, buffer.
1002
1003encoding gives the name of the encoding that the stream will be
1004decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1005
1006errors determines the strictness of encoding and decoding (see
1007help(codecs.Codec) or the documentation for codecs.register) and
1008defaults to "strict".
1009
1010newline controls how line endings are handled. It can be None, '',
1011'\n', '\r', and '\r\n'. It works as follows:
1012
1013* On input, if newline is None, universal newlines mode is
1014 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1015 these are translated into '\n' before being returned to the
1016 caller. If it is '', universal newline mode is enabled, but line
1017 endings are returned to the caller untranslated. If it has any of
1018 the other legal values, input lines are only terminated by the given
1019 string, and the line ending is returned to the caller untranslated.
1020
1021* On output, if newline is None, any '\n' characters written are
1022 translated to the system default line separator, os.linesep. If
1023 newline is '' or '\n', no translation takes place. If newline is any
1024 of the other legal values, any '\n' characters written are translated
1025 to the given string.
1026
1027If line_buffering is True, a call to flush is implied when a call to
1028write contains a newline character.
1029[clinic start generated code]*/
1030
1031static int
1032_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001033 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001034 const char *newline, int line_buffering,
1035 int write_through)
INADA Naoki507434f2017-12-21 09:59:53 +09001036/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001037{
1038 PyObject *raw, *codec_info = NULL;
1039 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 PyObject *res;
1041 int r;
1042
1043 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001044 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001045
INADA Naoki507434f2017-12-21 09:59:53 +09001046 if (errors == Py_None) {
1047 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001048 if (errors == NULL) {
1049 return -1;
1050 }
INADA Naoki507434f2017-12-21 09:59:53 +09001051 }
1052 else if (!PyUnicode_Check(errors)) {
1053 // Check 'errors' argument here because Argument Clinic doesn't support
1054 // 'str(accept={str, NoneType})' converter.
1055 PyErr_Format(
1056 PyExc_TypeError,
1057 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1058 errors->ob_type->tp_name);
1059 return -1;
1060 }
1061
1062 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001063 return -1;
1064 }
1065
1066 Py_CLEAR(self->buffer);
1067 Py_CLEAR(self->encoding);
1068 Py_CLEAR(self->encoder);
1069 Py_CLEAR(self->decoder);
1070 Py_CLEAR(self->readnl);
1071 Py_CLEAR(self->decoded_chars);
1072 Py_CLEAR(self->pending_bytes);
1073 Py_CLEAR(self->snapshot);
1074 Py_CLEAR(self->errors);
1075 Py_CLEAR(self->raw);
1076 self->decoded_chars_used = 0;
1077 self->pending_bytes_count = 0;
1078 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001079 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001080
1081 if (encoding == NULL) {
1082 /* Try os.device_encoding(fileno) */
1083 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001084 state = IO_STATE();
1085 if (state == NULL)
1086 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001087 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001088 /* Ignore only AttributeError and UnsupportedOperation */
1089 if (fileno == NULL) {
1090 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1091 PyErr_ExceptionMatches(state->unsupported_operation)) {
1092 PyErr_Clear();
1093 }
1094 else {
1095 goto error;
1096 }
1097 }
1098 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001099 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001100 Py_DECREF(fileno);
1101 if (fd == -1 && PyErr_Occurred()) {
1102 goto error;
1103 }
1104
1105 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001106 if (self->encoding == NULL)
1107 goto error;
1108 else if (!PyUnicode_Check(self->encoding))
1109 Py_CLEAR(self->encoding);
1110 }
1111 }
1112 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001113 PyObject *locale_module = _PyIO_get_locale_module(state);
1114 if (locale_module == NULL)
1115 goto catch_ImportError;
Victor Stinner61bdb0d2016-12-09 15:39:28 +01001116 self->encoding = _PyObject_CallMethodIdObjArgs(
1117 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001118 Py_DECREF(locale_module);
1119 if (self->encoding == NULL) {
1120 catch_ImportError:
1121 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001122 Importing locale can raise an ImportError because of
1123 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001124 ImportError if _locale is not available. These will happen
1125 during module building.
1126 */
1127 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1128 PyErr_Clear();
1129 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001130 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001131 else
1132 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001133 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001134 else if (!PyUnicode_Check(self->encoding))
1135 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001136 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001137 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001138 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001139 if (encoding == NULL)
1140 goto error;
1141 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001142 else if (encoding != NULL) {
1143 self->encoding = PyUnicode_FromString(encoding);
1144 if (self->encoding == NULL)
1145 goto error;
1146 }
1147 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001148 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001149 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001150 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001151 }
1152
Nick Coghlana9b15242014-02-04 22:11:18 +10001153 /* Check we have been asked for a real text encoding */
1154 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1155 if (codec_info == NULL) {
1156 Py_CLEAR(self->encoding);
1157 goto error;
1158 }
1159
1160 /* XXX: Failures beyond this point have the potential to leak elements
1161 * of the partially constructed object (like self->encoding)
1162 */
1163
INADA Naoki507434f2017-12-21 09:59:53 +09001164 Py_INCREF(errors);
1165 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001166 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001167 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001168 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001169 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001170 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001171 }
1172
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001173 self->buffer = buffer;
1174 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001175
INADA Naoki507434f2017-12-21 09:59:53 +09001176 /* Build the decoder object */
1177 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1178 goto error;
1179
1180 /* Build the encoder object */
1181 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1182 goto error;
1183
1184 /* Finished sorting out the codec details */
1185 Py_CLEAR(codec_info);
1186
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001187 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1188 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001189 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1190 {
1191 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1192 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001193 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001194 if (raw != NULL) {
1195 if (Py_TYPE(raw) == &PyFileIO_Type)
1196 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001197 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001198 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001199 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001200 }
1201
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001202 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001203 if (res == NULL)
1204 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001205 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001206 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001207 if (r < 0)
1208 goto error;
1209 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001210
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001211 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1212 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001213 goto error;
1214 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001215 Py_XDECREF(res);
1216 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001217
Antoine Pitroue4501852009-05-14 18:55:55 +00001218 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001219 if (_textiowrapper_fix_encoder_state(self) < 0) {
1220 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001221 }
1222
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001223 self->ok = 1;
1224 return 0;
1225
1226 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001227 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001228 return -1;
1229}
1230
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001231/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1232 * -1 on error.
1233 */
1234static int
1235convert_optional_bool(PyObject *obj, int default_value)
1236{
1237 long v;
1238 if (obj == Py_None) {
1239 v = default_value;
1240 }
1241 else {
1242 v = PyLong_AsLong(obj);
1243 if (v == -1 && PyErr_Occurred())
1244 return -1;
1245 }
1246 return v != 0;
1247}
1248
INADA Naoki507434f2017-12-21 09:59:53 +09001249static int
1250textiowrapper_change_encoding(textio *self, PyObject *encoding,
1251 PyObject *errors, int newline_changed)
1252{
1253 /* Use existing settings where new settings are not specified */
1254 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1255 return 0; // no change
1256 }
1257
1258 if (encoding == Py_None) {
1259 encoding = self->encoding;
1260 if (errors == Py_None) {
1261 errors = self->errors;
1262 }
1263 }
1264 else if (errors == Py_None) {
1265 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001266 if (errors == NULL) {
1267 return -1;
1268 }
INADA Naoki507434f2017-12-21 09:59:53 +09001269 }
1270
1271 const char *c_errors = PyUnicode_AsUTF8(errors);
1272 if (c_errors == NULL) {
1273 return -1;
1274 }
1275
1276 // Create new encoder & decoder
1277 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1278 PyUnicode_AsUTF8(encoding), "codecs.open()");
1279 if (codec_info == NULL) {
1280 return -1;
1281 }
1282 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1283 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1284 Py_DECREF(codec_info);
1285 return -1;
1286 }
1287 Py_DECREF(codec_info);
1288
1289 Py_INCREF(encoding);
1290 Py_INCREF(errors);
1291 Py_SETREF(self->encoding, encoding);
1292 Py_SETREF(self->errors, errors);
1293
1294 return _textiowrapper_fix_encoder_state(self);
1295}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001296
1297/*[clinic input]
1298_io.TextIOWrapper.reconfigure
1299 *
INADA Naoki507434f2017-12-21 09:59:53 +09001300 encoding: object = None
1301 errors: object = None
1302 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001303 line_buffering as line_buffering_obj: object = None
1304 write_through as write_through_obj: object = None
1305
1306Reconfigure the text stream with new parameters.
1307
1308This also does an implicit stream flush.
1309
1310[clinic start generated code]*/
1311
1312static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001313_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1314 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001315 PyObject *line_buffering_obj,
1316 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001317/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001318{
1319 int line_buffering;
1320 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001321 const char *newline = NULL;
1322
1323 /* Check if something is in the read buffer */
1324 if (self->decoded_chars != NULL) {
1325 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001326 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001327 "of stream after the first read");
1328 return NULL;
1329 }
1330 }
1331
1332 if (newline_obj != NULL && newline_obj != Py_None) {
1333 newline = PyUnicode_AsUTF8(newline_obj);
1334 if (newline == NULL || validate_newline(newline) < 0) {
1335 return NULL;
1336 }
1337 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001338
1339 line_buffering = convert_optional_bool(line_buffering_obj,
1340 self->line_buffering);
1341 write_through = convert_optional_bool(write_through_obj,
1342 self->write_through);
1343 if (line_buffering < 0 || write_through < 0) {
1344 return NULL;
1345 }
INADA Naoki507434f2017-12-21 09:59:53 +09001346
1347 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001348 if (res == NULL) {
1349 return NULL;
1350 }
INADA Naoki507434f2017-12-21 09:59:53 +09001351 Py_DECREF(res);
1352 self->b2cratio = 0;
1353
1354 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1355 return NULL;
1356 }
1357
1358 if (textiowrapper_change_encoding(
1359 self, encoding, errors, newline_obj != NULL) < 0) {
1360 return NULL;
1361 }
1362
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001363 self->line_buffering = line_buffering;
1364 self->write_through = write_through;
1365 Py_RETURN_NONE;
1366}
1367
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001368static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001369textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001370{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001371 self->ok = 0;
1372 Py_CLEAR(self->buffer);
1373 Py_CLEAR(self->encoding);
1374 Py_CLEAR(self->encoder);
1375 Py_CLEAR(self->decoder);
1376 Py_CLEAR(self->readnl);
1377 Py_CLEAR(self->decoded_chars);
1378 Py_CLEAR(self->pending_bytes);
1379 Py_CLEAR(self->snapshot);
1380 Py_CLEAR(self->errors);
1381 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001382
1383 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001384 return 0;
1385}
1386
1387static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001388textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001389{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001390 self->finalizing = 1;
1391 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001392 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001393 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001394 _PyObject_GC_UNTRACK(self);
1395 if (self->weakreflist != NULL)
1396 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001397 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001398 Py_TYPE(self)->tp_free((PyObject *)self);
1399}
1400
1401static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001402textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001403{
1404 Py_VISIT(self->buffer);
1405 Py_VISIT(self->encoding);
1406 Py_VISIT(self->encoder);
1407 Py_VISIT(self->decoder);
1408 Py_VISIT(self->readnl);
1409 Py_VISIT(self->decoded_chars);
1410 Py_VISIT(self->pending_bytes);
1411 Py_VISIT(self->snapshot);
1412 Py_VISIT(self->errors);
1413 Py_VISIT(self->raw);
1414
1415 Py_VISIT(self->dict);
1416 return 0;
1417}
1418
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001419static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001420textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001421
1422/* This macro takes some shortcuts to make the common case faster. */
1423#define CHECK_CLOSED(self) \
1424 do { \
1425 int r; \
1426 PyObject *_res; \
1427 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1428 if (self->raw != NULL) \
1429 r = _PyFileIO_closed(self->raw); \
1430 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001431 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001432 if (_res == NULL) \
1433 return NULL; \
1434 r = PyObject_IsTrue(_res); \
1435 Py_DECREF(_res); \
1436 if (r < 0) \
1437 return NULL; \
1438 } \
1439 if (r > 0) { \
1440 PyErr_SetString(PyExc_ValueError, \
1441 "I/O operation on closed file."); \
1442 return NULL; \
1443 } \
1444 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001445 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001446 return NULL; \
1447 } while (0)
1448
/* Return NULL from the enclosing function, with ValueError set, when the
   wrapper has not been successfully initialized (ok <= 0).

   The expansion is a bare `if` statement, so use it only as a statement
   of its own. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1455
/* Return NULL from the enclosing function, with ValueError set, when the
   wrapper is uninitialized or its buffer has been detached.

   The expansion consists of two statements, so use it only as a statement
   of its own and never as the unbraced body of an if/else or loop. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1463
/* Variant of CHECK_ATTACHED for functions that return int: returns -1,
   with ValueError set, when the wrapper is uninitialized or detached. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1474
1475
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001476/*[clinic input]
1477_io.TextIOWrapper.detach
1478[clinic start generated code]*/
1479
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001480static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001481_io_TextIOWrapper_detach_impl(textio *self)
1482/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001483{
1484 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001485 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001486 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1487 if (res == NULL)
1488 return NULL;
1489 Py_DECREF(res);
1490 buffer = self->buffer;
1491 self->buffer = NULL;
1492 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001493 return buffer;
1494}
1495
Antoine Pitrou24f36292009-03-28 22:16:42 +00001496/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001497 underlying buffered object, though. */
1498static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001499_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001500{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501 if (self->pending_bytes == NULL)
1502 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001503
Inada Naokibfba8c32019-05-16 15:03:20 +09001504 PyObject *pending = self->pending_bytes;
1505 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001506
Inada Naokibfba8c32019-05-16 15:03:20 +09001507 if (PyBytes_Check(pending)) {
1508 b = pending;
1509 Py_INCREF(b);
1510 }
1511 else if (PyUnicode_Check(pending)) {
1512 assert(PyUnicode_IS_ASCII(pending));
1513 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1514 b = PyBytes_FromStringAndSize(
1515 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1516 if (b == NULL) {
1517 return -1;
1518 }
1519 }
1520 else {
1521 assert(PyList_Check(pending));
1522 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1523 if (b == NULL) {
1524 return -1;
1525 }
1526
1527 char *buf = PyBytes_AsString(b);
1528 Py_ssize_t pos = 0;
1529
1530 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1531 PyObject *obj = PyList_GET_ITEM(pending, i);
1532 char *src;
1533 Py_ssize_t len;
1534 if (PyUnicode_Check(obj)) {
1535 assert(PyUnicode_IS_ASCII(obj));
1536 src = PyUnicode_DATA(obj);
1537 len = PyUnicode_GET_LENGTH(obj);
1538 }
1539 else {
1540 assert(PyBytes_Check(obj));
1541 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1542 Py_DECREF(b);
1543 return -1;
1544 }
1545 }
1546 memcpy(buf + pos, src, len);
1547 pos += len;
1548 }
1549 assert(pos == self->pending_bytes_count);
1550 }
1551
1552 self->pending_bytes_count = 0;
1553 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001554 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001555
1556 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001557 do {
1558 ret = PyObject_CallMethodObjArgs(self->buffer,
1559 _PyIO_str_write, b, NULL);
1560 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561 Py_DECREF(b);
1562 if (ret == NULL)
1563 return -1;
1564 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001565 return 0;
1566}
1567
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001568/*[clinic input]
1569_io.TextIOWrapper.write
1570 text: unicode
1571 /
1572[clinic start generated code]*/
1573
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001575_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1576/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577{
1578 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001579 PyObject *b;
1580 Py_ssize_t textlen;
1581 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001582 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001583
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001584 if (PyUnicode_READY(text) == -1)
1585 return NULL;
1586
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001587 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001588 CHECK_CLOSED(self);
1589
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001590 if (self->encoder == NULL)
1591 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001592
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593 Py_INCREF(text);
1594
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001595 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001596
1597 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001598 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001599 haslf = 1;
1600
1601 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001602 PyObject *newtext = _PyObject_CallMethodId(
1603 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604 Py_DECREF(text);
1605 if (newtext == NULL)
1606 return NULL;
1607 text = newtext;
1608 }
1609
Antoine Pitroue96ec682011-07-23 21:46:35 +02001610 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001611 text_needflush = 1;
1612 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001614 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001615 needflush = 1;
1616
1617 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001618 if (self->encodefunc != NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001619 if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
1620 b = text;
1621 Py_INCREF(b);
1622 }
1623 else {
1624 b = (*self->encodefunc)((PyObject *) self, text);
1625 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001626 self->encoding_start_of_stream = 0;
1627 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001628 else
1629 b = PyObject_CallMethodObjArgs(self->encoder,
1630 _PyIO_str_encode, text, NULL);
Inada Naokibfba8c32019-05-16 15:03:20 +09001631
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 Py_DECREF(text);
1633 if (b == NULL)
1634 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001635 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001636 PyErr_Format(PyExc_TypeError,
1637 "encoder should return a bytes object, not '%.200s'",
1638 Py_TYPE(b)->tp_name);
1639 Py_DECREF(b);
1640 return NULL;
1641 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642
Inada Naokibfba8c32019-05-16 15:03:20 +09001643 Py_ssize_t bytes_len;
1644 if (b == text) {
1645 bytes_len = PyUnicode_GET_LENGTH(b);
1646 }
1647 else {
1648 bytes_len = PyBytes_GET_SIZE(b);
1649 }
1650
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001651 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001652 self->pending_bytes_count = 0;
1653 self->pending_bytes = b;
1654 }
1655 else if (!PyList_CheckExact(self->pending_bytes)) {
1656 PyObject *list = PyList_New(2);
1657 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 Py_DECREF(b);
1659 return NULL;
1660 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001661 PyList_SET_ITEM(list, 0, self->pending_bytes);
1662 PyList_SET_ITEM(list, 1, b);
1663 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001664 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001665 else {
1666 if (PyList_Append(self->pending_bytes, b) < 0) {
1667 Py_DECREF(b);
1668 return NULL;
1669 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001672
1673 self->pending_bytes_count += bytes_len;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001674 if (self->pending_bytes_count > self->chunk_size || needflush ||
1675 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001676 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001677 return NULL;
1678 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001679
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 if (needflush) {
1681 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1682 if (ret == NULL)
1683 return NULL;
1684 Py_DECREF(ret);
1685 }
1686
Zackery Spytz23db9352018-06-29 04:14:58 -06001687 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 Py_CLEAR(self->snapshot);
1689
1690 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001691 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692 if (ret == NULL)
1693 return NULL;
1694 Py_DECREF(ret);
1695 }
1696
1697 return PyLong_FromSsize_t(textlen);
1698}
1699
1700/* Steal a reference to chars and store it in the decoded_char buffer;
1701 */
1702static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001703textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001704{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001705 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001706 self->decoded_chars_used = 0;
1707}
1708
1709static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001710textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711{
1712 PyObject *chars;
1713 Py_ssize_t avail;
1714
1715 if (self->decoded_chars == NULL)
1716 return PyUnicode_FromStringAndSize(NULL, 0);
1717
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001718 /* decoded_chars is guaranteed to be "ready". */
1719 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 - self->decoded_chars_used);
1721
1722 assert(avail >= 0);
1723
1724 if (n < 0 || n > avail)
1725 n = avail;
1726
1727 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001728 chars = PyUnicode_Substring(self->decoded_chars,
1729 self->decoded_chars_used,
1730 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001731 if (chars == NULL)
1732 return NULL;
1733 }
1734 else {
1735 chars = self->decoded_chars;
1736 Py_INCREF(chars);
1737 }
1738
1739 self->decoded_chars_used += n;
1740 return chars;
1741}
1742
1743/* Read and decode the next chunk of data from the BufferedReader.
1744 */
1745static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001746textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747{
1748 PyObject *dec_buffer = NULL;
1749 PyObject *dec_flags = NULL;
1750 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001751 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001753 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001754 int eof;
1755
1756 /* The return value is True unless EOF was reached. The decoded string is
1757 * placed in self._decoded_chars (replacing its previous value). The
1758 * entire input chunk is sent to the decoder, though some of it may remain
1759 * buffered in the decoder, yet to be converted.
1760 */
1761
1762 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001763 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 return -1;
1765 }
1766
1767 if (self->telling) {
1768 /* To prepare for tell(), we need to snapshot a point in the file
1769 * where the decoder's input buffer is empty.
1770 */
1771
1772 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1773 _PyIO_str_getstate, NULL);
1774 if (state == NULL)
1775 return -1;
1776 /* Given this, we know there was a valid snapshot point
1777 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1778 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001779 if (!PyTuple_Check(state)) {
1780 PyErr_SetString(PyExc_TypeError,
1781 "illegal decoder state");
1782 Py_DECREF(state);
1783 return -1;
1784 }
1785 if (!PyArg_ParseTuple(state,
1786 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1787 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001788 Py_DECREF(state);
1789 return -1;
1790 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001791
1792 if (!PyBytes_Check(dec_buffer)) {
1793 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001794 "illegal decoder state: the first item should be a "
1795 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001796 Py_TYPE(dec_buffer)->tp_name);
1797 Py_DECREF(state);
1798 return -1;
1799 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001800 Py_INCREF(dec_buffer);
1801 Py_INCREF(dec_flags);
1802 Py_DECREF(state);
1803 }
1804
1805 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001806 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001807 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001808 }
1809 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001810 if (chunk_size == NULL)
1811 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001812
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001813 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001814 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1815 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816 Py_DECREF(chunk_size);
1817 if (input_chunk == NULL)
1818 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001819
1820 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001821 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001822 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001823 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1824 Py_TYPE(input_chunk)->tp_name);
1825 goto fail;
1826 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827
Antoine Pitroub8503892014-04-29 10:14:02 +02001828 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001829 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001830
INADA Naoki507434f2017-12-21 09:59:53 +09001831 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1832 PyBuffer_Release(&input_chunk_buf);
1833 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001834 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001835
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001836 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001837 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001838 if (nchars > 0)
1839 self->b2cratio = (double) nbytes / nchars;
1840 else
1841 self->b2cratio = 0.0;
1842 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001843 eof = 0;
1844
1845 if (self->telling) {
1846 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1847 * next input to be decoded is dec_buffer + input_chunk.
1848 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001849 PyObject *next_input = dec_buffer;
1850 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001851 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001852 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001853 goto fail;
1854 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001855 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1856 if (snapshot == NULL) {
1857 dec_flags = NULL;
1858 goto fail;
1859 }
1860 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001861 }
1862 Py_DECREF(input_chunk);
1863
1864 return (eof == 0);
1865
1866 fail:
1867 Py_XDECREF(dec_buffer);
1868 Py_XDECREF(dec_flags);
1869 Py_XDECREF(input_chunk);
1870 return -1;
1871}
1872
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001873/*[clinic input]
1874_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001875 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001876 /
1877[clinic start generated code]*/
1878
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001879static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001880_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001881/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001882{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001883 PyObject *result = NULL, *chunks = NULL;
1884
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001885 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001886 CHECK_CLOSED(self);
1887
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001888 if (self->decoder == NULL)
1889 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001890
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001891 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001892 return NULL;
1893
1894 if (n < 0) {
1895 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001896 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001897 PyObject *decoded;
1898 if (bytes == NULL)
1899 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001900
1901 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1902 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1903 bytes, 1);
1904 else
1905 decoded = PyObject_CallMethodObjArgs(
1906 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001907 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001908 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001909 goto fail;
1910
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001911 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001912
1913 if (result == NULL) {
1914 Py_DECREF(decoded);
1915 return NULL;
1916 }
1917
1918 PyUnicode_AppendAndDel(&result, decoded);
1919 if (result == NULL)
1920 goto fail;
1921
Zackery Spytz23db9352018-06-29 04:14:58 -06001922 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001923 Py_CLEAR(self->snapshot);
1924 return result;
1925 }
1926 else {
1927 int res = 1;
1928 Py_ssize_t remaining = n;
1929
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001930 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001931 if (result == NULL)
1932 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001933 if (PyUnicode_READY(result) == -1)
1934 goto fail;
1935 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001936
1937 /* Keep reading chunks until we have n characters to return */
1938 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001939 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001940 if (res < 0) {
1941 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1942 when EINTR occurs so we needn't do it ourselves. */
1943 if (_PyIO_trap_eintr()) {
1944 continue;
1945 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001947 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001948 if (res == 0) /* EOF */
1949 break;
1950 if (chunks == NULL) {
1951 chunks = PyList_New(0);
1952 if (chunks == NULL)
1953 goto fail;
1954 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001955 if (PyUnicode_GET_LENGTH(result) > 0 &&
1956 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001957 goto fail;
1958 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001959 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001960 if (result == NULL)
1961 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001962 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963 }
1964 if (chunks != NULL) {
1965 if (result != NULL && PyList_Append(chunks, result) < 0)
1966 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03001967 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968 if (result == NULL)
1969 goto fail;
1970 Py_CLEAR(chunks);
1971 }
1972 return result;
1973 }
1974 fail:
1975 Py_XDECREF(result);
1976 Py_XDECREF(chunks);
1977 return NULL;
1978}
1979
1980
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001981/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001982 that is to the NUL character. Otherwise the function will produce
1983 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001984static const char *
1985find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001986{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001987 if (kind == PyUnicode_1BYTE_KIND) {
1988 assert(ch < 256);
1989 return (char *) memchr((void *) s, (char) ch, end - s);
1990 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001991 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001992 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001993 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001994 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001995 return s;
1996 if (s == end)
1997 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001998 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 }
2000}
2001
2002Py_ssize_t
2003_PyIO_find_line_ending(
2004 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002005 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002006{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002007 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008
2009 if (translated) {
2010 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002011 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002012 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002013 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002014 else {
2015 *consumed = len;
2016 return -1;
2017 }
2018 }
2019 else if (universal) {
2020 /* Universal newline search. Find any of \r, \r\n, \n
2021 * The decoder ensures that \r\n are not split in two pieces
2022 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002023 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002025 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002026 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002027 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002028 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002029 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 if (s >= end) {
2031 *consumed = len;
2032 return -1;
2033 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002034 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002035 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002036 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002037 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002038 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002039 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002040 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002041 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002042 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002043 }
2044 }
2045 }
2046 else {
2047 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002048 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02002049 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002050 /* Assume that readnl is an ASCII character. */
2051 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002053 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002055 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002056 *consumed = len;
2057 return -1;
2058 }
2059 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002060 const char *s = start;
2061 const char *e = end - (readnl_len - 1)*kind;
2062 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 if (e < s)
2064 e = s;
2065 while (s < e) {
2066 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002067 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068 if (pos == NULL || pos >= e)
2069 break;
2070 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002071 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002072 break;
2073 }
2074 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002075 return (pos - start)/kind + readnl_len;
2076 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002078 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 if (pos == NULL)
2080 *consumed = len;
2081 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002082 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002083 return -1;
2084 }
2085 }
2086}
2087
2088static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002089_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002090{
2091 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2092 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2093 int res;
2094
2095 CHECK_CLOSED(self);
2096
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002097 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002098 return NULL;
2099
2100 chunked = 0;
2101
2102 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002103 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002104 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002105 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002106 Py_ssize_t consumed = 0;
2107
2108 /* First, get some data if necessary */
2109 res = 1;
2110 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002111 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002112 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002113 if (res < 0) {
2114 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2115 when EINTR occurs so we needn't do it ourselves. */
2116 if (_PyIO_trap_eintr()) {
2117 continue;
2118 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002120 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002121 if (res == 0)
2122 break;
2123 }
2124 if (res == 0) {
2125 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002126 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002127 Py_CLEAR(self->snapshot);
2128 start = endpos = offset_to_buffer = 0;
2129 break;
2130 }
2131
2132 if (remaining == NULL) {
2133 line = self->decoded_chars;
2134 start = self->decoded_chars_used;
2135 offset_to_buffer = 0;
2136 Py_INCREF(line);
2137 }
2138 else {
2139 assert(self->decoded_chars_used == 0);
2140 line = PyUnicode_Concat(remaining, self->decoded_chars);
2141 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002142 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002143 Py_CLEAR(remaining);
2144 if (line == NULL)
2145 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002146 if (PyUnicode_READY(line) == -1)
2147 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002148 }
2149
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002150 ptr = PyUnicode_DATA(line);
2151 line_len = PyUnicode_GET_LENGTH(line);
2152 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002153
2154 endpos = _PyIO_find_line_ending(
2155 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002156 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002157 ptr + kind * start,
2158 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002159 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002160 if (endpos >= 0) {
2161 endpos += start;
2162 if (limit >= 0 && (endpos - start) + chunked >= limit)
2163 endpos = start + limit - chunked;
2164 break;
2165 }
2166
2167 /* We can put aside up to `endpos` */
2168 endpos = consumed + start;
2169 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2170 /* Didn't find line ending, but reached length limit */
2171 endpos = start + limit - chunked;
2172 break;
2173 }
2174
2175 if (endpos > start) {
2176 /* No line ending seen yet - put aside current data */
2177 PyObject *s;
2178 if (chunks == NULL) {
2179 chunks = PyList_New(0);
2180 if (chunks == NULL)
2181 goto error;
2182 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002183 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002184 if (s == NULL)
2185 goto error;
2186 if (PyList_Append(chunks, s) < 0) {
2187 Py_DECREF(s);
2188 goto error;
2189 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002190 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002191 Py_DECREF(s);
2192 }
2193 /* There may be some remaining bytes we'll have to prepend to the
2194 next chunk of data */
2195 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002196 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002197 if (remaining == NULL)
2198 goto error;
2199 }
2200 Py_CLEAR(line);
2201 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002202 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002203 }
2204
2205 if (line != NULL) {
2206 /* Our line ends in the current buffer */
2207 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002208 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2209 PyObject *s = PyUnicode_Substring(line, start, endpos);
2210 Py_CLEAR(line);
2211 if (s == NULL)
2212 goto error;
2213 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002214 }
2215 }
2216 if (remaining != NULL) {
2217 if (chunks == NULL) {
2218 chunks = PyList_New(0);
2219 if (chunks == NULL)
2220 goto error;
2221 }
2222 if (PyList_Append(chunks, remaining) < 0)
2223 goto error;
2224 Py_CLEAR(remaining);
2225 }
2226 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002227 if (line != NULL) {
2228 if (PyList_Append(chunks, line) < 0)
2229 goto error;
2230 Py_DECREF(line);
2231 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2233 if (line == NULL)
2234 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002235 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002236 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002237 if (line == NULL) {
2238 Py_INCREF(_PyIO_empty_str);
2239 line = _PyIO_empty_str;
2240 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002241
2242 return line;
2243
2244 error:
2245 Py_XDECREF(chunks);
2246 Py_XDECREF(remaining);
2247 Py_XDECREF(line);
2248 return NULL;
2249}
2250
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002251/*[clinic input]
2252_io.TextIOWrapper.readline
2253 size: Py_ssize_t = -1
2254 /
2255[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002256
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002257static PyObject *
2258_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2259/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2260{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002261 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002262 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002263}
2264
2265/* Seek and Tell */
2266
/* Unpacked form of a seek/tell cookie.  textiowrapper_parse_cookie() fills
   this structure from an integer object, one field at a time, using the
   OFF_* byte offsets; textiowrapper_build_cookie() takes it as input.
   NOTE(review): the per-field notes below are inferred from the field
   names only -- confirm against the seek()/tell() implementation. */
typedef struct {
    Py_off_t start_pos;     /* presumably a position in the underlying buffer */
    int dec_flags;          /* presumably the decoder flags at that position */
    int bytes_to_feed;      /* presumably bytes to feed to the decoder */
    int chars_to_skip;      /* presumably decoded characters to skip */
    char need_eof;          /* presumably whether EOF must be signalled */
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002274
/*
   To speed up cookie packing/unpacking, the fields are stored in a temporary
   byte string which is converted with _PyLong_FromByteArray() (packing) or
   _PyLong_AsByteArray() (unpacking).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields are stored.  Each offset is written
   as "offset of the field stored just before it, plus that field's size".
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2306
2307static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002308textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002309{
2310 unsigned char buffer[COOKIE_BUF_LEN];
2311 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2312 if (cookieLong == NULL)
2313 return -1;
2314
2315 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002316 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002317 Py_DECREF(cookieLong);
2318 return -1;
2319 }
2320 Py_DECREF(cookieLong);
2321
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002322 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2323 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2324 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2325 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2326 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002327
2328 return 0;
2329}
2330
2331static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002332textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002333{
2334 unsigned char buffer[COOKIE_BUF_LEN];
2335
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002336 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2337 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2338 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2339 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2340 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002341
Christian Heimes743e0cd2012-10-17 23:52:17 +02002342 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2343 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002344}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345
2346static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002347_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002348{
2349 PyObject *res;
2350 /* When seeking to the start of the stream, we call decoder.reset()
2351 rather than decoder.getstate().
2352 This is for a few decoders such as utf-16 for which the state value
2353 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2354 utf-16, that we are expecting a BOM).
2355 */
2356 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2357 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2358 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002359 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2360 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002361 if (res == NULL)
2362 return -1;
2363 Py_DECREF(res);
2364 return 0;
2365}
2366
Antoine Pitroue4501852009-05-14 18:55:55 +00002367static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002368_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002369{
2370 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002371 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002372 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2373 self->encoding_start_of_stream = 1;
2374 }
2375 else {
2376 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002377 _PyLong_Zero, NULL);
Antoine Pitroue4501852009-05-14 18:55:55 +00002378 self->encoding_start_of_stream = 0;
2379 }
2380 if (res == NULL)
2381 return -1;
2382 Py_DECREF(res);
2383 return 0;
2384}
2385
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002386static int
2387_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2388{
2389 /* Same as _textiowrapper_decoder_setstate() above. */
2390 return _textiowrapper_encoder_reset(
2391 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2392}
2393
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002394/*[clinic input]
2395_io.TextIOWrapper.seek
2396 cookie as cookieObj: object
2397 whence: int = 0
2398 /
2399[clinic start generated code]*/
2400
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002401static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002402_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2403/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002404{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002405 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002406 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002407 PyObject *res;
2408 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002409 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002410
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002411 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002412 CHECK_CLOSED(self);
2413
2414 Py_INCREF(cookieObj);
2415
2416 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002417 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418 goto fail;
2419 }
2420
ngie-eign848037c2019-03-02 23:28:26 -08002421 switch (whence) {
2422 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002423 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002424 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002425 if (cmp < 0)
2426 goto fail;
2427
2428 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002429 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002430 goto fail;
2431 }
2432
2433 /* Seeking to the current position should attempt to
2434 * sync the underlying buffer with the current position.
2435 */
2436 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002437 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002438 if (cookieObj == NULL)
2439 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002440 break;
2441
ngie-eign848037c2019-03-02 23:28:26 -08002442 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002443 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002444 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002445 if (cmp < 0)
2446 goto fail;
2447
2448 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002449 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450 goto fail;
2451 }
2452
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002453 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002454 if (res == NULL)
2455 goto fail;
2456 Py_DECREF(res);
2457
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002458 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002459 Py_CLEAR(self->snapshot);
2460 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002461 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002462 if (res == NULL)
2463 goto fail;
2464 Py_DECREF(res);
2465 }
2466
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002467 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002468 Py_CLEAR(cookieObj);
2469 if (res == NULL)
2470 goto fail;
2471 if (self->encoder) {
2472 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002473 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002474 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2475 Py_DECREF(res);
2476 goto fail;
2477 }
2478 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002479 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002480
ngie-eign848037c2019-03-02 23:28:26 -08002481 case SEEK_SET:
2482 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002483
ngie-eign848037c2019-03-02 23:28:26 -08002484 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002486 "invalid whence (%d, should be %d, %d or %d)", whence,
2487 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002488 goto fail;
2489 }
2490
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002491 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002492 if (cmp < 0)
2493 goto fail;
2494
2495 if (cmp == 1) {
2496 PyErr_Format(PyExc_ValueError,
2497 "negative seek position %R", cookieObj);
2498 goto fail;
2499 }
2500
2501 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2502 if (res == NULL)
2503 goto fail;
2504 Py_DECREF(res);
2505
2506 /* The strategy of seek() is to go back to the safe start point
2507 * and replay the effect of read(chars_to_skip) from there.
2508 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002509 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002510 goto fail;
2511
2512 /* Seek back to the safe start point. */
2513 posobj = PyLong_FromOff_t(cookie.start_pos);
2514 if (posobj == NULL)
2515 goto fail;
2516 res = PyObject_CallMethodObjArgs(self->buffer,
2517 _PyIO_str_seek, posobj, NULL);
2518 Py_DECREF(posobj);
2519 if (res == NULL)
2520 goto fail;
2521 Py_DECREF(res);
2522
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002523 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002524 Py_CLEAR(self->snapshot);
2525
2526 /* Restore the decoder to its state from the safe start point. */
2527 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002528 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002529 goto fail;
2530 }
2531
2532 if (cookie.chars_to_skip) {
2533 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002534 PyObject *input_chunk = _PyObject_CallMethodId(
2535 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536 PyObject *decoded;
2537
2538 if (input_chunk == NULL)
2539 goto fail;
2540
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002541 if (!PyBytes_Check(input_chunk)) {
2542 PyErr_Format(PyExc_TypeError,
2543 "underlying read() should have returned a bytes "
2544 "object, not '%.200s'",
2545 Py_TYPE(input_chunk)->tp_name);
2546 Py_DECREF(input_chunk);
2547 goto fail;
2548 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002550 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2551 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002552 goto fail;
2553 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002554 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002555
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002556 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2557 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002559 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002560 goto fail;
2561
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002562 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563
2564 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002565 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002566 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567 goto fail;
2568 }
2569 self->decoded_chars_used = cookie.chars_to_skip;
2570 }
2571 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002572 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2573 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002574 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002575 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576 }
2577
Antoine Pitroue4501852009-05-14 18:55:55 +00002578 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2579 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002580 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002581 goto fail;
2582 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002583 return cookieObj;
2584 fail:
2585 Py_XDECREF(cookieObj);
2586 return NULL;
2587
2588}
2589
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002590/*[clinic input]
2591_io.TextIOWrapper.tell
2592[clinic start generated code]*/
2593
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002594static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002595_io_TextIOWrapper_tell_impl(textio *self)
2596/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002597{
2598 PyObject *res;
2599 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002600 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002601 PyObject *next_input;
2602 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002603 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002604 PyObject *saved_state = NULL;
2605 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002606 Py_ssize_t dec_buffer_len;
2607 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002608
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002609 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002610 CHECK_CLOSED(self);
2611
2612 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002613 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002614 goto fail;
2615 }
2616 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002617 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002618 "telling position disabled by next() call");
2619 goto fail;
2620 }
2621
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002622 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002623 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002624 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002625 if (res == NULL)
2626 goto fail;
2627 Py_DECREF(res);
2628
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002629 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002630 if (posobj == NULL)
2631 goto fail;
2632
2633 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002634 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002635 return posobj;
2636 }
2637
2638#if defined(HAVE_LARGEFILE_SUPPORT)
2639 cookie.start_pos = PyLong_AsLongLong(posobj);
2640#else
2641 cookie.start_pos = PyLong_AsLong(posobj);
2642#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002643 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002644 if (PyErr_Occurred())
2645 goto fail;
2646
2647 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002648 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002649 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002650 goto fail;
2651
2652 assert (PyBytes_Check(next_input));
2653
2654 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2655
2656 /* How many decoded characters have been used up since the snapshot? */
2657 if (self->decoded_chars_used == 0) {
2658 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002659 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660 }
2661
2662 chars_to_skip = self->decoded_chars_used;
2663
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002664 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002665 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2666 _PyIO_str_getstate, NULL);
2667 if (saved_state == NULL)
2668 goto fail;
2669
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002670#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002671 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002672 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2673 _PyIO_str_getstate, NULL); \
2674 if (_state == NULL) \
2675 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002676 if (!PyTuple_Check(_state)) { \
2677 PyErr_SetString(PyExc_TypeError, \
2678 "illegal decoder state"); \
2679 Py_DECREF(_state); \
2680 goto fail; \
2681 } \
2682 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2683 &dec_buffer, &dec_flags)) \
2684 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002685 Py_DECREF(_state); \
2686 goto fail; \
2687 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002688 if (!PyBytes_Check(dec_buffer)) { \
2689 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002690 "illegal decoder state: the first item should be a " \
2691 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002692 Py_TYPE(dec_buffer)->tp_name); \
2693 Py_DECREF(_state); \
2694 goto fail; \
2695 } \
2696 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002697 Py_DECREF(_state); \
2698 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002699
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002700#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002701 PyObject *_decoded = _PyObject_CallMethodId( \
2702 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002703 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002704 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002705 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002706 Py_DECREF(_decoded); \
2707 } while (0)
2708
2709 /* Fast search for an acceptable start point, close to our
2710 current pos */
2711 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2712 skip_back = 1;
2713 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2714 input = PyBytes_AS_STRING(next_input);
2715 while (skip_bytes > 0) {
2716 /* Decode up to temptative start point */
2717 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2718 goto fail;
2719 DECODER_DECODE(input, skip_bytes, chars_decoded);
2720 if (chars_decoded <= chars_to_skip) {
2721 DECODER_GETSTATE();
2722 if (dec_buffer_len == 0) {
2723 /* Before pos and no bytes buffered in decoder => OK */
2724 cookie.dec_flags = dec_flags;
2725 chars_to_skip -= chars_decoded;
2726 break;
2727 }
2728 /* Skip back by buffered amount and reset heuristic */
2729 skip_bytes -= dec_buffer_len;
2730 skip_back = 1;
2731 }
2732 else {
2733 /* We're too far ahead, skip back a bit */
2734 skip_bytes -= skip_back;
2735 skip_back *= 2;
2736 }
2737 }
2738 if (skip_bytes <= 0) {
2739 skip_bytes = 0;
2740 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2741 goto fail;
2742 }
2743
2744 /* Note our initial start point. */
2745 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002746 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002747 if (chars_to_skip == 0)
2748 goto finally;
2749
2750 /* We should be close to the desired position. Now feed the decoder one
2751 * byte at a time until we reach the `chars_to_skip` target.
2752 * As we go, note the nearest "safe start point" before the current
2753 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002754 * can safely start from there and advance to this location).
2755 */
2756 chars_decoded = 0;
2757 input = PyBytes_AS_STRING(next_input);
2758 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002759 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002760 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002761 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002762
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002763 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002764 /* We got n chars for 1 byte */
2765 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002766 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002767 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002768
2769 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2770 /* Decoder buffer is empty, so this is a safe start point. */
2771 cookie.start_pos += cookie.bytes_to_feed;
2772 chars_to_skip -= chars_decoded;
2773 cookie.dec_flags = dec_flags;
2774 cookie.bytes_to_feed = 0;
2775 chars_decoded = 0;
2776 }
2777 if (chars_decoded >= chars_to_skip)
2778 break;
2779 input++;
2780 }
2781 if (input == input_end) {
2782 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002783 PyObject *decoded = _PyObject_CallMethodId(
2784 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002785 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002786 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002787 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002788 Py_DECREF(decoded);
2789 cookie.need_eof = 1;
2790
2791 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002792 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002793 "can't reconstruct logical file position");
2794 goto fail;
2795 }
2796 }
2797
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002798finally:
Victor Stinner7e425412016-12-09 00:36:19 +01002799 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002800 Py_DECREF(saved_state);
2801 if (res == NULL)
2802 return NULL;
2803 Py_DECREF(res);
2804
2805 /* The returned cookie corresponds to the last safe start point. */
2806 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002807 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002808
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002809fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002810 if (saved_state) {
2811 PyObject *type, *value, *traceback;
2812 PyErr_Fetch(&type, &value, &traceback);
Victor Stinner7e425412016-12-09 00:36:19 +01002813 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002814 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002815 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002816 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002817 }
2818 return NULL;
2819}
2820
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002821/*[clinic input]
2822_io.TextIOWrapper.truncate
2823 pos: object = None
2824 /
2825[clinic start generated code]*/
2826
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002827static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002828_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2829/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002830{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002831 PyObject *res;
2832
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002833 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002834
2835 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2836 if (res == NULL)
2837 return NULL;
2838 Py_DECREF(res);
2839
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002840 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002841}
2842
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002843static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002844textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002845{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002846 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002847 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002848
2849 CHECK_INITIALIZED(self);
2850
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002851 res = PyUnicode_FromString("<_io.TextIOWrapper");
2852 if (res == NULL)
2853 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002854
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002855 status = Py_ReprEnter((PyObject *)self);
2856 if (status != 0) {
2857 if (status > 0) {
2858 PyErr_Format(PyExc_RuntimeError,
2859 "reentrant call inside %s.__repr__",
2860 Py_TYPE(self)->tp_name);
2861 }
2862 goto error;
2863 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002864 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002865 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002866 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002867 PyErr_Clear();
2868 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002869 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002870 }
2871 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002872 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002873 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002874 if (s == NULL)
2875 goto error;
2876 PyUnicode_AppendAndDel(&res, s);
2877 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002878 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002879 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002880 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002881 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002882 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002883 PyErr_Clear();
2884 else
2885 goto error;
2886 }
2887 else {
2888 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2889 Py_DECREF(modeobj);
2890 if (s == NULL)
2891 goto error;
2892 PyUnicode_AppendAndDel(&res, s);
2893 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002894 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002895 }
2896 s = PyUnicode_FromFormat("%U encoding=%R>",
2897 res, self->encoding);
2898 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002899 if (status == 0) {
2900 Py_ReprLeave((PyObject *)self);
2901 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002902 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002903
2904 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002905 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002906 if (status == 0) {
2907 Py_ReprLeave((PyObject *)self);
2908 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002909 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002910}
2911
2912
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002913/* Inquiries */
2914
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002915/*[clinic input]
2916_io.TextIOWrapper.fileno
2917[clinic start generated code]*/
2918
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002919static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002920_io_TextIOWrapper_fileno_impl(textio *self)
2921/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002922{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002923 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002924 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002925}
2926
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002927/*[clinic input]
2928_io.TextIOWrapper.seekable
2929[clinic start generated code]*/
2930
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002931static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002932_io_TextIOWrapper_seekable_impl(textio *self)
2933/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002934{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002935 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002936 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002937}
2938
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002939/*[clinic input]
2940_io.TextIOWrapper.readable
2941[clinic start generated code]*/
2942
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002943static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002944_io_TextIOWrapper_readable_impl(textio *self)
2945/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002946{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002947 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002948 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002949}
2950
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002951/*[clinic input]
2952_io.TextIOWrapper.writable
2953[clinic start generated code]*/
2954
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002955static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002956_io_TextIOWrapper_writable_impl(textio *self)
2957/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002958{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002959 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002960 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002961}
2962
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002963/*[clinic input]
2964_io.TextIOWrapper.isatty
2965[clinic start generated code]*/
2966
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002967static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002968_io_TextIOWrapper_isatty_impl(textio *self)
2969/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002970{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002971 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002972 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002973}
2974
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002975/*[clinic input]
2976_io.TextIOWrapper.flush
2977[clinic start generated code]*/
2978
Antoine Pitrou243757e2010-11-05 21:15:39 +00002979static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002980_io_TextIOWrapper_flush_impl(textio *self)
2981/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002982{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002983 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002984 CHECK_CLOSED(self);
2985 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002986 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002987 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002988 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002989}
2990
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002991/*[clinic input]
2992_io.TextIOWrapper.close
2993[clinic start generated code]*/
2994
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002995static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002996_io_TextIOWrapper_close_impl(textio *self)
2997/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002998{
2999 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003000 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003001 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003002
Antoine Pitrou6be88762010-05-03 16:48:20 +00003003 res = textiowrapper_closed_get(self, NULL);
3004 if (res == NULL)
3005 return NULL;
3006 r = PyObject_IsTrue(res);
3007 Py_DECREF(res);
3008 if (r < 0)
3009 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003010
Antoine Pitrou6be88762010-05-03 16:48:20 +00003011 if (r > 0) {
3012 Py_RETURN_NONE; /* stream already closed */
3013 }
3014 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003015 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003016 if (self->finalizing) {
Victor Stinner61bdb0d2016-12-09 15:39:28 +01003017 res = _PyObject_CallMethodIdObjArgs(self->buffer,
3018 &PyId__dealloc_warn,
3019 self, NULL);
Antoine Pitroue033e062010-10-29 10:38:18 +00003020 if (res)
3021 Py_DECREF(res);
3022 else
3023 PyErr_Clear();
3024 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003025 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06003026 if (res == NULL)
3027 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003028 else
3029 Py_DECREF(res);
3030
Benjamin Peterson68623612012-12-20 11:53:11 -06003031 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
3032 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003033 _PyErr_ChainExceptions(exc, val, tb);
3034 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003035 }
3036 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003037 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003038}
3039
3040static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003041textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003042{
3043 PyObject *line;
3044
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003045 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003046
3047 self->telling = 0;
3048 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
3049 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003050 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003051 }
3052 else {
3053 line = PyObject_CallMethodObjArgs((PyObject *)self,
3054 _PyIO_str_readline, NULL);
3055 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003056 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003057 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003058 "not '%.200s'", Py_TYPE(line)->tp_name);
3059 Py_DECREF(line);
3060 return NULL;
3061 }
3062 }
3063
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003064 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003065 return NULL;
3066
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003067 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003068 /* Reached EOF or would have blocked */
3069 Py_DECREF(line);
3070 Py_CLEAR(self->snapshot);
3071 self->telling = self->seekable;
3072 return NULL;
3073 }
3074
3075 return line;
3076}
3077
3078static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003079textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003080{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003081 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003082 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003083}
3084
3085static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003086textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003087{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003088 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003089 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3090}
3091
3092static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003093textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003094{
3095 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003096 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003097 if (self->decoder == NULL ||
3098 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3099 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003100 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003101 }
3102 return res;
3103}
3104
3105static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003106textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003107{
3108 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003109 Py_INCREF(self->errors);
3110 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003111}
3112
3113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003114textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003115{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003116 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003117 return PyLong_FromSsize_t(self->chunk_size);
3118}
3119
3120static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003121textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003122{
3123 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003124 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003125 if (arg == NULL) {
3126 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3127 return -1;
3128 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003129 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003130 if (n == -1 && PyErr_Occurred())
3131 return -1;
3132 if (n <= 0) {
3133 PyErr_SetString(PyExc_ValueError,
3134 "a strictly positive integer is required");
3135 return -1;
3136 }
3137 self->chunk_size = n;
3138 return 0;
3139}
3140
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003141#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003142
/* Method table for _io.IncrementalNewlineDecoder (referenced from the
   tp_methods slot of PyIncrementalNewlineDecoder_Type).  Each entry is a
   *_METHODDEF macro -- presumably supplied by the generated header
   "clinic/textio.c.h" included above; confirm there. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3150
/* Computed attributes of _io.IncrementalNewlineDecoder.  "newlines" has
   a getter only (setter slot is NULL), so it is read-only. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel */
};
3155
3156PyTypeObject PyIncrementalNewlineDecoder_Type = {
3157 PyVarObject_HEAD_INIT(NULL, 0)
3158 "_io.IncrementalNewlineDecoder", /*tp_name*/
3159 sizeof(nldecoder_object), /*tp_basicsize*/
3160 0, /*tp_itemsize*/
3161 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3162 0, /*tp_print*/
3163 0, /*tp_getattr*/
3164 0, /*tp_setattr*/
3165 0, /*tp_compare */
3166 0, /*tp_repr*/
3167 0, /*tp_as_number*/
3168 0, /*tp_as_sequence*/
3169 0, /*tp_as_mapping*/
3170 0, /*tp_hash */
3171 0, /*tp_call*/
3172 0, /*tp_str*/
3173 0, /*tp_getattro*/
3174 0, /*tp_setattro*/
3175 0, /*tp_as_buffer*/
3176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3177 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3178 0, /* tp_traverse */
3179 0, /* tp_clear */
3180 0, /* tp_richcompare */
3181 0, /*tp_weaklistoffset*/
3182 0, /* tp_iter */
3183 0, /* tp_iternext */
3184 incrementalnewlinedecoder_methods, /* tp_methods */
3185 0, /* tp_members */
3186 incrementalnewlinedecoder_getset, /* tp_getset */
3187 0, /* tp_base */
3188 0, /* tp_dict */
3189 0, /* tp_descr_get */
3190 0, /* tp_descr_set */
3191 0, /* tp_dictoffset */
3192 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3193 0, /* tp_alloc */
3194 PyType_GenericNew, /* tp_new */
3195};
3196
3197
/* Method table for _io.TextIOWrapper (referenced from the tp_methods
   slot of PyTextIOWrapper_Type).  Each entry is a *_METHODDEF macro --
   presumably supplied by the generated header "clinic/textio.c.h"
   included above; confirm there. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Queries forwarded to the wrapped buffer */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3218
/* Attributes of _io.TextIOWrapper that map directly onto fields of the
   textio struct.  All are READONLY from Python except "_finalizing",
   whose flags value is 0 (writable). */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}  /* sentinel */
};
3227
/* Computed attributes of _io.TextIOWrapper.  Every entry is getter-only
   (read-only) except "_CHUNK_SIZE", which also has a setter.  The "mode"
   entry is disabled (commented out). */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}  /* sentinel */
};
3239
3240PyTypeObject PyTextIOWrapper_Type = {
3241 PyVarObject_HEAD_INIT(NULL, 0)
3242 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003243 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003244 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003245 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003246 0, /*tp_print*/
3247 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003248 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003249 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003250 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003251 0, /*tp_as_number*/
3252 0, /*tp_as_sequence*/
3253 0, /*tp_as_mapping*/
3254 0, /*tp_hash */
3255 0, /*tp_call*/
3256 0, /*tp_str*/
3257 0, /*tp_getattro*/
3258 0, /*tp_setattro*/
3259 0, /*tp_as_buffer*/
3260 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02003261 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003262 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003263 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3264 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003265 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003266 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003267 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003268 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3269 textiowrapper_methods, /* tp_methods */
3270 textiowrapper_members, /* tp_members */
3271 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003272 0, /* tp_base */
3273 0, /* tp_dict */
3274 0, /* tp_descr_get */
3275 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003276 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003277 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003278 0, /* tp_alloc */
3279 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003280 0, /* tp_free */
3281 0, /* tp_is_gc */
3282 0, /* tp_bases */
3283 0, /* tp_mro */
3284 0, /* tp_cache */
3285 0, /* tp_subclasses */
3286 0, /* tp_weaklist */
3287 0, /* tp_del */
3288 0, /* tp_version_tag */
3289 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003290};