blob: 3eb0dcc865ba2e7558f29f7f4fce3ec3d37b3b69 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +010011#include "pycore_object.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000012#include "structmember.h"
13#include "_iomodule.h"
14
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030015/*[clinic input]
16module _io
17class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19[clinic start generated code]*/
20/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020022_Py_IDENTIFIER(close);
23_Py_IDENTIFIER(_dealloc_warn);
24_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020025_Py_IDENTIFIER(fileno);
26_Py_IDENTIFIER(flush);
27_Py_IDENTIFIER(getpreferredencoding);
28_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020029_Py_IDENTIFIER(mode);
30_Py_IDENTIFIER(name);
31_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020032_Py_IDENTIFIER(read);
33_Py_IDENTIFIER(readable);
34_Py_IDENTIFIER(replace);
35_Py_IDENTIFIER(reset);
36_Py_IDENTIFIER(seek);
37_Py_IDENTIFIER(seekable);
38_Py_IDENTIFIER(setstate);
INADA Naoki507434f2017-12-21 09:59:53 +090039_Py_IDENTIFIER(strict);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020040_Py_IDENTIFIER(tell);
41_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000043/* TextIOBase */
44
/* Class docstring for _io._TextIOBase (installed as tp_doc of
   PyTextIOBase_Type). */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
52
53static PyObject *
54_unsupported(const char *message)
55{
Antoine Pitrou712cb732013-12-21 15:51:54 +010056 _PyIO_State *state = IO_STATE();
57 if (state != NULL)
58 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000059 return NULL;
60}
61
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063 "Separate the underlying buffer from the TextIOBase and return it.\n"
64 "\n"
65 "After the underlying buffer has been detached, the TextIO is in an\n"
66 "unusable state.\n"
67 );
68
69static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053070textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000071{
72 return _unsupported("detach");
73}
74
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076 "Read at most n characters from stream.\n"
77 "\n"
78 "Read from underlying buffer until we have n characters or we hit EOF.\n"
79 "If n is negative or omitted, read until EOF.\n"
80 );
81
82static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000083textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000084{
85 return _unsupported("read");
86}
87
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089 "Read until newline or EOF.\n"
90 "\n"
91 "Returns an empty string if EOF is hit immediately.\n"
92 );
93
94static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000095textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000096{
97 return _unsupported("readline");
98}
99
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000100PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000101 "Write string to stream.\n"
102 "Returns the number of characters written (which is always equal to\n"
103 "the length of the string).\n"
104 );
105
106static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000108{
109 return _unsupported("write");
110}
111
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000112PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000113 "Encoding of the text stream.\n"
114 "\n"
115 "Subclasses should override.\n"
116 );
117
118static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000119textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120{
121 Py_RETURN_NONE;
122}
123
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 "Line endings translated so far.\n"
126 "\n"
127 "Only line endings translated during reading are considered.\n"
128 "\n"
129 "Subclasses should override.\n"
130 );
131
132static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000133textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000134{
135 Py_RETURN_NONE;
136}
137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000138PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000139 "The error setting of the decoder or encoder.\n"
140 "\n"
141 "Subclasses should override.\n"
142 );
143
144static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000145textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000146{
147 Py_RETURN_NONE;
148}
149
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530152 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000153 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156 {NULL, NULL}
157};
158
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000159static PyGetSetDef textiobase_getset[] = {
160 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000163 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164};
165
166PyTypeObject PyTextIOBase_Type = {
167 PyVarObject_HEAD_INIT(NULL, 0)
168 "_io._TextIOBase", /*tp_name*/
169 0, /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 0, /*tp_dealloc*/
172 0, /*tp_print*/
173 0, /*tp_getattr*/
174 0, /*tp_setattr*/
175 0, /*tp_compare */
176 0, /*tp_repr*/
177 0, /*tp_as_number*/
178 0, /*tp_as_sequence*/
179 0, /*tp_as_mapping*/
180 0, /*tp_hash */
181 0, /*tp_call*/
182 0, /*tp_str*/
183 0, /*tp_getattro*/
184 0, /*tp_setattro*/
185 0, /*tp_as_buffer*/
Antoine Pitrouada319b2019-05-29 22:12:38 +0200186 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 0, /* tp_traverse */
189 0, /* tp_clear */
190 0, /* tp_richcompare */
191 0, /* tp_weaklistoffset */
192 0, /* tp_iter */
193 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000194 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000195 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197 &PyIOBase_Type, /* tp_base */
198 0, /* tp_dict */
199 0, /* tp_descr_get */
200 0, /* tp_descr_set */
201 0, /* tp_dictoffset */
202 0, /* tp_init */
203 0, /* tp_alloc */
204 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200205 0, /* tp_free */
206 0, /* tp_is_gc */
207 0, /* tp_bases */
208 0, /* tp_mro */
209 0, /* tp_cache */
210 0, /* tp_subclasses */
211 0, /* tp_weaklist */
212 0, /* tp_del */
213 0, /* tp_version_tag */
214 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000215};
216
217
218/* IncrementalNewlineDecoder */
219
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; Py_None means "input is
                                   already str"; NULL until __init__ runs */
    PyObject *errors;           /* error handler name set by __init__ */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back by decode() */
    unsigned int translate: 1;  /* non-zero: decode() rewrites \r and \r\n
                                   to \n */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000228
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300229/*[clinic input]
230_io.IncrementalNewlineDecoder.__init__
231 decoder: object
232 translate: int
233 errors: object(c_default="NULL") = "strict"
234
235Codec used when reading a file in universal newlines mode.
236
237It wraps another incremental decoder, translating \r\n and \r into \n.
238It also records the types of newlines encountered. When used with
239translate=False, it ensures that the newline sequence is returned in
240one piece. When used with decoder=None, it expects unicode strings as
241decode input and translates newlines without first invoking an external
242decoder.
243[clinic start generated code]*/
244
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000245static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300246_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247 PyObject *decoder, int translate,
248 PyObject *errors)
249/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000250{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251 self->decoder = decoder;
252 Py_INCREF(decoder);
253
254 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900255 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000256 if (self->errors == NULL)
257 return -1;
258 }
259 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000260 self->errors = errors;
261 }
INADA Naoki507434f2017-12-21 09:59:53 +0900262 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000263
Xiang Zhangb08746b2018-10-31 19:49:16 +0800264 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000265 self->seennl = 0;
266 self->pendingcr = 0;
267
268 return 0;
269}
270
271static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273{
274 Py_CLEAR(self->decoder);
275 Py_CLEAR(self->errors);
276 Py_TYPE(self)->tp_free((PyObject *)self);
277}
278
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200279static int
280check_decoded(PyObject *decoded)
281{
282 if (decoded == NULL)
283 return -1;
284 if (!PyUnicode_Check(decoded)) {
285 PyErr_Format(PyExc_TypeError,
286 "decoder should return a string result, not '%.200s'",
287 Py_TYPE(decoded)->tp_name);
288 Py_DECREF(decoded);
289 return -1;
290 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200291 if (PyUnicode_READY(decoded) < 0) {
292 Py_DECREF(decoded);
293 return -1;
294 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200295 return 0;
296}
297
/* Bit flags stored in nldecoder_object.seennl: which newline conventions
   have been observed so far by the decoder. */
#define SEEN_CR 1
#define SEEN_LF 2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302
/* Decode `input` through the wrapped decoder, record which newline styles
   appear, and (when self->translate is set) rewrite \r and \r\n to \n.

   myself: an nldecoder_object (cast without a type check).
   input:  passed to decoder.decode(); when the decoder is None it is used
           directly as the decoded text.
   final:  non-zero when no more input will follow; a held-back trailing
           '\r' is then emitted instead of being retained.

   Returns a new reference to a str, or NULL with an exception set
   (ValueError if __init__ was never called, TypeError if the decoder did
   not produce a str, MemoryError on allocation failure). */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the possible \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* check_decoded() releases `output` itself on failure, so a plain
       return (not `goto error`) is correct here. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` is used as the per-character byte width: skip one
           character in the destination and copy the old text after it. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; self->seennl is left untouched. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        /* NOTE(review): the scanning loops below read the character at
           index `len` before testing the loop bound; this presumably relies
           on the string buffer carrying a trailing NUL character -- confirm
           against the unicode object layout. */
        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For 1-byte strings a matching byte is a matching
                   character; wider kinds need a per-character scan because
                   the byte may be part of another code point. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* \r\n counts as one CRLF, not as CR followed by LF */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never lengthens the text (\r\n shrinks to \n),
               so a buffer of the input's size is sufficient. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` has already been advanced past the character just
                   read, so in > len means that character was at index len,
                   i.e. past the end of the text. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old `output` was already released above, so return
               directly instead of jumping to `error`. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
503
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300504/*[clinic input]
505_io.IncrementalNewlineDecoder.decode
506 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200507 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300508[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000509
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300510static PyObject *
511_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
512 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200513/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300514{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000515 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
516}
517
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300518/*[clinic input]
519_io.IncrementalNewlineDecoder.getstate
520[clinic start generated code]*/
521
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000522static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300523_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700527 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528
529 if (self->decoder != Py_None) {
530 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531 _PyIO_str_getstate, NULL);
532 if (state == NULL)
533 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300534 if (!PyTuple_Check(state)) {
535 PyErr_SetString(PyExc_TypeError,
536 "illegal decoder state");
537 Py_DECREF(state);
538 return NULL;
539 }
540 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541 &buffer, &flag))
542 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000543 Py_DECREF(state);
544 return NULL;
545 }
546 Py_INCREF(buffer);
547 Py_DECREF(state);
548 }
549 else {
550 buffer = PyBytes_FromString("");
551 flag = 0;
552 }
553 flag <<= 1;
554 if (self->pendingcr)
555 flag |= 1;
556 return Py_BuildValue("NK", buffer, flag);
557}
558
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300559/*[clinic input]
560_io.IncrementalNewlineDecoder.setstate
561 state: object
562 /
563[clinic start generated code]*/
564
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300566_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567 PyObject *state)
568/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000569{
570 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700571 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572
Oren Milman1d1d3e92017-08-20 18:35:36 +0300573 if (!PyTuple_Check(state)) {
574 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300576 }
577 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578 &buffer, &flag))
579 {
580 return NULL;
581 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000582
Victor Stinner7d7e7752014-06-17 23:31:25 +0200583 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584 flag >>= 1;
585
586 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200587 return _PyObject_CallMethodId(self->decoder,
588 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 else
590 Py_RETURN_NONE;
591}
592
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300593/*[clinic input]
594_io.IncrementalNewlineDecoder.reset
595[clinic start generated code]*/
596
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000597static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300598_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000600{
601 self->seennl = 0;
602 self->pendingcr = 0;
603 if (self->decoder != Py_None)
604 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605 else
606 Py_RETURN_NONE;
607}
608
609static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000610incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000611{
612 switch (self->seennl) {
613 case SEEN_CR:
614 return PyUnicode_FromString("\r");
615 case SEEN_LF:
616 return PyUnicode_FromString("\n");
617 case SEEN_CRLF:
618 return PyUnicode_FromString("\r\n");
619 case SEEN_CR | SEEN_LF:
620 return Py_BuildValue("ss", "\r", "\n");
621 case SEEN_CR | SEEN_CRLF:
622 return Py_BuildValue("ss", "\r", "\r\n");
623 case SEEN_LF | SEEN_CRLF:
624 return Py_BuildValue("ss", "\n", "\r\n");
625 case SEEN_CR | SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("sss", "\r", "\n", "\r\n");
627 default:
628 Py_RETURN_NONE;
629 }
630
631}
632
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000633/* TextIOWrapper */
634
/* Signature of a specialized encoding function: takes two objects and
   returns an object (stored in textio.encodefunc). */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
637
/* Instance layout of _io.TextIOWrapper objects. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696
/* Forward declaration; the definition is not in this part of the file. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700/* A couple of specialized cases in order to bypass the slow incremental
701 encoding methods for the most popular encodings. */
702
703static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000704ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000705{
INADA Naoki507434f2017-12-21 09:59:53 +0900706 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707}
708
709static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000710utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000711{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100712 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900713 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714}
715
716static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000717utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100719 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900720 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721}
722
723static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000724utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725{
Antoine Pitroue4501852009-05-14 18:55:55 +0000726 if (!self->encoding_start_of_stream) {
727 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200728#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000731 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000732#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000733 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900735 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
Antoine Pitroue4501852009-05-14 18:55:55 +0000738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000740{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100741 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900742 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000743}
744
745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000746utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000747{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100748 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900749 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000750}
751
752static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000753utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000754{
755 if (!self->encoding_start_of_stream) {
756 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200757#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000758 return utf32be_encode(self, text);
759#else
760 return utf32le_encode(self, text);
761#endif
762 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900764 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769{
INADA Naoki507434f2017-12-21 09:59:53 +0900770 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771}
772
773static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000774latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000775{
INADA Naoki507434f2017-12-21 09:59:53 +0900776 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000777}
778
Inada Naokibfba8c32019-05-16 15:03:20 +0900779// Return true when encoding can be skipped when text is ascii.
780static inline int
781is_asciicompat_encoding(encodefunc_t f)
782{
783 return f == (encodefunc_t) ascii_encode
784 || f == (encodefunc_t) latin1_encode
785 || f == (encodefunc_t) utf8_encode;
786}
787
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788/* Map normalized encoding names onto the specialized encoding funcs */
789
790typedef struct {
791 const char *name;
792 encodefunc_t encodefunc;
793} encodefuncentry;
794
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200795static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000796 {"ascii", (encodefunc_t) ascii_encode},
797 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000798 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799 {"utf-16-be", (encodefunc_t) utf16be_encode},
800 {"utf-16-le", (encodefunc_t) utf16le_encode},
801 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000802 {"utf-32-be", (encodefunc_t) utf32be_encode},
803 {"utf-32-le", (encodefunc_t) utf32le_encode},
804 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000805 {NULL, NULL}
806};
807
INADA Naoki507434f2017-12-21 09:59:53 +0900808static int
809validate_newline(const char *newline)
810{
811 if (newline && newline[0] != '\0'
812 && !(newline[0] == '\n' && newline[1] == '\0')
813 && !(newline[0] == '\r' && newline[1] == '\0')
814 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
815 PyErr_Format(PyExc_ValueError,
816 "illegal newline value: %s", newline);
817 return -1;
818 }
819 return 0;
820}
821
822static int
823set_newline(textio *self, const char *newline)
824{
825 PyObject *old = self->readnl;
826 if (newline == NULL) {
827 self->readnl = NULL;
828 }
829 else {
830 self->readnl = PyUnicode_FromString(newline);
831 if (self->readnl == NULL) {
832 self->readnl = old;
833 return -1;
834 }
835 }
836 self->readuniversal = (newline == NULL || newline[0] == '\0');
837 self->readtranslate = (newline == NULL);
838 self->writetranslate = (newline == NULL || newline[0] != '\0');
839 if (!self->readuniversal && self->readnl != NULL) {
840 // validate_newline() accepts only ASCII newlines.
841 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
842 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
843 if (strcmp(self->writenl, "\n") == 0) {
844 self->writenl = NULL;
845 }
846 }
847 else {
848#ifdef MS_WINDOWS
849 self->writenl = "\r\n";
850#else
851 self->writenl = NULL;
852#endif
853 }
854 Py_XDECREF(old);
855 return 0;
856}
857
858static int
859_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
860 const char *errors)
861{
862 PyObject *res;
863 int r;
864
865 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
866 if (res == NULL)
867 return -1;
868
869 r = PyObject_IsTrue(res);
870 Py_DECREF(res);
871 if (r == -1)
872 return -1;
873
874 if (r != 1)
875 return 0;
876
877 Py_CLEAR(self->decoder);
878 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
879 if (self->decoder == NULL)
880 return -1;
881
882 if (self->readuniversal) {
883 PyObject *incrementalDecoder = PyObject_CallFunction(
884 (PyObject *)&PyIncrementalNewlineDecoder_Type,
885 "Oi", self->decoder, (int)self->readtranslate);
886 if (incrementalDecoder == NULL)
887 return -1;
888 Py_CLEAR(self->decoder);
889 self->decoder = incrementalDecoder;
890 }
891
892 return 0;
893}
894
895static PyObject*
896_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
897{
898 PyObject *chars;
899
900 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
901 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
902 else
903 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
904 eof ? Py_True : Py_False, NULL);
905
906 if (check_decoded(chars) < 0)
907 // check_decoded already decreases refcount
908 return NULL;
909
910 return chars;
911}
912
913static int
914_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
915 const char *errors)
916{
917 PyObject *res;
918 int r;
919
920 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
921 if (res == NULL)
922 return -1;
923
924 r = PyObject_IsTrue(res);
925 Py_DECREF(res);
926 if (r == -1)
927 return -1;
928
929 if (r != 1)
930 return 0;
931
932 Py_CLEAR(self->encoder);
933 self->encodefunc = NULL;
934 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
935 if (self->encoder == NULL)
936 return -1;
937
938 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200939 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
940 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900941 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200942 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900943 const encodefuncentry *e = encodefuncs;
944 while (e->name != NULL) {
945 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
946 self->encodefunc = e->encodefunc;
947 break;
948 }
949 e++;
950 }
951 }
952 Py_XDECREF(res);
953
954 return 0;
955}
956
957static int
958_textiowrapper_fix_encoder_state(textio *self)
959{
960 if (!self->seekable || !self->encoder) {
961 return 0;
962 }
963
964 self->encoding_start_of_stream = 1;
965
966 PyObject *cookieObj = PyObject_CallMethodObjArgs(
967 self->buffer, _PyIO_str_tell, NULL);
968 if (cookieObj == NULL) {
969 return -1;
970 }
971
972 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
973 Py_DECREF(cookieObj);
974 if (cmp < 0) {
975 return -1;
976 }
977
978 if (cmp == 0) {
979 self->encoding_start_of_stream = 0;
980 PyObject *res = PyObject_CallMethodObjArgs(
981 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
982 if (res == NULL) {
983 return -1;
984 }
985 Py_DECREF(res);
986 }
987
988 return 0;
989}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300991/*[clinic input]
992_io.TextIOWrapper.__init__
993 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700994 encoding: str(accept={str, NoneType}) = NULL
INADA Naoki507434f2017-12-21 09:59:53 +0900995 errors: object = None
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700996 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200997 line_buffering: bool(accept={int}) = False
998 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000999
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001000Character and line based layer over a BufferedIOBase object, buffer.
1001
1002encoding gives the name of the encoding that the stream will be
1003decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1004
1005errors determines the strictness of encoding and decoding (see
1006help(codecs.Codec) or the documentation for codecs.register) and
1007defaults to "strict".
1008
1009newline controls how line endings are handled. It can be None, '',
1010'\n', '\r', and '\r\n'. It works as follows:
1011
1012* On input, if newline is None, universal newlines mode is
1013 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1014 these are translated into '\n' before being returned to the
1015 caller. If it is '', universal newline mode is enabled, but line
1016 endings are returned to the caller untranslated. If it has any of
1017 the other legal values, input lines are only terminated by the given
1018 string, and the line ending is returned to the caller untranslated.
1019
1020* On output, if newline is None, any '\n' characters written are
1021 translated to the system default line separator, os.linesep. If
1022 newline is '' or '\n', no translation takes place. If newline is any
1023 of the other legal values, any '\n' characters written are translated
1024 to the given string.
1025
1026If line_buffering is True, a call to flush is implied when a call to
1027write contains a newline character.
1028[clinic start generated code]*/
1029
1030static int
1031_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001032 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001033 const char *newline, int line_buffering,
1034 int write_through)
INADA Naoki507434f2017-12-21 09:59:53 +09001035/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001036{
1037 PyObject *raw, *codec_info = NULL;
1038 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 PyObject *res;
1040 int r;
1041
1042 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001043 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001044
INADA Naoki507434f2017-12-21 09:59:53 +09001045 if (errors == Py_None) {
1046 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001047 if (errors == NULL) {
1048 return -1;
1049 }
INADA Naoki507434f2017-12-21 09:59:53 +09001050 }
1051 else if (!PyUnicode_Check(errors)) {
1052 // Check 'errors' argument here because Argument Clinic doesn't support
1053 // 'str(accept={str, NoneType})' converter.
1054 PyErr_Format(
1055 PyExc_TypeError,
1056 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1057 errors->ob_type->tp_name);
1058 return -1;
1059 }
1060
1061 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001062 return -1;
1063 }
1064
1065 Py_CLEAR(self->buffer);
1066 Py_CLEAR(self->encoding);
1067 Py_CLEAR(self->encoder);
1068 Py_CLEAR(self->decoder);
1069 Py_CLEAR(self->readnl);
1070 Py_CLEAR(self->decoded_chars);
1071 Py_CLEAR(self->pending_bytes);
1072 Py_CLEAR(self->snapshot);
1073 Py_CLEAR(self->errors);
1074 Py_CLEAR(self->raw);
1075 self->decoded_chars_used = 0;
1076 self->pending_bytes_count = 0;
1077 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001078 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001079
1080 if (encoding == NULL) {
1081 /* Try os.device_encoding(fileno) */
1082 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001083 state = IO_STATE();
1084 if (state == NULL)
1085 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001086 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087 /* Ignore only AttributeError and UnsupportedOperation */
1088 if (fileno == NULL) {
1089 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1090 PyErr_ExceptionMatches(state->unsupported_operation)) {
1091 PyErr_Clear();
1092 }
1093 else {
1094 goto error;
1095 }
1096 }
1097 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001098 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001099 Py_DECREF(fileno);
1100 if (fd == -1 && PyErr_Occurred()) {
1101 goto error;
1102 }
1103
1104 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001105 if (self->encoding == NULL)
1106 goto error;
1107 else if (!PyUnicode_Check(self->encoding))
1108 Py_CLEAR(self->encoding);
1109 }
1110 }
1111 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001112 PyObject *locale_module = _PyIO_get_locale_module(state);
1113 if (locale_module == NULL)
1114 goto catch_ImportError;
Victor Stinner61bdb0d2016-12-09 15:39:28 +01001115 self->encoding = _PyObject_CallMethodIdObjArgs(
1116 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001117 Py_DECREF(locale_module);
1118 if (self->encoding == NULL) {
1119 catch_ImportError:
1120 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001121 Importing locale can raise an ImportError because of
1122 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001123 ImportError if _locale is not available. These will happen
1124 during module building.
1125 */
1126 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1127 PyErr_Clear();
1128 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001129 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001130 else
1131 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001132 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001133 else if (!PyUnicode_Check(self->encoding))
1134 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001136 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001137 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001138 if (encoding == NULL)
1139 goto error;
1140 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001141 else if (encoding != NULL) {
1142 self->encoding = PyUnicode_FromString(encoding);
1143 if (self->encoding == NULL)
1144 goto error;
1145 }
1146 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001147 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001148 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001149 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001150 }
1151
Nick Coghlana9b15242014-02-04 22:11:18 +10001152 /* Check we have been asked for a real text encoding */
1153 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1154 if (codec_info == NULL) {
1155 Py_CLEAR(self->encoding);
1156 goto error;
1157 }
1158
1159 /* XXX: Failures beyond this point have the potential to leak elements
1160 * of the partially constructed object (like self->encoding)
1161 */
1162
INADA Naoki507434f2017-12-21 09:59:53 +09001163 Py_INCREF(errors);
1164 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001165 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001166 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001167 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001168 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001169 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001170 }
1171
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001172 self->buffer = buffer;
1173 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001174
INADA Naoki507434f2017-12-21 09:59:53 +09001175 /* Build the decoder object */
1176 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1177 goto error;
1178
1179 /* Build the encoder object */
1180 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1181 goto error;
1182
1183 /* Finished sorting out the codec details */
1184 Py_CLEAR(codec_info);
1185
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001186 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1187 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001188 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1189 {
1190 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1191 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001192 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001193 if (raw != NULL) {
1194 if (Py_TYPE(raw) == &PyFileIO_Type)
1195 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001196 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001197 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001198 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001199 }
1200
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001201 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001202 if (res == NULL)
1203 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001204 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001205 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001206 if (r < 0)
1207 goto error;
1208 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001209
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001210 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1211 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001212 goto error;
1213 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001214 Py_XDECREF(res);
1215 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001216
Antoine Pitroue4501852009-05-14 18:55:55 +00001217 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001218 if (_textiowrapper_fix_encoder_state(self) < 0) {
1219 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001220 }
1221
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222 self->ok = 1;
1223 return 0;
1224
1225 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001226 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001227 return -1;
1228}
1229
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001230/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1231 * -1 on error.
1232 */
1233static int
1234convert_optional_bool(PyObject *obj, int default_value)
1235{
1236 long v;
1237 if (obj == Py_None) {
1238 v = default_value;
1239 }
1240 else {
1241 v = PyLong_AsLong(obj);
1242 if (v == -1 && PyErr_Occurred())
1243 return -1;
1244 }
1245 return v != 0;
1246}
1247
INADA Naoki507434f2017-12-21 09:59:53 +09001248static int
1249textiowrapper_change_encoding(textio *self, PyObject *encoding,
1250 PyObject *errors, int newline_changed)
1251{
1252 /* Use existing settings where new settings are not specified */
1253 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1254 return 0; // no change
1255 }
1256
1257 if (encoding == Py_None) {
1258 encoding = self->encoding;
1259 if (errors == Py_None) {
1260 errors = self->errors;
1261 }
1262 }
1263 else if (errors == Py_None) {
1264 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001265 if (errors == NULL) {
1266 return -1;
1267 }
INADA Naoki507434f2017-12-21 09:59:53 +09001268 }
1269
1270 const char *c_errors = PyUnicode_AsUTF8(errors);
1271 if (c_errors == NULL) {
1272 return -1;
1273 }
1274
1275 // Create new encoder & decoder
1276 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1277 PyUnicode_AsUTF8(encoding), "codecs.open()");
1278 if (codec_info == NULL) {
1279 return -1;
1280 }
1281 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1282 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1283 Py_DECREF(codec_info);
1284 return -1;
1285 }
1286 Py_DECREF(codec_info);
1287
1288 Py_INCREF(encoding);
1289 Py_INCREF(errors);
1290 Py_SETREF(self->encoding, encoding);
1291 Py_SETREF(self->errors, errors);
1292
1293 return _textiowrapper_fix_encoder_state(self);
1294}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001295
1296/*[clinic input]
1297_io.TextIOWrapper.reconfigure
1298 *
INADA Naoki507434f2017-12-21 09:59:53 +09001299 encoding: object = None
1300 errors: object = None
1301 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001302 line_buffering as line_buffering_obj: object = None
1303 write_through as write_through_obj: object = None
1304
1305Reconfigure the text stream with new parameters.
1306
1307This also does an implicit stream flush.
1308
1309[clinic start generated code]*/
1310
1311static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001312_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1313 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001314 PyObject *line_buffering_obj,
1315 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001316/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001317{
1318 int line_buffering;
1319 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001320 const char *newline = NULL;
1321
1322 /* Check if something is in the read buffer */
1323 if (self->decoded_chars != NULL) {
1324 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001325 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001326 "of stream after the first read");
1327 return NULL;
1328 }
1329 }
1330
1331 if (newline_obj != NULL && newline_obj != Py_None) {
1332 newline = PyUnicode_AsUTF8(newline_obj);
1333 if (newline == NULL || validate_newline(newline) < 0) {
1334 return NULL;
1335 }
1336 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001337
1338 line_buffering = convert_optional_bool(line_buffering_obj,
1339 self->line_buffering);
1340 write_through = convert_optional_bool(write_through_obj,
1341 self->write_through);
1342 if (line_buffering < 0 || write_through < 0) {
1343 return NULL;
1344 }
INADA Naoki507434f2017-12-21 09:59:53 +09001345
1346 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001347 if (res == NULL) {
1348 return NULL;
1349 }
INADA Naoki507434f2017-12-21 09:59:53 +09001350 Py_DECREF(res);
1351 self->b2cratio = 0;
1352
1353 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1354 return NULL;
1355 }
1356
1357 if (textiowrapper_change_encoding(
1358 self, encoding, errors, newline_obj != NULL) < 0) {
1359 return NULL;
1360 }
1361
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001362 self->line_buffering = line_buffering;
1363 self->write_through = write_through;
1364 Py_RETURN_NONE;
1365}
1366
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001367static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001368textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001369{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001370 self->ok = 0;
1371 Py_CLEAR(self->buffer);
1372 Py_CLEAR(self->encoding);
1373 Py_CLEAR(self->encoder);
1374 Py_CLEAR(self->decoder);
1375 Py_CLEAR(self->readnl);
1376 Py_CLEAR(self->decoded_chars);
1377 Py_CLEAR(self->pending_bytes);
1378 Py_CLEAR(self->snapshot);
1379 Py_CLEAR(self->errors);
1380 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001381
1382 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001383 return 0;
1384}
1385
1386static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001387textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001388{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001389 self->finalizing = 1;
1390 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001391 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001392 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001393 _PyObject_GC_UNTRACK(self);
1394 if (self->weakreflist != NULL)
1395 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001396 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001397 Py_TYPE(self)->tp_free((PyObject *)self);
1398}
1399
1400static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001401textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001402{
1403 Py_VISIT(self->buffer);
1404 Py_VISIT(self->encoding);
1405 Py_VISIT(self->encoder);
1406 Py_VISIT(self->decoder);
1407 Py_VISIT(self->readnl);
1408 Py_VISIT(self->decoded_chars);
1409 Py_VISIT(self->pending_bytes);
1410 Py_VISIT(self->snapshot);
1411 Py_VISIT(self->errors);
1412 Py_VISIT(self->raw);
1413
1414 Py_VISIT(self->dict);
1415 return 0;
1416}
1417
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001418static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001419textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001420
/* This macro takes some shortcuts to make the common case faster.

   It returns NULL from the enclosing function (with an exception set) when
   the stream is closed or the check itself fails.  For an exact
   TextIOWrapper the cached raw FileIO object is queried directly when there
   is one, otherwise the 'closed' getter is used; any other type goes
   through _PyIOBase_check_closed(). */
#define CHECK_CLOSED(self) \
    do { \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)(self), \
                                       Py_True) == NULL) { \
                return NULL; \
            } \
        } \
        else { \
            int _closed; \
            if ((self)->raw != NULL) { \
                _closed = _PyFileIO_closed((self)->raw); \
            } \
            else { \
                PyObject *_res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) { \
                    return NULL; \
                } \
                _closed = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (_closed < 0) { \
                    return NULL; \
                } \
            } \
            if (_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1447
/* Return NULL from the enclosing function, with ValueError set, when the
   object's 'ok' flag is not positive. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString( \
            PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1454
/* Return NULL from the enclosing function, with ValueError set, when the
   object's 'ok' flag is not positive or its buffer has been detached. */
#define CHECK_ATTACHED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString( \
            PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    } \
    else if ((self)->detached) { \
        PyErr_SetString( \
            PyExc_ValueError, \
            "underlying buffer has been detached"); \
        return NULL; \
    }
1462
/* Same checks as CHECK_ATTACHED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString( \
            PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } \
    else if ((self)->detached) { \
        PyErr_SetString( \
            PyExc_ValueError, \
            "underlying buffer has been detached"); \
        return -1; \
    }
1473
1474
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001475/*[clinic input]
1476_io.TextIOWrapper.detach
1477[clinic start generated code]*/
1478
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001479static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001480_io_TextIOWrapper_detach_impl(textio *self)
1481/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001482{
1483 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001484 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001485 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1486 if (res == NULL)
1487 return NULL;
1488 Py_DECREF(res);
1489 buffer = self->buffer;
1490 self->buffer = NULL;
1491 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001492 return buffer;
1493}
1494
Antoine Pitrou24f36292009-03-28 22:16:42 +00001495/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001496 underlying buffered object, though. */
1497static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001498_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001499{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001500 if (self->pending_bytes == NULL)
1501 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001502
Inada Naokibfba8c32019-05-16 15:03:20 +09001503 PyObject *pending = self->pending_bytes;
1504 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001505
Inada Naokibfba8c32019-05-16 15:03:20 +09001506 if (PyBytes_Check(pending)) {
1507 b = pending;
1508 Py_INCREF(b);
1509 }
1510 else if (PyUnicode_Check(pending)) {
1511 assert(PyUnicode_IS_ASCII(pending));
1512 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1513 b = PyBytes_FromStringAndSize(
1514 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1515 if (b == NULL) {
1516 return -1;
1517 }
1518 }
1519 else {
1520 assert(PyList_Check(pending));
1521 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1522 if (b == NULL) {
1523 return -1;
1524 }
1525
1526 char *buf = PyBytes_AsString(b);
1527 Py_ssize_t pos = 0;
1528
1529 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1530 PyObject *obj = PyList_GET_ITEM(pending, i);
1531 char *src;
1532 Py_ssize_t len;
1533 if (PyUnicode_Check(obj)) {
1534 assert(PyUnicode_IS_ASCII(obj));
1535 src = PyUnicode_DATA(obj);
1536 len = PyUnicode_GET_LENGTH(obj);
1537 }
1538 else {
1539 assert(PyBytes_Check(obj));
1540 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1541 Py_DECREF(b);
1542 return -1;
1543 }
1544 }
1545 memcpy(buf + pos, src, len);
1546 pos += len;
1547 }
1548 assert(pos == self->pending_bytes_count);
1549 }
1550
1551 self->pending_bytes_count = 0;
1552 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001553 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001554
1555 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001556 do {
1557 ret = PyObject_CallMethodObjArgs(self->buffer,
1558 _PyIO_str_write, b, NULL);
1559 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001560 Py_DECREF(b);
1561 if (ret == NULL)
1562 return -1;
1563 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 return 0;
1565}
1566
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001567/*[clinic input]
1568_io.TextIOWrapper.write
1569 text: unicode
1570 /
1571[clinic start generated code]*/
1572
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001573static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001574_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1575/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576{
1577 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001578 PyObject *b;
1579 Py_ssize_t textlen;
1580 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001581 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001582
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001583 if (PyUnicode_READY(text) == -1)
1584 return NULL;
1585
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001586 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001587 CHECK_CLOSED(self);
1588
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001589 if (self->encoder == NULL)
1590 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001591
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592 Py_INCREF(text);
1593
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001594 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001595
1596 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001597 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001598 haslf = 1;
1599
1600 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001601 PyObject *newtext = _PyObject_CallMethodId(
1602 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 Py_DECREF(text);
1604 if (newtext == NULL)
1605 return NULL;
1606 text = newtext;
1607 }
1608
Antoine Pitroue96ec682011-07-23 21:46:35 +02001609 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001610 text_needflush = 1;
1611 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001612 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001613 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 needflush = 1;
1615
1616 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001617 if (self->encodefunc != NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001618 if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
1619 b = text;
1620 Py_INCREF(b);
1621 }
1622 else {
1623 b = (*self->encodefunc)((PyObject *) self, text);
1624 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001625 self->encoding_start_of_stream = 0;
1626 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001627 else
1628 b = PyObject_CallMethodObjArgs(self->encoder,
1629 _PyIO_str_encode, text, NULL);
Inada Naokibfba8c32019-05-16 15:03:20 +09001630
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001631 Py_DECREF(text);
1632 if (b == NULL)
1633 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001634 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001635 PyErr_Format(PyExc_TypeError,
1636 "encoder should return a bytes object, not '%.200s'",
1637 Py_TYPE(b)->tp_name);
1638 Py_DECREF(b);
1639 return NULL;
1640 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641
Inada Naokibfba8c32019-05-16 15:03:20 +09001642 Py_ssize_t bytes_len;
1643 if (b == text) {
1644 bytes_len = PyUnicode_GET_LENGTH(b);
1645 }
1646 else {
1647 bytes_len = PyBytes_GET_SIZE(b);
1648 }
1649
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001650 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001651 self->pending_bytes_count = 0;
1652 self->pending_bytes = b;
1653 }
1654 else if (!PyList_CheckExact(self->pending_bytes)) {
1655 PyObject *list = PyList_New(2);
1656 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 Py_DECREF(b);
1658 return NULL;
1659 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001660 PyList_SET_ITEM(list, 0, self->pending_bytes);
1661 PyList_SET_ITEM(list, 1, b);
1662 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001664 else {
1665 if (PyList_Append(self->pending_bytes, b) < 0) {
1666 Py_DECREF(b);
1667 return NULL;
1668 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001671
1672 self->pending_bytes_count += bytes_len;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001673 if (self->pending_bytes_count > self->chunk_size || needflush ||
1674 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001675 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676 return NULL;
1677 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001678
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001679 if (needflush) {
1680 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1681 if (ret == NULL)
1682 return NULL;
1683 Py_DECREF(ret);
1684 }
1685
Zackery Spytz23db9352018-06-29 04:14:58 -06001686 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 Py_CLEAR(self->snapshot);
1688
1689 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001690 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691 if (ret == NULL)
1692 return NULL;
1693 Py_DECREF(ret);
1694 }
1695
1696 return PyLong_FromSsize_t(textlen);
1697}
1698
1699/* Steal a reference to chars and store it in the decoded_char buffer;
1700 */
1701static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001702textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001704 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705 self->decoded_chars_used = 0;
1706}
1707
1708static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001709textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710{
1711 PyObject *chars;
1712 Py_ssize_t avail;
1713
1714 if (self->decoded_chars == NULL)
1715 return PyUnicode_FromStringAndSize(NULL, 0);
1716
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001717 /* decoded_chars is guaranteed to be "ready". */
1718 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001719 - self->decoded_chars_used);
1720
1721 assert(avail >= 0);
1722
1723 if (n < 0 || n > avail)
1724 n = avail;
1725
1726 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001727 chars = PyUnicode_Substring(self->decoded_chars,
1728 self->decoded_chars_used,
1729 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 if (chars == NULL)
1731 return NULL;
1732 }
1733 else {
1734 chars = self->decoded_chars;
1735 Py_INCREF(chars);
1736 }
1737
1738 self->decoded_chars_used += n;
1739 return chars;
1740}
1741
1742/* Read and decode the next chunk of data from the BufferedReader.
1743 */
1744static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001745textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746{
1747 PyObject *dec_buffer = NULL;
1748 PyObject *dec_flags = NULL;
1749 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001750 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001751 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001752 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753 int eof;
1754
1755 /* The return value is True unless EOF was reached. The decoded string is
1756 * placed in self._decoded_chars (replacing its previous value). The
1757 * entire input chunk is sent to the decoder, though some of it may remain
1758 * buffered in the decoder, yet to be converted.
1759 */
1760
1761 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001762 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001763 return -1;
1764 }
1765
1766 if (self->telling) {
1767 /* To prepare for tell(), we need to snapshot a point in the file
1768 * where the decoder's input buffer is empty.
1769 */
1770
1771 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1772 _PyIO_str_getstate, NULL);
1773 if (state == NULL)
1774 return -1;
1775 /* Given this, we know there was a valid snapshot point
1776 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1777 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001778 if (!PyTuple_Check(state)) {
1779 PyErr_SetString(PyExc_TypeError,
1780 "illegal decoder state");
1781 Py_DECREF(state);
1782 return -1;
1783 }
1784 if (!PyArg_ParseTuple(state,
1785 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1786 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001787 Py_DECREF(state);
1788 return -1;
1789 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001790
1791 if (!PyBytes_Check(dec_buffer)) {
1792 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001793 "illegal decoder state: the first item should be a "
1794 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001795 Py_TYPE(dec_buffer)->tp_name);
1796 Py_DECREF(state);
1797 return -1;
1798 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001799 Py_INCREF(dec_buffer);
1800 Py_INCREF(dec_flags);
1801 Py_DECREF(state);
1802 }
1803
1804 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001805 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001806 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001807 }
1808 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001809 if (chunk_size == NULL)
1810 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001811
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001813 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1814 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815 Py_DECREF(chunk_size);
1816 if (input_chunk == NULL)
1817 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001818
1819 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001820 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001821 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001822 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1823 Py_TYPE(input_chunk)->tp_name);
1824 goto fail;
1825 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001826
Antoine Pitroub8503892014-04-29 10:14:02 +02001827 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001828 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001829
INADA Naoki507434f2017-12-21 09:59:53 +09001830 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1831 PyBuffer_Release(&input_chunk_buf);
1832 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001833 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001834
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001835 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001836 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001837 if (nchars > 0)
1838 self->b2cratio = (double) nbytes / nchars;
1839 else
1840 self->b2cratio = 0.0;
1841 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001842 eof = 0;
1843
1844 if (self->telling) {
1845 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1846 * next input to be decoded is dec_buffer + input_chunk.
1847 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001848 PyObject *next_input = dec_buffer;
1849 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001850 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001851 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001852 goto fail;
1853 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001854 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1855 if (snapshot == NULL) {
1856 dec_flags = NULL;
1857 goto fail;
1858 }
1859 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001860 }
1861 Py_DECREF(input_chunk);
1862
1863 return (eof == 0);
1864
1865 fail:
1866 Py_XDECREF(dec_buffer);
1867 Py_XDECREF(dec_flags);
1868 Py_XDECREF(input_chunk);
1869 return -1;
1870}
1871
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001872/*[clinic input]
1873_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001874 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001875 /
1876[clinic start generated code]*/
1877
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001878static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001879_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001880/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001881{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001882 PyObject *result = NULL, *chunks = NULL;
1883
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001884 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001885 CHECK_CLOSED(self);
1886
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001887 if (self->decoder == NULL)
1888 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001889
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001890 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001891 return NULL;
1892
1893 if (n < 0) {
1894 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001895 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001896 PyObject *decoded;
1897 if (bytes == NULL)
1898 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001899
1900 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1901 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1902 bytes, 1);
1903 else
1904 decoded = PyObject_CallMethodObjArgs(
1905 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001906 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001907 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001908 goto fail;
1909
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001910 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911
1912 if (result == NULL) {
1913 Py_DECREF(decoded);
1914 return NULL;
1915 }
1916
1917 PyUnicode_AppendAndDel(&result, decoded);
1918 if (result == NULL)
1919 goto fail;
1920
Zackery Spytz23db9352018-06-29 04:14:58 -06001921 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001922 Py_CLEAR(self->snapshot);
1923 return result;
1924 }
1925 else {
1926 int res = 1;
1927 Py_ssize_t remaining = n;
1928
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001929 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930 if (result == NULL)
1931 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001932 if (PyUnicode_READY(result) == -1)
1933 goto fail;
1934 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001935
1936 /* Keep reading chunks until we have n characters to return */
1937 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001938 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001939 if (res < 0) {
1940 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1941 when EINTR occurs so we needn't do it ourselves. */
1942 if (_PyIO_trap_eintr()) {
1943 continue;
1944 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001945 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001946 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001947 if (res == 0) /* EOF */
1948 break;
1949 if (chunks == NULL) {
1950 chunks = PyList_New(0);
1951 if (chunks == NULL)
1952 goto fail;
1953 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001954 if (PyUnicode_GET_LENGTH(result) > 0 &&
1955 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001956 goto fail;
1957 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001958 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001959 if (result == NULL)
1960 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001961 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962 }
1963 if (chunks != NULL) {
1964 if (result != NULL && PyList_Append(chunks, result) < 0)
1965 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03001966 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001967 if (result == NULL)
1968 goto fail;
1969 Py_CLEAR(chunks);
1970 }
1971 return result;
1972 }
1973 fail:
1974 Py_XDECREF(result);
1975 Py_XDECREF(chunks);
1976 return NULL;
1977}
1978
1979
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001980/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001981 that is to the NUL character. Otherwise the function will produce
1982 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001983static const char *
1984find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001985{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001986 if (kind == PyUnicode_1BYTE_KIND) {
1987 assert(ch < 256);
1988 return (char *) memchr((void *) s, (char) ch, end - s);
1989 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001990 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001991 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001992 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001993 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 return s;
1995 if (s == end)
1996 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001997 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998 }
1999}
2000
2001Py_ssize_t
2002_PyIO_find_line_ending(
2003 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002004 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002006 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002007
2008 if (translated) {
2009 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002010 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002011 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002012 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002013 else {
2014 *consumed = len;
2015 return -1;
2016 }
2017 }
2018 else if (universal) {
2019 /* Universal newline search. Find any of \r, \r\n, \n
2020 * The decoder ensures that \r\n are not split in two pieces
2021 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002022 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002023 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002024 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002026 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002027 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002028 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002029 if (s >= end) {
2030 *consumed = len;
2031 return -1;
2032 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002033 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002034 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002035 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002036 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002037 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002038 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002039 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002040 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002041 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002042 }
2043 }
2044 }
2045 else {
2046 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002047 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02002048 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002049 /* Assume that readnl is an ASCII character. */
2050 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002051 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002052 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002054 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002055 *consumed = len;
2056 return -1;
2057 }
2058 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002059 const char *s = start;
2060 const char *e = end - (readnl_len - 1)*kind;
2061 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002062 if (e < s)
2063 e = s;
2064 while (s < e) {
2065 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002066 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 if (pos == NULL || pos >= e)
2068 break;
2069 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002070 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071 break;
2072 }
2073 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002074 return (pos - start)/kind + readnl_len;
2075 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002077 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002078 if (pos == NULL)
2079 *consumed = len;
2080 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002081 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002082 return -1;
2083 }
2084 }
2085}
2086
2087static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002088_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002089{
2090 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2091 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2092 int res;
2093
2094 CHECK_CLOSED(self);
2095
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002096 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002097 return NULL;
2098
2099 chunked = 0;
2100
2101 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002102 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002104 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 Py_ssize_t consumed = 0;
2106
2107 /* First, get some data if necessary */
2108 res = 1;
2109 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002110 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002111 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002112 if (res < 0) {
2113 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2114 when EINTR occurs so we needn't do it ourselves. */
2115 if (_PyIO_trap_eintr()) {
2116 continue;
2117 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002118 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002119 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002120 if (res == 0)
2121 break;
2122 }
2123 if (res == 0) {
2124 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002125 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002126 Py_CLEAR(self->snapshot);
2127 start = endpos = offset_to_buffer = 0;
2128 break;
2129 }
2130
2131 if (remaining == NULL) {
2132 line = self->decoded_chars;
2133 start = self->decoded_chars_used;
2134 offset_to_buffer = 0;
2135 Py_INCREF(line);
2136 }
2137 else {
2138 assert(self->decoded_chars_used == 0);
2139 line = PyUnicode_Concat(remaining, self->decoded_chars);
2140 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002141 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002142 Py_CLEAR(remaining);
2143 if (line == NULL)
2144 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002145 if (PyUnicode_READY(line) == -1)
2146 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002147 }
2148
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002149 ptr = PyUnicode_DATA(line);
2150 line_len = PyUnicode_GET_LENGTH(line);
2151 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002152
2153 endpos = _PyIO_find_line_ending(
2154 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002155 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002156 ptr + kind * start,
2157 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002158 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002159 if (endpos >= 0) {
2160 endpos += start;
2161 if (limit >= 0 && (endpos - start) + chunked >= limit)
2162 endpos = start + limit - chunked;
2163 break;
2164 }
2165
2166 /* We can put aside up to `endpos` */
2167 endpos = consumed + start;
2168 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2169 /* Didn't find line ending, but reached length limit */
2170 endpos = start + limit - chunked;
2171 break;
2172 }
2173
2174 if (endpos > start) {
2175 /* No line ending seen yet - put aside current data */
2176 PyObject *s;
2177 if (chunks == NULL) {
2178 chunks = PyList_New(0);
2179 if (chunks == NULL)
2180 goto error;
2181 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002182 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002183 if (s == NULL)
2184 goto error;
2185 if (PyList_Append(chunks, s) < 0) {
2186 Py_DECREF(s);
2187 goto error;
2188 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002189 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002190 Py_DECREF(s);
2191 }
2192 /* There may be some remaining bytes we'll have to prepend to the
2193 next chunk of data */
2194 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002195 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196 if (remaining == NULL)
2197 goto error;
2198 }
2199 Py_CLEAR(line);
2200 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002201 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002202 }
2203
2204 if (line != NULL) {
2205 /* Our line ends in the current buffer */
2206 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002207 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2208 PyObject *s = PyUnicode_Substring(line, start, endpos);
2209 Py_CLEAR(line);
2210 if (s == NULL)
2211 goto error;
2212 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002213 }
2214 }
2215 if (remaining != NULL) {
2216 if (chunks == NULL) {
2217 chunks = PyList_New(0);
2218 if (chunks == NULL)
2219 goto error;
2220 }
2221 if (PyList_Append(chunks, remaining) < 0)
2222 goto error;
2223 Py_CLEAR(remaining);
2224 }
2225 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002226 if (line != NULL) {
2227 if (PyList_Append(chunks, line) < 0)
2228 goto error;
2229 Py_DECREF(line);
2230 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002231 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2232 if (line == NULL)
2233 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002234 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002236 if (line == NULL) {
2237 Py_INCREF(_PyIO_empty_str);
2238 line = _PyIO_empty_str;
2239 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002240
2241 return line;
2242
2243 error:
2244 Py_XDECREF(chunks);
2245 Py_XDECREF(remaining);
2246 Py_XDECREF(line);
2247 return NULL;
2248}
2249
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002250/*[clinic input]
2251_io.TextIOWrapper.readline
2252 size: Py_ssize_t = -1
2253 /
2254[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002255
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002256static PyObject *
2257_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2258/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2259{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002260 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002261 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002262}
2263
2264/* Seek and Tell */
2265
2266typedef struct {
2267 Py_off_t start_pos;
2268 int dec_flags;
2269 int bytes_to_feed;
2270 int chars_to_skip;
2271 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002272} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002273
/*
   To speed up cookie packing/unpacking, the cookie_type fields are staged
   in a temporary byte buffer which is converted with _PyLong_FromByteArray()
   or _PyLong_AsByteArray() respectively.  COOKIE_BUF_LEN is the size of
   that buffer and the OFF_* macros give the offset at which each field is
   stored inside it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if !PY_BIG_ENDIAN
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie, so the fields
   are laid out in declaration order. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#else
/* Big-endian mode: we still want the least significant byte of start_pos
   to be the least significant byte of the cookie, which means the fields
   must be copied in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#endif
2305
2306static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002307textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002308{
2309 unsigned char buffer[COOKIE_BUF_LEN];
2310 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2311 if (cookieLong == NULL)
2312 return -1;
2313
2314 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002315 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002316 Py_DECREF(cookieLong);
2317 return -1;
2318 }
2319 Py_DECREF(cookieLong);
2320
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002321 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2322 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2323 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2324 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2325 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002326
2327 return 0;
2328}
2329
2330static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002331textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002332{
2333 unsigned char buffer[COOKIE_BUF_LEN];
2334
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002335 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2336 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2337 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2338 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2339 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002340
Christian Heimes743e0cd2012-10-17 23:52:17 +02002341 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2342 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002343}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002344
2345static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002346_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347{
2348 PyObject *res;
2349 /* When seeking to the start of the stream, we call decoder.reset()
2350 rather than decoder.getstate().
2351 This is for a few decoders such as utf-16 for which the state value
2352 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2353 utf-16, that we are expecting a BOM).
2354 */
2355 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2356 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2357 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002358 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2359 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002360 if (res == NULL)
2361 return -1;
2362 Py_DECREF(res);
2363 return 0;
2364}
2365
Antoine Pitroue4501852009-05-14 18:55:55 +00002366static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002367_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002368{
2369 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002370 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002371 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2372 self->encoding_start_of_stream = 1;
2373 }
2374 else {
2375 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002376 _PyLong_Zero, NULL);
Antoine Pitroue4501852009-05-14 18:55:55 +00002377 self->encoding_start_of_stream = 0;
2378 }
2379 if (res == NULL)
2380 return -1;
2381 Py_DECREF(res);
2382 return 0;
2383}
2384
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002385static int
2386_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2387{
2388 /* Same as _textiowrapper_decoder_setstate() above. */
2389 return _textiowrapper_encoder_reset(
2390 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2391}
2392
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002393/*[clinic input]
2394_io.TextIOWrapper.seek
2395 cookie as cookieObj: object
2396 whence: int = 0
2397 /
2398[clinic start generated code]*/
2399
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002400static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002401_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2402/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002403{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002404 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002405 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002406 PyObject *res;
2407 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002408 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002409
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002410 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002411 CHECK_CLOSED(self);
2412
2413 Py_INCREF(cookieObj);
2414
2415 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002416 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002417 goto fail;
2418 }
2419
ngie-eign848037c2019-03-02 23:28:26 -08002420 switch (whence) {
2421 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002422 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002423 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002424 if (cmp < 0)
2425 goto fail;
2426
2427 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002428 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002429 goto fail;
2430 }
2431
2432 /* Seeking to the current position should attempt to
2433 * sync the underlying buffer with the current position.
2434 */
2435 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002436 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002437 if (cookieObj == NULL)
2438 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002439 break;
2440
ngie-eign848037c2019-03-02 23:28:26 -08002441 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002442 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002443 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002444 if (cmp < 0)
2445 goto fail;
2446
2447 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002448 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002449 goto fail;
2450 }
2451
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002452 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002453 if (res == NULL)
2454 goto fail;
2455 Py_DECREF(res);
2456
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002457 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002458 Py_CLEAR(self->snapshot);
2459 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002460 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002461 if (res == NULL)
2462 goto fail;
2463 Py_DECREF(res);
2464 }
2465
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002466 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002467 Py_CLEAR(cookieObj);
2468 if (res == NULL)
2469 goto fail;
2470 if (self->encoder) {
2471 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002472 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002473 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2474 Py_DECREF(res);
2475 goto fail;
2476 }
2477 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002479
ngie-eign848037c2019-03-02 23:28:26 -08002480 case SEEK_SET:
2481 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002482
ngie-eign848037c2019-03-02 23:28:26 -08002483 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002484 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002485 "invalid whence (%d, should be %d, %d or %d)", whence,
2486 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002487 goto fail;
2488 }
2489
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002490 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002491 if (cmp < 0)
2492 goto fail;
2493
2494 if (cmp == 1) {
2495 PyErr_Format(PyExc_ValueError,
2496 "negative seek position %R", cookieObj);
2497 goto fail;
2498 }
2499
2500 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2501 if (res == NULL)
2502 goto fail;
2503 Py_DECREF(res);
2504
2505 /* The strategy of seek() is to go back to the safe start point
2506 * and replay the effect of read(chars_to_skip) from there.
2507 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002508 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002509 goto fail;
2510
2511 /* Seek back to the safe start point. */
2512 posobj = PyLong_FromOff_t(cookie.start_pos);
2513 if (posobj == NULL)
2514 goto fail;
2515 res = PyObject_CallMethodObjArgs(self->buffer,
2516 _PyIO_str_seek, posobj, NULL);
2517 Py_DECREF(posobj);
2518 if (res == NULL)
2519 goto fail;
2520 Py_DECREF(res);
2521
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002522 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002523 Py_CLEAR(self->snapshot);
2524
2525 /* Restore the decoder to its state from the safe start point. */
2526 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002527 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528 goto fail;
2529 }
2530
2531 if (cookie.chars_to_skip) {
2532 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002533 PyObject *input_chunk = _PyObject_CallMethodId(
2534 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002535 PyObject *decoded;
2536
2537 if (input_chunk == NULL)
2538 goto fail;
2539
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002540 if (!PyBytes_Check(input_chunk)) {
2541 PyErr_Format(PyExc_TypeError,
2542 "underlying read() should have returned a bytes "
2543 "object, not '%.200s'",
2544 Py_TYPE(input_chunk)->tp_name);
2545 Py_DECREF(input_chunk);
2546 goto fail;
2547 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002548
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002549 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2550 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551 goto fail;
2552 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002553 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002555 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2556 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002557
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002558 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002559 goto fail;
2560
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002561 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002562
2563 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002564 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002565 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002566 goto fail;
2567 }
2568 self->decoded_chars_used = cookie.chars_to_skip;
2569 }
2570 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002571 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2572 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002573 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002574 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002575 }
2576
Antoine Pitroue4501852009-05-14 18:55:55 +00002577 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2578 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002579 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002580 goto fail;
2581 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002582 return cookieObj;
2583 fail:
2584 Py_XDECREF(cookieObj);
2585 return NULL;
2586
2587}
2588
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002589/*[clinic input]
2590_io.TextIOWrapper.tell
2591[clinic start generated code]*/
2592
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002594_io_TextIOWrapper_tell_impl(textio *self)
2595/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002596{
2597 PyObject *res;
2598 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002599 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002600 PyObject *next_input;
2601 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002602 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002603 PyObject *saved_state = NULL;
2604 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002605 Py_ssize_t dec_buffer_len;
2606 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002607
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002608 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002609 CHECK_CLOSED(self);
2610
2611 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002612 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002613 goto fail;
2614 }
2615 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002616 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002617 "telling position disabled by next() call");
2618 goto fail;
2619 }
2620
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002621 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002622 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002623 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002624 if (res == NULL)
2625 goto fail;
2626 Py_DECREF(res);
2627
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002628 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002629 if (posobj == NULL)
2630 goto fail;
2631
2632 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002633 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002634 return posobj;
2635 }
2636
2637#if defined(HAVE_LARGEFILE_SUPPORT)
2638 cookie.start_pos = PyLong_AsLongLong(posobj);
2639#else
2640 cookie.start_pos = PyLong_AsLong(posobj);
2641#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002642 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643 if (PyErr_Occurred())
2644 goto fail;
2645
2646 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002647 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002648 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002649 goto fail;
2650
2651 assert (PyBytes_Check(next_input));
2652
2653 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2654
2655 /* How many decoded characters have been used up since the snapshot? */
2656 if (self->decoded_chars_used == 0) {
2657 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002658 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002659 }
2660
2661 chars_to_skip = self->decoded_chars_used;
2662
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002663 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002664 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2665 _PyIO_str_getstate, NULL);
2666 if (saved_state == NULL)
2667 goto fail;
2668
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002669#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002670 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002671 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2672 _PyIO_str_getstate, NULL); \
2673 if (_state == NULL) \
2674 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002675 if (!PyTuple_Check(_state)) { \
2676 PyErr_SetString(PyExc_TypeError, \
2677 "illegal decoder state"); \
2678 Py_DECREF(_state); \
2679 goto fail; \
2680 } \
2681 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2682 &dec_buffer, &dec_flags)) \
2683 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002684 Py_DECREF(_state); \
2685 goto fail; \
2686 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002687 if (!PyBytes_Check(dec_buffer)) { \
2688 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002689 "illegal decoder state: the first item should be a " \
2690 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002691 Py_TYPE(dec_buffer)->tp_name); \
2692 Py_DECREF(_state); \
2693 goto fail; \
2694 } \
2695 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002696 Py_DECREF(_state); \
2697 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002698
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002699#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002700 PyObject *_decoded = _PyObject_CallMethodId( \
2701 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002702 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002703 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002704 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002705 Py_DECREF(_decoded); \
2706 } while (0)
2707
2708 /* Fast search for an acceptable start point, close to our
2709 current pos */
2710 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2711 skip_back = 1;
2712 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2713 input = PyBytes_AS_STRING(next_input);
2714 while (skip_bytes > 0) {
2715 /* Decode up to temptative start point */
2716 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2717 goto fail;
2718 DECODER_DECODE(input, skip_bytes, chars_decoded);
2719 if (chars_decoded <= chars_to_skip) {
2720 DECODER_GETSTATE();
2721 if (dec_buffer_len == 0) {
2722 /* Before pos and no bytes buffered in decoder => OK */
2723 cookie.dec_flags = dec_flags;
2724 chars_to_skip -= chars_decoded;
2725 break;
2726 }
2727 /* Skip back by buffered amount and reset heuristic */
2728 skip_bytes -= dec_buffer_len;
2729 skip_back = 1;
2730 }
2731 else {
2732 /* We're too far ahead, skip back a bit */
2733 skip_bytes -= skip_back;
2734 skip_back *= 2;
2735 }
2736 }
2737 if (skip_bytes <= 0) {
2738 skip_bytes = 0;
2739 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2740 goto fail;
2741 }
2742
2743 /* Note our initial start point. */
2744 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002745 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002746 if (chars_to_skip == 0)
2747 goto finally;
2748
2749 /* We should be close to the desired position. Now feed the decoder one
2750 * byte at a time until we reach the `chars_to_skip` target.
2751 * As we go, note the nearest "safe start point" before the current
2752 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002753 * can safely start from there and advance to this location).
2754 */
2755 chars_decoded = 0;
2756 input = PyBytes_AS_STRING(next_input);
2757 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002758 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002759 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002760 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002761
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002762 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002763 /* We got n chars for 1 byte */
2764 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002765 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002766 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002767
2768 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2769 /* Decoder buffer is empty, so this is a safe start point. */
2770 cookie.start_pos += cookie.bytes_to_feed;
2771 chars_to_skip -= chars_decoded;
2772 cookie.dec_flags = dec_flags;
2773 cookie.bytes_to_feed = 0;
2774 chars_decoded = 0;
2775 }
2776 if (chars_decoded >= chars_to_skip)
2777 break;
2778 input++;
2779 }
2780 if (input == input_end) {
2781 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002782 PyObject *decoded = _PyObject_CallMethodId(
2783 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002784 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002785 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002786 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002787 Py_DECREF(decoded);
2788 cookie.need_eof = 1;
2789
2790 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002791 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002792 "can't reconstruct logical file position");
2793 goto fail;
2794 }
2795 }
2796
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002797finally:
Victor Stinner7e425412016-12-09 00:36:19 +01002798 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002799 Py_DECREF(saved_state);
2800 if (res == NULL)
2801 return NULL;
2802 Py_DECREF(res);
2803
2804 /* The returned cookie corresponds to the last safe start point. */
2805 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002806 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002807
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002808fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002809 if (saved_state) {
2810 PyObject *type, *value, *traceback;
2811 PyErr_Fetch(&type, &value, &traceback);
Victor Stinner7e425412016-12-09 00:36:19 +01002812 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002813 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002814 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002815 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002816 }
2817 return NULL;
2818}
2819
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002820/*[clinic input]
2821_io.TextIOWrapper.truncate
2822 pos: object = None
2823 /
2824[clinic start generated code]*/
2825
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002826static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002827_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2828/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002829{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002830 PyObject *res;
2831
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002832 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002833
2834 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2835 if (res == NULL)
2836 return NULL;
2837 Py_DECREF(res);
2838
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002839 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002840}
2841
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002842static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002843textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002844{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002845 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002846 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002847
2848 CHECK_INITIALIZED(self);
2849
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002850 res = PyUnicode_FromString("<_io.TextIOWrapper");
2851 if (res == NULL)
2852 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002853
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002854 status = Py_ReprEnter((PyObject *)self);
2855 if (status != 0) {
2856 if (status > 0) {
2857 PyErr_Format(PyExc_RuntimeError,
2858 "reentrant call inside %s.__repr__",
2859 Py_TYPE(self)->tp_name);
2860 }
2861 goto error;
2862 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002863 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002864 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002865 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002866 PyErr_Clear();
2867 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002868 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002869 }
2870 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002871 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002872 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002873 if (s == NULL)
2874 goto error;
2875 PyUnicode_AppendAndDel(&res, s);
2876 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002877 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002878 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002879 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002880 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002881 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002882 PyErr_Clear();
2883 else
2884 goto error;
2885 }
2886 else {
2887 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2888 Py_DECREF(modeobj);
2889 if (s == NULL)
2890 goto error;
2891 PyUnicode_AppendAndDel(&res, s);
2892 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002893 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002894 }
2895 s = PyUnicode_FromFormat("%U encoding=%R>",
2896 res, self->encoding);
2897 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002898 if (status == 0) {
2899 Py_ReprLeave((PyObject *)self);
2900 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002901 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002902
2903 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002904 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002905 if (status == 0) {
2906 Py_ReprLeave((PyObject *)self);
2907 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002908 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002909}
2910
2911
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002912/* Inquiries */
2913
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002914/*[clinic input]
2915_io.TextIOWrapper.fileno
2916[clinic start generated code]*/
2917
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002918static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002919_io_TextIOWrapper_fileno_impl(textio *self)
2920/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002921{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002922 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002923 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002924}
2925
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002926/*[clinic input]
2927_io.TextIOWrapper.seekable
2928[clinic start generated code]*/
2929
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002930static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002931_io_TextIOWrapper_seekable_impl(textio *self)
2932/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002933{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002934 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002935 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002936}
2937
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002938/*[clinic input]
2939_io.TextIOWrapper.readable
2940[clinic start generated code]*/
2941
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002942static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002943_io_TextIOWrapper_readable_impl(textio *self)
2944/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002945{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002946 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002947 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002948}
2949
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002950/*[clinic input]
2951_io.TextIOWrapper.writable
2952[clinic start generated code]*/
2953
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002954static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002955_io_TextIOWrapper_writable_impl(textio *self)
2956/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002957{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002958 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002959 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002960}
2961
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002962/*[clinic input]
2963_io.TextIOWrapper.isatty
2964[clinic start generated code]*/
2965
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002966static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002967_io_TextIOWrapper_isatty_impl(textio *self)
2968/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002969{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002970 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002971 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002972}
2973
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002974/*[clinic input]
2975_io.TextIOWrapper.flush
2976[clinic start generated code]*/
2977
Antoine Pitrou243757e2010-11-05 21:15:39 +00002978static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002979_io_TextIOWrapper_flush_impl(textio *self)
2980/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002981{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002982 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002983 CHECK_CLOSED(self);
2984 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002985 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002986 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002987 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002988}
2989
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002990/*[clinic input]
2991_io.TextIOWrapper.close
2992[clinic start generated code]*/
2993
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002994static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002995_io_TextIOWrapper_close_impl(textio *self)
2996/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002997{
2998 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002999 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003000 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003001
Antoine Pitrou6be88762010-05-03 16:48:20 +00003002 res = textiowrapper_closed_get(self, NULL);
3003 if (res == NULL)
3004 return NULL;
3005 r = PyObject_IsTrue(res);
3006 Py_DECREF(res);
3007 if (r < 0)
3008 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003009
Antoine Pitrou6be88762010-05-03 16:48:20 +00003010 if (r > 0) {
3011 Py_RETURN_NONE; /* stream already closed */
3012 }
3013 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003014 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003015 if (self->finalizing) {
Victor Stinner61bdb0d2016-12-09 15:39:28 +01003016 res = _PyObject_CallMethodIdObjArgs(self->buffer,
3017 &PyId__dealloc_warn,
3018 self, NULL);
Antoine Pitroue033e062010-10-29 10:38:18 +00003019 if (res)
3020 Py_DECREF(res);
3021 else
3022 PyErr_Clear();
3023 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003024 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06003025 if (res == NULL)
3026 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003027 else
3028 Py_DECREF(res);
3029
Benjamin Peterson68623612012-12-20 11:53:11 -06003030 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
3031 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003032 _PyErr_ChainExceptions(exc, val, tb);
3033 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003034 }
3035 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003036 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003037}
3038
3039static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003040textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003041{
3042 PyObject *line;
3043
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003044 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003045
3046 self->telling = 0;
3047 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
3048 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003049 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003050 }
3051 else {
3052 line = PyObject_CallMethodObjArgs((PyObject *)self,
3053 _PyIO_str_readline, NULL);
3054 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003055 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003056 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003057 "not '%.200s'", Py_TYPE(line)->tp_name);
3058 Py_DECREF(line);
3059 return NULL;
3060 }
3061 }
3062
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003063 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003064 return NULL;
3065
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003066 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003067 /* Reached EOF or would have blocked */
3068 Py_DECREF(line);
3069 Py_CLEAR(self->snapshot);
3070 self->telling = self->seekable;
3071 return NULL;
3072 }
3073
3074 return line;
3075}
3076
3077static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003078textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003079{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003080 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003081 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003082}
3083
3084static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003085textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003086{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003087 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003088 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3089}
3090
3091static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003092textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003093{
3094 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003095 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003096 if (self->decoder == NULL ||
3097 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3098 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003099 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003100 }
3101 return res;
3102}
3103
3104static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003105textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003106{
3107 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003108 Py_INCREF(self->errors);
3109 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003110}
3111
3112static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003113textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003114{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003115 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003116 return PyLong_FromSsize_t(self->chunk_size);
3117}
3118
3119static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003120textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003121{
3122 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003123 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003124 if (arg == NULL) {
3125 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3126 return -1;
3127 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003128 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003129 if (n == -1 && PyErr_Occurred())
3130 return -1;
3131 if (n <= 0) {
3132 PyErr_SetString(PyExc_ValueError,
3133 "a strictly positive integer is required");
3134 return -1;
3135 }
3136 self->chunk_size = n;
3137 return 0;
3138}
3139
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003140#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003141
/* Method table for IncrementalNewlineDecoder.  Each entry is a *_METHODDEF
   macro; these are presumably supplied by "clinic/textio.c.h" -- confirm. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3149
/* Computed attributes of IncrementalNewlineDecoder: "newlines" has a
   getter only (no setter is registered). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel */
};
3154
3155PyTypeObject PyIncrementalNewlineDecoder_Type = {
3156 PyVarObject_HEAD_INIT(NULL, 0)
3157 "_io.IncrementalNewlineDecoder", /*tp_name*/
3158 sizeof(nldecoder_object), /*tp_basicsize*/
3159 0, /*tp_itemsize*/
3160 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3161 0, /*tp_print*/
3162 0, /*tp_getattr*/
3163 0, /*tp_setattr*/
3164 0, /*tp_compare */
3165 0, /*tp_repr*/
3166 0, /*tp_as_number*/
3167 0, /*tp_as_sequence*/
3168 0, /*tp_as_mapping*/
3169 0, /*tp_hash */
3170 0, /*tp_call*/
3171 0, /*tp_str*/
3172 0, /*tp_getattro*/
3173 0, /*tp_setattro*/
3174 0, /*tp_as_buffer*/
3175 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3176 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3177 0, /* tp_traverse */
3178 0, /* tp_clear */
3179 0, /* tp_richcompare */
3180 0, /*tp_weaklistoffset*/
3181 0, /* tp_iter */
3182 0, /* tp_iternext */
3183 incrementalnewlinedecoder_methods, /* tp_methods */
3184 0, /* tp_members */
3185 incrementalnewlinedecoder_getset, /* tp_getset */
3186 0, /* tp_base */
3187 0, /* tp_dict */
3188 0, /* tp_descr_get */
3189 0, /* tp_descr_set */
3190 0, /* tp_dictoffset */
3191 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3192 0, /* tp_alloc */
3193 PyType_GenericNew, /* tp_new */
3194};
3195
3196
/* Method table for TextIOWrapper.  Each entry is a *_METHODDEF macro;
   these are presumably supplied by "clinic/textio.c.h" -- confirm. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Queries forwarded to the underlying buffer */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3217
/* Struct fields of `textio` exposed directly as attributes.  All are
   flagged READONLY except "_finalizing", whose flags field is 0. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}  /* sentinel */
};
3226
/* Computed attributes of TextIOWrapper.  Only "_CHUNK_SIZE" registers a
   setter; the others are getter-only. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/* Disabled entry, kept for reference:
    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}  /* sentinel */
};
3238
3239PyTypeObject PyTextIOWrapper_Type = {
3240 PyVarObject_HEAD_INIT(NULL, 0)
3241 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003242 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003243 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003244 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003245 0, /*tp_print*/
3246 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003247 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003248 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003249 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003250 0, /*tp_as_number*/
3251 0, /*tp_as_sequence*/
3252 0, /*tp_as_mapping*/
3253 0, /*tp_hash */
3254 0, /*tp_call*/
3255 0, /*tp_str*/
3256 0, /*tp_getattro*/
3257 0, /*tp_setattro*/
3258 0, /*tp_as_buffer*/
3259 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrouada319b2019-05-29 22:12:38 +02003260 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003261 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003262 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3263 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003264 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003265 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003266 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003267 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3268 textiowrapper_methods, /* tp_methods */
3269 textiowrapper_members, /* tp_members */
3270 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003271 0, /* tp_base */
3272 0, /* tp_dict */
3273 0, /* tp_descr_get */
3274 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003275 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003276 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003277 0, /* tp_alloc */
3278 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003279 0, /* tp_free */
3280 0, /* tp_is_gc */
3281 0, /* tp_bases */
3282 0, /* tp_mro */
3283 0, /* tp_cache */
3284 0, /* tp_subclasses */
3285 0, /* tp_weaklist */
3286 0, /* tp_del */
3287 0, /* tp_version_tag */
3288 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003289};