blob: 7ddac8062a96e9dfbb2630c93f3f20d95afa64b4 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +010011#include "pycore_object.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000012#include "structmember.h"
13#include "_iomodule.h"
14
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030015/*[clinic input]
16module _io
17class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19[clinic start generated code]*/
20/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21
/* Static identifier objects (PyId_<name>) for the attribute and method
   names this file refers to, e.g. PyId_strict and PyId_setstate. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000043/* TextIOBase */
44
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000045PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046 "Base class for text I/O.\n"
47 "\n"
48 "This class provides a character and line based interface to stream\n"
49 "I/O. There is no readinto method because Python's character strings\n"
50 "are immutable. There is no public constructor.\n"
51 );
52
53static PyObject *
54_unsupported(const char *message)
55{
Antoine Pitrou712cb732013-12-21 15:51:54 +010056 _PyIO_State *state = IO_STATE();
57 if (state != NULL)
58 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000059 return NULL;
60}
61
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063 "Separate the underlying buffer from the TextIOBase and return it.\n"
64 "\n"
65 "After the underlying buffer has been detached, the TextIO is in an\n"
66 "unusable state.\n"
67 );
68
69static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053070textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000071{
72 return _unsupported("detach");
73}
74
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076 "Read at most n characters from stream.\n"
77 "\n"
78 "Read from underlying buffer until we have n characters or we hit EOF.\n"
79 "If n is negative or omitted, read until EOF.\n"
80 );
81
82static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000083textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000084{
85 return _unsupported("read");
86}
87
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089 "Read until newline or EOF.\n"
90 "\n"
91 "Returns an empty string if EOF is hit immediately.\n"
92 );
93
94static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000095textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000096{
97 return _unsupported("readline");
98}
99
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000100PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000101 "Write string to stream.\n"
102 "Returns the number of characters written (which is always equal to\n"
103 "the length of the string).\n"
104 );
105
106static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000108{
109 return _unsupported("write");
110}
111
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000112PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000113 "Encoding of the text stream.\n"
114 "\n"
115 "Subclasses should override.\n"
116 );
117
118static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000119textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120{
121 Py_RETURN_NONE;
122}
123
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 "Line endings translated so far.\n"
126 "\n"
127 "Only line endings translated during reading are considered.\n"
128 "\n"
129 "Subclasses should override.\n"
130 );
131
132static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000133textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000134{
135 Py_RETURN_NONE;
136}
137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000138PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000139 "The error setting of the decoder or encoder.\n"
140 "\n"
141 "Subclasses should override.\n"
142 );
143
144static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000145textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000146{
147 Py_RETURN_NONE;
148}
149
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530152 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000153 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156 {NULL, NULL}
157};
158
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000159static PyGetSetDef textiobase_getset[] = {
160 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000163 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164};
165
166PyTypeObject PyTextIOBase_Type = {
167 PyVarObject_HEAD_INIT(NULL, 0)
168 "_io._TextIOBase", /*tp_name*/
169 0, /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 0, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200172 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000173 0, /*tp_getattr*/
174 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200175 0, /*tp_as_async*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000176 0, /*tp_repr*/
177 0, /*tp_as_number*/
178 0, /*tp_as_sequence*/
179 0, /*tp_as_mapping*/
180 0, /*tp_hash */
181 0, /*tp_call*/
182 0, /*tp_str*/
183 0, /*tp_getattro*/
184 0, /*tp_setattro*/
185 0, /*tp_as_buffer*/
Antoine Pitrouada319b2019-05-29 22:12:38 +0200186 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 0, /* tp_traverse */
189 0, /* tp_clear */
190 0, /* tp_richcompare */
191 0, /* tp_weaklistoffset */
192 0, /* tp_iter */
193 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000194 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000195 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197 &PyIOBase_Type, /* tp_base */
198 0, /* tp_dict */
199 0, /* tp_descr_get */
200 0, /* tp_descr_set */
201 0, /* tp_dictoffset */
202 0, /* tp_init */
203 0, /* tp_alloc */
204 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200205 0, /* tp_free */
206 0, /* tp_is_gc */
207 0, /* tp_bases */
208 0, /* tp_mro */
209 0, /* tp_cache */
210 0, /* tp_subclasses */
211 0, /* tp_weaklist */
212 0, /* tp_del */
213 0, /* tp_version_tag */
214 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000215};
216
217
218/* IncrementalNewlineDecoder */
219
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000220typedef struct {
221 PyObject_HEAD
222 PyObject *decoder;
223 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200224 unsigned int pendingcr: 1;
225 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000226 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000227} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000228
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300229/*[clinic input]
230_io.IncrementalNewlineDecoder.__init__
231 decoder: object
232 translate: int
233 errors: object(c_default="NULL") = "strict"
234
235Codec used when reading a file in universal newlines mode.
236
237It wraps another incremental decoder, translating \r\n and \r into \n.
238It also records the types of newlines encountered. When used with
239translate=False, it ensures that the newline sequence is returned in
240one piece. When used with decoder=None, it expects unicode strings as
241decode input and translates newlines without first invoking an external
242decoder.
243[clinic start generated code]*/
244
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000245static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300246_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247 PyObject *decoder, int translate,
248 PyObject *errors)
249/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000250{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251 self->decoder = decoder;
252 Py_INCREF(decoder);
253
254 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900255 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000256 if (self->errors == NULL)
257 return -1;
258 }
259 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000260 self->errors = errors;
261 }
INADA Naoki507434f2017-12-21 09:59:53 +0900262 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000263
Xiang Zhangb08746b2018-10-31 19:49:16 +0800264 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000265 self->seennl = 0;
266 self->pendingcr = 0;
267
268 return 0;
269}
270
271static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273{
274 Py_CLEAR(self->decoder);
275 Py_CLEAR(self->errors);
276 Py_TYPE(self)->tp_free((PyObject *)self);
277}
278
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200279static int
280check_decoded(PyObject *decoded)
281{
282 if (decoded == NULL)
283 return -1;
284 if (!PyUnicode_Check(decoded)) {
285 PyErr_Format(PyExc_TypeError,
286 "decoder should return a string result, not '%.200s'",
287 Py_TYPE(decoded)->tp_name);
288 Py_DECREF(decoded);
289 return -1;
290 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200291 if (PyUnicode_READY(decoded) < 0) {
292 Py_DECREF(decoded);
293 return -1;
294 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200295 return 0;
296}
297
/* Bit flags stored in nldecoder_object.seennl: which newline kinds have
   been encountered so far. */
#define SEEN_CR   1     /* a lone "\r" */
#define SEEN_LF   2     /* a lone "\n" */
#define SEEN_CRLF 4     /* a "\r\n" pair */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302
303PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200304_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000305 PyObject *input, int final)
306{
307 PyObject *output;
308 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200309 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000310
311 if (self->decoder == NULL) {
312 PyErr_SetString(PyExc_ValueError,
313 "IncrementalNewlineDecoder.__init__ not called");
314 return NULL;
315 }
316
317 /* decode input (with the eventual \r from a previous pass) */
318 if (self->decoder != Py_None) {
319 output = PyObject_CallMethodObjArgs(self->decoder,
320 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
321 }
322 else {
323 output = input;
324 Py_INCREF(output);
325 }
326
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200327 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000328 return NULL;
329
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200330 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000331 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 /* Prefix output with CR */
333 int kind;
334 PyObject *modified;
335 char *out;
336
337 modified = PyUnicode_New(output_len + 1,
338 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000339 if (modified == NULL)
340 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 kind = PyUnicode_KIND(modified);
342 out = PyUnicode_DATA(modified);
343 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200344 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000345 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200346 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347 self->pendingcr = 0;
348 output_len++;
349 }
350
351 /* retain last \r even when not translating data:
352 * then readline() is sure to get \r\n in one pass
353 */
354 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000355 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200356 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
357 {
358 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
359 if (modified == NULL)
360 goto error;
361 Py_DECREF(output);
362 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000363 self->pendingcr = 1;
364 }
365 }
366
367 /* Record which newlines are read and do newline translation if desired,
368 all in one pass. */
369 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000371 Py_ssize_t len;
372 int seennl = self->seennl;
373 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200374 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000375
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200376 in_str = PyUnicode_DATA(output);
377 len = PyUnicode_GET_LENGTH(output);
378 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000379
380 if (len == 0)
381 return output;
382
383 /* If, up to now, newlines are consistently \n, do a quick check
384 for the \r *byte* with the libc's optimized memchr.
385 */
386 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200387 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000388 }
389
Antoine Pitrou66913e22009-03-06 23:40:56 +0000390 if (only_lf) {
391 /* If not already seen, quick scan for a possible "\n" character.
392 (there's nothing else to be done, even when in translation mode)
393 */
394 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200395 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100396 if (kind == PyUnicode_1BYTE_KIND)
397 seennl |= SEEN_LF;
398 else {
399 Py_ssize_t i = 0;
400 for (;;) {
401 Py_UCS4 c;
402 /* Fast loop for non-control characters */
403 while (PyUnicode_READ(kind, in_str, i) > '\n')
404 i++;
405 c = PyUnicode_READ(kind, in_str, i++);
406 if (c == '\n') {
407 seennl |= SEEN_LF;
408 break;
409 }
410 if (i >= len)
411 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000412 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000413 }
414 }
415 /* Finished: we have scanned for newlines, and none of them
416 need translating */
417 }
418 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000420 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000421 if (seennl == SEEN_ALL)
422 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000423 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200424 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000425 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 while (PyUnicode_READ(kind, in_str, i) > '\r')
427 i++;
428 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000429 if (c == '\n')
430 seennl |= SEEN_LF;
431 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200432 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 }
436 else
437 seennl |= SEEN_CR;
438 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200439 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 break;
441 if (seennl == SEEN_ALL)
442 break;
443 }
444 endscan:
445 ;
446 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000447 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 void *translated;
449 int kind = PyUnicode_KIND(output);
450 void *in_str = PyUnicode_DATA(output);
451 Py_ssize_t in, out;
452 /* XXX: Previous in-place translation here is disabled as
453 resizing is not possible anymore */
454 /* We could try to optimize this so that we only do a copy
455 when there is something to translate. On the other hand,
456 we already know there is a \r byte, so chances are high
457 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200458 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 if (translated == NULL) {
460 PyErr_NoMemory();
461 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000462 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200463 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200465 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000466 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
468 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000469 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200470 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000471 seennl |= SEEN_LF;
472 continue;
473 }
474 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200475 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 in++;
477 seennl |= SEEN_CRLF;
478 }
479 else
480 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200481 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000482 continue;
483 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200484 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000485 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200486 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200488 Py_DECREF(output);
489 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100490 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200492 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000493 }
494 self->seennl |= seennl;
495 }
496
497 return output;
498
499 error:
500 Py_DECREF(output);
501 return NULL;
502}
503
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300504/*[clinic input]
505_io.IncrementalNewlineDecoder.decode
506 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200507 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300508[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000509
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300510static PyObject *
511_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
512 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200513/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300514{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000515 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
516}
517
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300518/*[clinic input]
519_io.IncrementalNewlineDecoder.getstate
520[clinic start generated code]*/
521
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000522static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300523_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700527 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528
529 if (self->decoder != Py_None) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200530 PyObject *state = _PyObject_CallMethodNoArgs(self->decoder,
531 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532 if (state == NULL)
533 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300534 if (!PyTuple_Check(state)) {
535 PyErr_SetString(PyExc_TypeError,
536 "illegal decoder state");
537 Py_DECREF(state);
538 return NULL;
539 }
540 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541 &buffer, &flag))
542 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000543 Py_DECREF(state);
544 return NULL;
545 }
546 Py_INCREF(buffer);
547 Py_DECREF(state);
548 }
549 else {
550 buffer = PyBytes_FromString("");
551 flag = 0;
552 }
553 flag <<= 1;
554 if (self->pendingcr)
555 flag |= 1;
556 return Py_BuildValue("NK", buffer, flag);
557}
558
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300559/*[clinic input]
560_io.IncrementalNewlineDecoder.setstate
561 state: object
562 /
563[clinic start generated code]*/
564
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300566_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567 PyObject *state)
568/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000569{
570 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700571 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572
Oren Milman1d1d3e92017-08-20 18:35:36 +0300573 if (!PyTuple_Check(state)) {
574 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300576 }
577 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578 &buffer, &flag))
579 {
580 return NULL;
581 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000582
Victor Stinner7d7e7752014-06-17 23:31:25 +0200583 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584 flag >>= 1;
585
586 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200587 return _PyObject_CallMethodId(self->decoder,
588 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 else
590 Py_RETURN_NONE;
591}
592
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300593/*[clinic input]
594_io.IncrementalNewlineDecoder.reset
595[clinic start generated code]*/
596
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000597static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300598_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000600{
601 self->seennl = 0;
602 self->pendingcr = 0;
603 if (self->decoder != Py_None)
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200604 return _PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000605 else
606 Py_RETURN_NONE;
607}
608
609static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000610incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000611{
612 switch (self->seennl) {
613 case SEEN_CR:
614 return PyUnicode_FromString("\r");
615 case SEEN_LF:
616 return PyUnicode_FromString("\n");
617 case SEEN_CRLF:
618 return PyUnicode_FromString("\r\n");
619 case SEEN_CR | SEEN_LF:
620 return Py_BuildValue("ss", "\r", "\n");
621 case SEEN_CR | SEEN_CRLF:
622 return Py_BuildValue("ss", "\r", "\r\n");
623 case SEEN_LF | SEEN_CRLF:
624 return Py_BuildValue("ss", "\n", "\r\n");
625 case SEEN_CR | SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("sss", "\r", "\n", "\r\n");
627 default:
628 Py_RETURN_NONE;
629 }
630
631}
632
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000633/* TextIOWrapper */
634
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000635typedef PyObject *
636 (*encodefunc_t)(PyObject *, PyObject *);
637
638typedef struct
639{
640 PyObject_HEAD
641 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000642 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000643 Py_ssize_t chunk_size;
644 PyObject *buffer;
645 PyObject *encoding;
646 PyObject *encoder;
647 PyObject *decoder;
648 PyObject *readnl;
649 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900650 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000651 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200652 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 char readuniversal;
654 char readtranslate;
655 char writetranslate;
656 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200657 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000658 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200659 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000660 /* Specialized encoding func (see below) */
661 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000662 /* Whether or not it's the start of the stream */
663 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000664
665 /* Reads and writes are internally buffered in order to speed things up.
666 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000667
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000668 Please also note that text to be written is first encoded before being
669 buffered. This is necessary so that encoding errors are immediately
670 reported to the caller, but it unfortunately means that the
671 IncrementalEncoder (whose encode() method is always written in Python)
672 becomes a bottleneck for small writes.
673 */
674 PyObject *decoded_chars; /* buffer for text returned from decoder */
675 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
Inada Naokibfba8c32019-05-16 15:03:20 +0900676 PyObject *pending_bytes; // data waiting to be written.
677 // ascii unicode, bytes, or list of them.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000679
Oren Milman13614e32017-08-24 19:51:24 +0300680 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000681 * dec_flags is the second (integer) item of the decoder state and
682 * next_input is the chunk of input bytes that comes next after the
683 * snapshot point. We use this to reconstruct decoder states in tell().
684 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000685 PyObject *snapshot;
686 /* Bytes-to-characters ratio for the current chunk. Serves as input for
687 the heuristic in tell(). */
688 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000689
690 /* Cache raw object if it's a FileIO object */
691 PyObject *raw;
692
693 PyObject *weakreflist;
694 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000695} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696
Zackery Spytz23db9352018-06-29 04:14:58 -0600697static void
698textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700/* A couple of specialized cases in order to bypass the slow incremental
701 encoding methods for the most popular encodings. */
702
703static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000704ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000705{
INADA Naoki507434f2017-12-21 09:59:53 +0900706 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707}
708
709static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000710utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000711{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100712 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900713 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714}
715
716static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000717utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100719 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900720 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721}
722
723static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000724utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725{
Antoine Pitroue4501852009-05-14 18:55:55 +0000726 if (!self->encoding_start_of_stream) {
727 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200728#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000731 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000732#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000733 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900735 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
Antoine Pitroue4501852009-05-14 18:55:55 +0000738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000740{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100741 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900742 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000743}
744
745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000746utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000747{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100748 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900749 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000750}
751
752static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000753utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000754{
755 if (!self->encoding_start_of_stream) {
756 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200757#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000758 return utf32be_encode(self, text);
759#else
760 return utf32le_encode(self, text);
761#endif
762 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900764 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769{
INADA Naoki507434f2017-12-21 09:59:53 +0900770 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771}
772
773static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000774latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000775{
INADA Naoki507434f2017-12-21 09:59:53 +0900776 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000777}
778
Inada Naokibfba8c32019-05-16 15:03:20 +0900779// Return true when encoding can be skipped when text is ascii.
780static inline int
781is_asciicompat_encoding(encodefunc_t f)
782{
783 return f == (encodefunc_t) ascii_encode
784 || f == (encodefunc_t) latin1_encode
785 || f == (encodefunc_t) utf8_encode;
786}
787
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788/* Map normalized encoding names onto the specialized encoding funcs */
789
790typedef struct {
791 const char *name;
792 encodefunc_t encodefunc;
793} encodefuncentry;
794
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200795static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000796 {"ascii", (encodefunc_t) ascii_encode},
797 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000798 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799 {"utf-16-be", (encodefunc_t) utf16be_encode},
800 {"utf-16-le", (encodefunc_t) utf16le_encode},
801 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000802 {"utf-32-be", (encodefunc_t) utf32be_encode},
803 {"utf-32-le", (encodefunc_t) utf32le_encode},
804 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000805 {NULL, NULL}
806};
807
INADA Naoki507434f2017-12-21 09:59:53 +0900808static int
809validate_newline(const char *newline)
810{
811 if (newline && newline[0] != '\0'
812 && !(newline[0] == '\n' && newline[1] == '\0')
813 && !(newline[0] == '\r' && newline[1] == '\0')
814 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
815 PyErr_Format(PyExc_ValueError,
816 "illegal newline value: %s", newline);
817 return -1;
818 }
819 return 0;
820}
821
822static int
823set_newline(textio *self, const char *newline)
824{
825 PyObject *old = self->readnl;
826 if (newline == NULL) {
827 self->readnl = NULL;
828 }
829 else {
830 self->readnl = PyUnicode_FromString(newline);
831 if (self->readnl == NULL) {
832 self->readnl = old;
833 return -1;
834 }
835 }
836 self->readuniversal = (newline == NULL || newline[0] == '\0');
837 self->readtranslate = (newline == NULL);
838 self->writetranslate = (newline == NULL || newline[0] != '\0');
839 if (!self->readuniversal && self->readnl != NULL) {
840 // validate_newline() accepts only ASCII newlines.
841 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
842 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
843 if (strcmp(self->writenl, "\n") == 0) {
844 self->writenl = NULL;
845 }
846 }
847 else {
848#ifdef MS_WINDOWS
849 self->writenl = "\r\n";
850#else
851 self->writenl = NULL;
852#endif
853 }
854 Py_XDECREF(old);
855 return 0;
856}
857
858static int
859_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
860 const char *errors)
861{
862 PyObject *res;
863 int r;
864
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200865 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
INADA Naoki507434f2017-12-21 09:59:53 +0900866 if (res == NULL)
867 return -1;
868
869 r = PyObject_IsTrue(res);
870 Py_DECREF(res);
871 if (r == -1)
872 return -1;
873
874 if (r != 1)
875 return 0;
876
877 Py_CLEAR(self->decoder);
878 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
879 if (self->decoder == NULL)
880 return -1;
881
882 if (self->readuniversal) {
883 PyObject *incrementalDecoder = PyObject_CallFunction(
884 (PyObject *)&PyIncrementalNewlineDecoder_Type,
885 "Oi", self->decoder, (int)self->readtranslate);
886 if (incrementalDecoder == NULL)
887 return -1;
888 Py_CLEAR(self->decoder);
889 self->decoder = incrementalDecoder;
890 }
891
892 return 0;
893}
894
895static PyObject*
896_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
897{
898 PyObject *chars;
899
900 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
901 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
902 else
903 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
904 eof ? Py_True : Py_False, NULL);
905
906 if (check_decoded(chars) < 0)
907 // check_decoded already decreases refcount
908 return NULL;
909
910 return chars;
911}
912
913static int
914_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
915 const char *errors)
916{
917 PyObject *res;
918 int r;
919
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200920 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
INADA Naoki507434f2017-12-21 09:59:53 +0900921 if (res == NULL)
922 return -1;
923
924 r = PyObject_IsTrue(res);
925 Py_DECREF(res);
926 if (r == -1)
927 return -1;
928
929 if (r != 1)
930 return 0;
931
932 Py_CLEAR(self->encoder);
933 self->encodefunc = NULL;
934 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
935 if (self->encoder == NULL)
936 return -1;
937
938 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200939 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
940 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900941 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200942 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900943 const encodefuncentry *e = encodefuncs;
944 while (e->name != NULL) {
945 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
946 self->encodefunc = e->encodefunc;
947 break;
948 }
949 e++;
950 }
951 }
952 Py_XDECREF(res);
953
954 return 0;
955}
956
957static int
958_textiowrapper_fix_encoder_state(textio *self)
959{
960 if (!self->seekable || !self->encoder) {
961 return 0;
962 }
963
964 self->encoding_start_of_stream = 1;
965
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200966 PyObject *cookieObj = _PyObject_CallMethodNoArgs(
967 self->buffer, _PyIO_str_tell);
INADA Naoki507434f2017-12-21 09:59:53 +0900968 if (cookieObj == NULL) {
969 return -1;
970 }
971
972 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
973 Py_DECREF(cookieObj);
974 if (cmp < 0) {
975 return -1;
976 }
977
978 if (cmp == 0) {
979 self->encoding_start_of_stream = 0;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +0200980 PyObject *res = _PyObject_CallMethodOneArg(
981 self->encoder, _PyIO_str_setstate, _PyLong_Zero);
INADA Naoki507434f2017-12-21 09:59:53 +0900982 if (res == NULL) {
983 return -1;
984 }
985 Py_DECREF(res);
986 }
987
988 return 0;
989}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990
Victor Stinner22eb6892019-06-26 00:51:05 +0200991static int
992io_check_errors(PyObject *errors)
993{
994 assert(errors != NULL && errors != Py_None);
995
996 PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
997#ifndef Py_DEBUG
998 /* In release mode, only check in development mode (-X dev) */
999 if (!interp->config.dev_mode) {
1000 return 0;
1001 }
1002#else
1003 /* Always check in debug mode */
1004#endif
1005
1006 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1007 before_PyUnicode_InitEncodings() is called. */
1008 if (!interp->fs_codec.encoding) {
1009 return 0;
1010 }
1011
1012 Py_ssize_t name_length;
1013 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1014 if (name == NULL) {
1015 return -1;
1016 }
1017 if (strlen(name) != (size_t)name_length) {
1018 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1019 return -1;
1020 }
1021 PyObject *handler = PyCodec_LookupError(name);
1022 if (handler != NULL) {
1023 Py_DECREF(handler);
1024 return 0;
1025 }
1026 return -1;
1027}
1028
1029
1030
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001031/*[clinic input]
1032_io.TextIOWrapper.__init__
1033 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -07001034 encoding: str(accept={str, NoneType}) = NULL
INADA Naoki507434f2017-12-21 09:59:53 +09001035 errors: object = None
Larry Hastingsdbfdc382015-05-04 06:59:46 -07001036 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001037 line_buffering: bool(accept={int}) = False
1038 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001040Character and line based layer over a BufferedIOBase object, buffer.
1041
1042encoding gives the name of the encoding that the stream will be
1043decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1044
1045errors determines the strictness of encoding and decoding (see
1046help(codecs.Codec) or the documentation for codecs.register) and
1047defaults to "strict".
1048
1049newline controls how line endings are handled. It can be None, '',
1050'\n', '\r', and '\r\n'. It works as follows:
1051
1052* On input, if newline is None, universal newlines mode is
1053 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1054 these are translated into '\n' before being returned to the
1055 caller. If it is '', universal newline mode is enabled, but line
1056 endings are returned to the caller untranslated. If it has any of
1057 the other legal values, input lines are only terminated by the given
1058 string, and the line ending is returned to the caller untranslated.
1059
1060* On output, if newline is None, any '\n' characters written are
1061 translated to the system default line separator, os.linesep. If
1062 newline is '' or '\n', no translation takes place. If newline is any
1063 of the other legal values, any '\n' characters written are translated
1064 to the given string.
1065
1066If line_buffering is True, a call to flush is implied when a call to
1067write contains a newline character.
1068[clinic start generated code]*/
1069
1070static int
1071_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001072 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001073 const char *newline, int line_buffering,
1074 int write_through)
INADA Naoki507434f2017-12-21 09:59:53 +09001075/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001076{
1077 PyObject *raw, *codec_info = NULL;
1078 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001079 PyObject *res;
1080 int r;
1081
1082 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001083 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001084
INADA Naoki507434f2017-12-21 09:59:53 +09001085 if (errors == Py_None) {
1086 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001087 if (errors == NULL) {
1088 return -1;
1089 }
INADA Naoki507434f2017-12-21 09:59:53 +09001090 }
1091 else if (!PyUnicode_Check(errors)) {
1092 // Check 'errors' argument here because Argument Clinic doesn't support
1093 // 'str(accept={str, NoneType})' converter.
1094 PyErr_Format(
1095 PyExc_TypeError,
1096 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1097 errors->ob_type->tp_name);
1098 return -1;
1099 }
Victor Stinner22eb6892019-06-26 00:51:05 +02001100 else if (io_check_errors(errors)) {
1101 return -1;
1102 }
INADA Naoki507434f2017-12-21 09:59:53 +09001103
1104 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001105 return -1;
1106 }
1107
1108 Py_CLEAR(self->buffer);
1109 Py_CLEAR(self->encoding);
1110 Py_CLEAR(self->encoder);
1111 Py_CLEAR(self->decoder);
1112 Py_CLEAR(self->readnl);
1113 Py_CLEAR(self->decoded_chars);
1114 Py_CLEAR(self->pending_bytes);
1115 Py_CLEAR(self->snapshot);
1116 Py_CLEAR(self->errors);
1117 Py_CLEAR(self->raw);
1118 self->decoded_chars_used = 0;
1119 self->pending_bytes_count = 0;
1120 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001121 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122
1123 if (encoding == NULL) {
1124 /* Try os.device_encoding(fileno) */
1125 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001126 state = IO_STATE();
1127 if (state == NULL)
1128 goto error;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001129 fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001130 /* Ignore only AttributeError and UnsupportedOperation */
1131 if (fileno == NULL) {
1132 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1133 PyErr_ExceptionMatches(state->unsupported_operation)) {
1134 PyErr_Clear();
1135 }
1136 else {
1137 goto error;
1138 }
1139 }
1140 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001141 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001142 Py_DECREF(fileno);
1143 if (fd == -1 && PyErr_Occurred()) {
1144 goto error;
1145 }
1146
1147 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001148 if (self->encoding == NULL)
1149 goto error;
1150 else if (!PyUnicode_Check(self->encoding))
1151 Py_CLEAR(self->encoding);
1152 }
1153 }
1154 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001155 PyObject *locale_module = _PyIO_get_locale_module(state);
1156 if (locale_module == NULL)
1157 goto catch_ImportError;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001158 self->encoding = _PyObject_CallMethodIdOneArg(
1159 locale_module, &PyId_getpreferredencoding, Py_False);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001160 Py_DECREF(locale_module);
1161 if (self->encoding == NULL) {
1162 catch_ImportError:
1163 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001164 Importing locale can raise an ImportError because of
1165 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001166 ImportError if _locale is not available. These will happen
1167 during module building.
1168 */
1169 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1170 PyErr_Clear();
1171 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001172 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001173 else
1174 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001175 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001176 else if (!PyUnicode_Check(self->encoding))
1177 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001178 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001179 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001180 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001181 if (encoding == NULL)
1182 goto error;
1183 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001184 else if (encoding != NULL) {
1185 self->encoding = PyUnicode_FromString(encoding);
1186 if (self->encoding == NULL)
1187 goto error;
1188 }
1189 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001190 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001191 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001192 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001193 }
1194
Nick Coghlana9b15242014-02-04 22:11:18 +10001195 /* Check we have been asked for a real text encoding */
1196 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1197 if (codec_info == NULL) {
1198 Py_CLEAR(self->encoding);
1199 goto error;
1200 }
1201
1202 /* XXX: Failures beyond this point have the potential to leak elements
1203 * of the partially constructed object (like self->encoding)
1204 */
1205
INADA Naoki507434f2017-12-21 09:59:53 +09001206 Py_INCREF(errors);
1207 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001208 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001209 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001210 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001211 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001212 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001213 }
1214
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001215 self->buffer = buffer;
1216 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001217
INADA Naoki507434f2017-12-21 09:59:53 +09001218 /* Build the decoder object */
1219 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1220 goto error;
1221
1222 /* Build the encoder object */
1223 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1224 goto error;
1225
1226 /* Finished sorting out the codec details */
1227 Py_CLEAR(codec_info);
1228
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001229 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1230 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001231 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1232 {
1233 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1234 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001235 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001236 if (raw != NULL) {
1237 if (Py_TYPE(raw) == &PyFileIO_Type)
1238 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001239 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001240 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001241 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001242 }
1243
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001244 res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245 if (res == NULL)
1246 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001247 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001249 if (r < 0)
1250 goto error;
1251 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001252
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001253 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1254 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001255 goto error;
1256 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001257 Py_XDECREF(res);
1258 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001259
Antoine Pitroue4501852009-05-14 18:55:55 +00001260 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001261 if (_textiowrapper_fix_encoder_state(self) < 0) {
1262 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001263 }
1264
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265 self->ok = 1;
1266 return 0;
1267
1268 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001269 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270 return -1;
1271}
1272
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001273/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1274 * -1 on error.
1275 */
1276static int
1277convert_optional_bool(PyObject *obj, int default_value)
1278{
1279 long v;
1280 if (obj == Py_None) {
1281 v = default_value;
1282 }
1283 else {
1284 v = PyLong_AsLong(obj);
1285 if (v == -1 && PyErr_Occurred())
1286 return -1;
1287 }
1288 return v != 0;
1289}
1290
INADA Naoki507434f2017-12-21 09:59:53 +09001291static int
1292textiowrapper_change_encoding(textio *self, PyObject *encoding,
1293 PyObject *errors, int newline_changed)
1294{
1295 /* Use existing settings where new settings are not specified */
1296 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1297 return 0; // no change
1298 }
1299
1300 if (encoding == Py_None) {
1301 encoding = self->encoding;
1302 if (errors == Py_None) {
1303 errors = self->errors;
1304 }
1305 }
1306 else if (errors == Py_None) {
1307 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001308 if (errors == NULL) {
1309 return -1;
1310 }
INADA Naoki507434f2017-12-21 09:59:53 +09001311 }
1312
1313 const char *c_errors = PyUnicode_AsUTF8(errors);
1314 if (c_errors == NULL) {
1315 return -1;
1316 }
1317
1318 // Create new encoder & decoder
1319 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1320 PyUnicode_AsUTF8(encoding), "codecs.open()");
1321 if (codec_info == NULL) {
1322 return -1;
1323 }
1324 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1325 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1326 Py_DECREF(codec_info);
1327 return -1;
1328 }
1329 Py_DECREF(codec_info);
1330
1331 Py_INCREF(encoding);
1332 Py_INCREF(errors);
1333 Py_SETREF(self->encoding, encoding);
1334 Py_SETREF(self->errors, errors);
1335
1336 return _textiowrapper_fix_encoder_state(self);
1337}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001338
1339/*[clinic input]
1340_io.TextIOWrapper.reconfigure
1341 *
INADA Naoki507434f2017-12-21 09:59:53 +09001342 encoding: object = None
1343 errors: object = None
1344 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001345 line_buffering as line_buffering_obj: object = None
1346 write_through as write_through_obj: object = None
1347
1348Reconfigure the text stream with new parameters.
1349
1350This also does an implicit stream flush.
1351
1352[clinic start generated code]*/
1353
1354static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001355_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1356 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001357 PyObject *line_buffering_obj,
1358 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001359/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001360{
1361 int line_buffering;
1362 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001363 const char *newline = NULL;
1364
1365 /* Check if something is in the read buffer */
1366 if (self->decoded_chars != NULL) {
1367 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001368 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001369 "of stream after the first read");
1370 return NULL;
1371 }
1372 }
1373
1374 if (newline_obj != NULL && newline_obj != Py_None) {
1375 newline = PyUnicode_AsUTF8(newline_obj);
1376 if (newline == NULL || validate_newline(newline) < 0) {
1377 return NULL;
1378 }
1379 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001380
1381 line_buffering = convert_optional_bool(line_buffering_obj,
1382 self->line_buffering);
1383 write_through = convert_optional_bool(write_through_obj,
1384 self->write_through);
1385 if (line_buffering < 0 || write_through < 0) {
1386 return NULL;
1387 }
INADA Naoki507434f2017-12-21 09:59:53 +09001388
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001389 PyObject *res = _PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001390 if (res == NULL) {
1391 return NULL;
1392 }
INADA Naoki507434f2017-12-21 09:59:53 +09001393 Py_DECREF(res);
1394 self->b2cratio = 0;
1395
1396 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1397 return NULL;
1398 }
1399
1400 if (textiowrapper_change_encoding(
1401 self, encoding, errors, newline_obj != NULL) < 0) {
1402 return NULL;
1403 }
1404
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001405 self->line_buffering = line_buffering;
1406 self->write_through = write_through;
1407 Py_RETURN_NONE;
1408}
1409
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001410static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001411textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001412{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001413 self->ok = 0;
1414 Py_CLEAR(self->buffer);
1415 Py_CLEAR(self->encoding);
1416 Py_CLEAR(self->encoder);
1417 Py_CLEAR(self->decoder);
1418 Py_CLEAR(self->readnl);
1419 Py_CLEAR(self->decoded_chars);
1420 Py_CLEAR(self->pending_bytes);
1421 Py_CLEAR(self->snapshot);
1422 Py_CLEAR(self->errors);
1423 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001424
1425 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001426 return 0;
1427}
1428
1429static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001430textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001431{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001432 self->finalizing = 1;
1433 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001434 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001435 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001436 _PyObject_GC_UNTRACK(self);
1437 if (self->weakreflist != NULL)
1438 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001439 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001440 Py_TYPE(self)->tp_free((PyObject *)self);
1441}
1442
1443static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001444textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001445{
1446 Py_VISIT(self->buffer);
1447 Py_VISIT(self->encoding);
1448 Py_VISIT(self->encoder);
1449 Py_VISIT(self->decoder);
1450 Py_VISIT(self->readnl);
1451 Py_VISIT(self->decoded_chars);
1452 Py_VISIT(self->pending_bytes);
1453 Py_VISIT(self->snapshot);
1454 Py_VISIT(self->errors);
1455 Py_VISIT(self->raw);
1456
1457 Py_VISIT(self->dict);
1458 return 0;
1459}
1460
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001461static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001462textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001463
/* This macro takes some shortcuts to make the common case faster.
 *
 * For an exact TextIOWrapper it asks the cached raw FileIO object (when
 * there is one) or the `closed` getter, and raises ValueError if the stream
 * is closed; for subclasses it defers to _PyIOBase_check_closed().
 * On failure it executes `return NULL`, so it may only be used inside
 * functions returning a pointer.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1490
/* Raise ValueError and `return NULL` when the object has not been
   successfully initialized (self->ok <= 0).
   Note: expands to a bare `if` statement, not a do/while(0) block. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* CHECK_INITIALIZED, then additionally raise ValueError and `return NULL`
   when the underlying buffer has been detached. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1516
1517
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001518/*[clinic input]
1519_io.TextIOWrapper.detach
1520[clinic start generated code]*/
1521
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001522static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001523_io_TextIOWrapper_detach_impl(textio *self)
1524/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001525{
1526 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001527 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001528 res = _PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001529 if (res == NULL)
1530 return NULL;
1531 Py_DECREF(res);
1532 buffer = self->buffer;
1533 self->buffer = NULL;
1534 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001535 return buffer;
1536}
1537
Antoine Pitrou24f36292009-03-28 22:16:42 +00001538/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001539 underlying buffered object, though. */
1540static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001541_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001542{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001543 if (self->pending_bytes == NULL)
1544 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001545
Inada Naokibfba8c32019-05-16 15:03:20 +09001546 PyObject *pending = self->pending_bytes;
1547 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001548
Inada Naokibfba8c32019-05-16 15:03:20 +09001549 if (PyBytes_Check(pending)) {
1550 b = pending;
1551 Py_INCREF(b);
1552 }
1553 else if (PyUnicode_Check(pending)) {
1554 assert(PyUnicode_IS_ASCII(pending));
1555 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1556 b = PyBytes_FromStringAndSize(
1557 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1558 if (b == NULL) {
1559 return -1;
1560 }
1561 }
1562 else {
1563 assert(PyList_Check(pending));
1564 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1565 if (b == NULL) {
1566 return -1;
1567 }
1568
1569 char *buf = PyBytes_AsString(b);
1570 Py_ssize_t pos = 0;
1571
1572 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1573 PyObject *obj = PyList_GET_ITEM(pending, i);
1574 char *src;
1575 Py_ssize_t len;
1576 if (PyUnicode_Check(obj)) {
1577 assert(PyUnicode_IS_ASCII(obj));
1578 src = PyUnicode_DATA(obj);
1579 len = PyUnicode_GET_LENGTH(obj);
1580 }
1581 else {
1582 assert(PyBytes_Check(obj));
1583 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1584 Py_DECREF(b);
1585 return -1;
1586 }
1587 }
1588 memcpy(buf + pos, src, len);
1589 pos += len;
1590 }
1591 assert(pos == self->pending_bytes_count);
1592 }
1593
1594 self->pending_bytes_count = 0;
1595 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001596 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001597
1598 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001599 do {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001600 ret = _PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001601 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602 Py_DECREF(b);
1603 if (ret == NULL)
1604 return -1;
1605 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001606 return 0;
1607}
1608
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001609/*[clinic input]
1610_io.TextIOWrapper.write
1611 text: unicode
1612 /
1613[clinic start generated code]*/
1614
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001615static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001616_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1617/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001618{
1619 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620 PyObject *b;
1621 Py_ssize_t textlen;
1622 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001623 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001624
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001625 if (PyUnicode_READY(text) == -1)
1626 return NULL;
1627
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001628 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 CHECK_CLOSED(self);
1630
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001631 if (self->encoder == NULL)
1632 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001633
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001634 Py_INCREF(text);
1635
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001636 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001637
1638 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001639 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640 haslf = 1;
1641
1642 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001643 PyObject *newtext = _PyObject_CallMethodId(
1644 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645 Py_DECREF(text);
1646 if (newtext == NULL)
1647 return NULL;
1648 text = newtext;
1649 }
1650
Antoine Pitroue96ec682011-07-23 21:46:35 +02001651 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001652 text_needflush = 1;
1653 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001654 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001655 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 needflush = 1;
1657
1658 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001659 if (self->encodefunc != NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001660 if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
1661 b = text;
1662 Py_INCREF(b);
1663 }
1664 else {
1665 b = (*self->encodefunc)((PyObject *) self, text);
1666 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001667 self->encoding_start_of_stream = 0;
1668 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 else
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001670 b = _PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
Inada Naokibfba8c32019-05-16 15:03:20 +09001671
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 Py_DECREF(text);
1673 if (b == NULL)
1674 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001675 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001676 PyErr_Format(PyExc_TypeError,
1677 "encoder should return a bytes object, not '%.200s'",
1678 Py_TYPE(b)->tp_name);
1679 Py_DECREF(b);
1680 return NULL;
1681 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001682
Inada Naokibfba8c32019-05-16 15:03:20 +09001683 Py_ssize_t bytes_len;
1684 if (b == text) {
1685 bytes_len = PyUnicode_GET_LENGTH(b);
1686 }
1687 else {
1688 bytes_len = PyBytes_GET_SIZE(b);
1689 }
1690
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001692 self->pending_bytes_count = 0;
1693 self->pending_bytes = b;
1694 }
1695 else if (!PyList_CheckExact(self->pending_bytes)) {
1696 PyObject *list = PyList_New(2);
1697 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001698 Py_DECREF(b);
1699 return NULL;
1700 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001701 PyList_SET_ITEM(list, 0, self->pending_bytes);
1702 PyList_SET_ITEM(list, 1, b);
1703 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001704 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001705 else {
1706 if (PyList_Append(self->pending_bytes, b) < 0) {
1707 Py_DECREF(b);
1708 return NULL;
1709 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001712
1713 self->pending_bytes_count += bytes_len;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001714 if (self->pending_bytes_count > self->chunk_size || needflush ||
1715 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001716 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 return NULL;
1718 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001719
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 if (needflush) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001721 ret = _PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722 if (ret == NULL)
1723 return NULL;
1724 Py_DECREF(ret);
1725 }
1726
Zackery Spytz23db9352018-06-29 04:14:58 -06001727 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001728 Py_CLEAR(self->snapshot);
1729
1730 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001731 ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001732 if (ret == NULL)
1733 return NULL;
1734 Py_DECREF(ret);
1735 }
1736
1737 return PyLong_FromSsize_t(textlen);
1738}
1739
1740/* Steal a reference to chars and store it in the decoded_char buffer;
1741 */
1742static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001743textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001744{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001745 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746 self->decoded_chars_used = 0;
1747}
1748
1749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001750textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001751{
1752 PyObject *chars;
1753 Py_ssize_t avail;
1754
1755 if (self->decoded_chars == NULL)
1756 return PyUnicode_FromStringAndSize(NULL, 0);
1757
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001758 /* decoded_chars is guaranteed to be "ready". */
1759 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001760 - self->decoded_chars_used);
1761
1762 assert(avail >= 0);
1763
1764 if (n < 0 || n > avail)
1765 n = avail;
1766
1767 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001768 chars = PyUnicode_Substring(self->decoded_chars,
1769 self->decoded_chars_used,
1770 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001771 if (chars == NULL)
1772 return NULL;
1773 }
1774 else {
1775 chars = self->decoded_chars;
1776 Py_INCREF(chars);
1777 }
1778
1779 self->decoded_chars_used += n;
1780 return chars;
1781}
1782
1783/* Read and decode the next chunk of data from the BufferedReader.
1784 */
1785static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001786textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001787{
1788 PyObject *dec_buffer = NULL;
1789 PyObject *dec_flags = NULL;
1790 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001791 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001792 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001793 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 int eof;
1795
1796 /* The return value is True unless EOF was reached. The decoded string is
1797 * placed in self._decoded_chars (replacing its previous value). The
1798 * entire input chunk is sent to the decoder, though some of it may remain
1799 * buffered in the decoder, yet to be converted.
1800 */
1801
1802 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001803 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001804 return -1;
1805 }
1806
1807 if (self->telling) {
1808 /* To prepare for tell(), we need to snapshot a point in the file
1809 * where the decoder's input buffer is empty.
1810 */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001811 PyObject *state = _PyObject_CallMethodNoArgs(self->decoder,
1812 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001813 if (state == NULL)
1814 return -1;
1815 /* Given this, we know there was a valid snapshot point
1816 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1817 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001818 if (!PyTuple_Check(state)) {
1819 PyErr_SetString(PyExc_TypeError,
1820 "illegal decoder state");
1821 Py_DECREF(state);
1822 return -1;
1823 }
1824 if (!PyArg_ParseTuple(state,
1825 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1826 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827 Py_DECREF(state);
1828 return -1;
1829 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001830
1831 if (!PyBytes_Check(dec_buffer)) {
1832 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001833 "illegal decoder state: the first item should be a "
1834 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001835 Py_TYPE(dec_buffer)->tp_name);
1836 Py_DECREF(state);
1837 return -1;
1838 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001839 Py_INCREF(dec_buffer);
1840 Py_INCREF(dec_flags);
1841 Py_DECREF(state);
1842 }
1843
1844 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001845 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001846 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001847 }
1848 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001849 if (chunk_size == NULL)
1850 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001851
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001852 input_chunk = _PyObject_CallMethodOneArg(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001853 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001854 chunk_size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001855 Py_DECREF(chunk_size);
1856 if (input_chunk == NULL)
1857 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001858
1859 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001860 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001861 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001862 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1863 Py_TYPE(input_chunk)->tp_name);
1864 goto fail;
1865 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001866
Antoine Pitroub8503892014-04-29 10:14:02 +02001867 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001868 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001869
INADA Naoki507434f2017-12-21 09:59:53 +09001870 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1871 PyBuffer_Release(&input_chunk_buf);
1872 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001873 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001874
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001875 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001876 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001877 if (nchars > 0)
1878 self->b2cratio = (double) nbytes / nchars;
1879 else
1880 self->b2cratio = 0.0;
1881 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001882 eof = 0;
1883
1884 if (self->telling) {
1885 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1886 * next input to be decoded is dec_buffer + input_chunk.
1887 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001888 PyObject *next_input = dec_buffer;
1889 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001890 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001891 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001892 goto fail;
1893 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001894 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1895 if (snapshot == NULL) {
1896 dec_flags = NULL;
1897 goto fail;
1898 }
1899 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001900 }
1901 Py_DECREF(input_chunk);
1902
1903 return (eof == 0);
1904
1905 fail:
1906 Py_XDECREF(dec_buffer);
1907 Py_XDECREF(dec_flags);
1908 Py_XDECREF(input_chunk);
1909 return -1;
1910}
1911
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001912/*[clinic input]
1913_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001914 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001915 /
1916[clinic start generated code]*/
1917
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001918static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001919_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001920/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001922 PyObject *result = NULL, *chunks = NULL;
1923
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001924 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925 CHECK_CLOSED(self);
1926
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001927 if (self->decoder == NULL)
1928 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001929
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001930 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001931 return NULL;
1932
1933 if (n < 0) {
1934 /* Read everything */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001935 PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001936 PyObject *decoded;
1937 if (bytes == NULL)
1938 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001939
1940 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1941 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1942 bytes, 1);
1943 else
1944 decoded = PyObject_CallMethodObjArgs(
1945 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001947 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001948 goto fail;
1949
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001950 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001951
1952 if (result == NULL) {
1953 Py_DECREF(decoded);
1954 return NULL;
1955 }
1956
1957 PyUnicode_AppendAndDel(&result, decoded);
1958 if (result == NULL)
1959 goto fail;
1960
Zackery Spytz23db9352018-06-29 04:14:58 -06001961 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962 Py_CLEAR(self->snapshot);
1963 return result;
1964 }
1965 else {
1966 int res = 1;
1967 Py_ssize_t remaining = n;
1968
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001969 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001970 if (result == NULL)
1971 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001972 if (PyUnicode_READY(result) == -1)
1973 goto fail;
1974 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001975
1976 /* Keep reading chunks until we have n characters to return */
1977 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001978 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001979 if (res < 0) {
1980 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1981 when EINTR occurs so we needn't do it ourselves. */
1982 if (_PyIO_trap_eintr()) {
1983 continue;
1984 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001985 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001986 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001987 if (res == 0) /* EOF */
1988 break;
1989 if (chunks == NULL) {
1990 chunks = PyList_New(0);
1991 if (chunks == NULL)
1992 goto fail;
1993 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001994 if (PyUnicode_GET_LENGTH(result) > 0 &&
1995 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996 goto fail;
1997 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001998 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 if (result == NULL)
2000 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002001 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002 }
2003 if (chunks != NULL) {
2004 if (result != NULL && PyList_Append(chunks, result) < 0)
2005 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03002006 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002007 if (result == NULL)
2008 goto fail;
2009 Py_CLEAR(chunks);
2010 }
2011 return result;
2012 }
2013 fail:
2014 Py_XDECREF(result);
2015 Py_XDECREF(chunks);
2016 return NULL;
2017}
2018
2019
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002020/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021 that is to the NUL character. Otherwise the function will produce
2022 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002023static const char *
2024find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002026 if (kind == PyUnicode_1BYTE_KIND) {
2027 assert(ch < 256);
2028 return (char *) memchr((void *) s, (char) ch, end - s);
2029 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002031 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002032 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002033 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002034 return s;
2035 if (s == end)
2036 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002037 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002038 }
2039}
2040
2041Py_ssize_t
2042_PyIO_find_line_ending(
2043 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002044 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002045{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002046 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047
2048 if (translated) {
2049 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002050 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002051 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002052 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 else {
2054 *consumed = len;
2055 return -1;
2056 }
2057 }
2058 else if (universal) {
2059 /* Universal newline search. Find any of \r, \r\n, \n
2060 * The decoder ensures that \r\n are not split in two pieces
2061 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002062 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002064 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002065 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002066 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002067 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002068 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002069 if (s >= end) {
2070 *consumed = len;
2071 return -1;
2072 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002073 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002074 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002075 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002076 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002078 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002079 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002081 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002082 }
2083 }
2084 }
2085 else {
2086 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002087 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02002088 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002089 /* Assume that readnl is an ASCII character. */
2090 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002091 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002092 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002093 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002094 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002095 *consumed = len;
2096 return -1;
2097 }
2098 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002099 const char *s = start;
2100 const char *e = end - (readnl_len - 1)*kind;
2101 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002102 if (e < s)
2103 e = s;
2104 while (s < e) {
2105 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002106 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002107 if (pos == NULL || pos >= e)
2108 break;
2109 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002110 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002111 break;
2112 }
2113 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002114 return (pos - start)/kind + readnl_len;
2115 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002116 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002117 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002118 if (pos == NULL)
2119 *consumed = len;
2120 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002121 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002122 return -1;
2123 }
2124 }
2125}
2126
2127static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002128_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002129{
2130 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2131 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2132 int res;
2133
2134 CHECK_CLOSED(self);
2135
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002136 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002137 return NULL;
2138
2139 chunked = 0;
2140
2141 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002142 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002143 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002144 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002145 Py_ssize_t consumed = 0;
2146
2147 /* First, get some data if necessary */
2148 res = 1;
2149 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002150 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002151 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002152 if (res < 0) {
2153 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2154 when EINTR occurs so we needn't do it ourselves. */
2155 if (_PyIO_trap_eintr()) {
2156 continue;
2157 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002158 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002159 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002160 if (res == 0)
2161 break;
2162 }
2163 if (res == 0) {
2164 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002165 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002166 Py_CLEAR(self->snapshot);
2167 start = endpos = offset_to_buffer = 0;
2168 break;
2169 }
2170
2171 if (remaining == NULL) {
2172 line = self->decoded_chars;
2173 start = self->decoded_chars_used;
2174 offset_to_buffer = 0;
2175 Py_INCREF(line);
2176 }
2177 else {
2178 assert(self->decoded_chars_used == 0);
2179 line = PyUnicode_Concat(remaining, self->decoded_chars);
2180 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002181 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002182 Py_CLEAR(remaining);
2183 if (line == NULL)
2184 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002185 if (PyUnicode_READY(line) == -1)
2186 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002187 }
2188
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002189 ptr = PyUnicode_DATA(line);
2190 line_len = PyUnicode_GET_LENGTH(line);
2191 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002192
2193 endpos = _PyIO_find_line_ending(
2194 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002195 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002196 ptr + kind * start,
2197 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002198 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002199 if (endpos >= 0) {
2200 endpos += start;
2201 if (limit >= 0 && (endpos - start) + chunked >= limit)
2202 endpos = start + limit - chunked;
2203 break;
2204 }
2205
2206 /* We can put aside up to `endpos` */
2207 endpos = consumed + start;
2208 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2209 /* Didn't find line ending, but reached length limit */
2210 endpos = start + limit - chunked;
2211 break;
2212 }
2213
2214 if (endpos > start) {
2215 /* No line ending seen yet - put aside current data */
2216 PyObject *s;
2217 if (chunks == NULL) {
2218 chunks = PyList_New(0);
2219 if (chunks == NULL)
2220 goto error;
2221 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002222 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002223 if (s == NULL)
2224 goto error;
2225 if (PyList_Append(chunks, s) < 0) {
2226 Py_DECREF(s);
2227 goto error;
2228 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002229 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002230 Py_DECREF(s);
2231 }
2232 /* There may be some remaining bytes we'll have to prepend to the
2233 next chunk of data */
2234 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002235 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002236 if (remaining == NULL)
2237 goto error;
2238 }
2239 Py_CLEAR(line);
2240 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002241 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002242 }
2243
2244 if (line != NULL) {
2245 /* Our line ends in the current buffer */
2246 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002247 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2248 PyObject *s = PyUnicode_Substring(line, start, endpos);
2249 Py_CLEAR(line);
2250 if (s == NULL)
2251 goto error;
2252 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002253 }
2254 }
2255 if (remaining != NULL) {
2256 if (chunks == NULL) {
2257 chunks = PyList_New(0);
2258 if (chunks == NULL)
2259 goto error;
2260 }
2261 if (PyList_Append(chunks, remaining) < 0)
2262 goto error;
2263 Py_CLEAR(remaining);
2264 }
2265 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002266 if (line != NULL) {
2267 if (PyList_Append(chunks, line) < 0)
2268 goto error;
2269 Py_DECREF(line);
2270 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002271 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2272 if (line == NULL)
2273 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002274 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002275 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002276 if (line == NULL) {
2277 Py_INCREF(_PyIO_empty_str);
2278 line = _PyIO_empty_str;
2279 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002280
2281 return line;
2282
2283 error:
2284 Py_XDECREF(chunks);
2285 Py_XDECREF(remaining);
2286 Py_XDECREF(line);
2287 return NULL;
2288}
2289
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002290/*[clinic input]
2291_io.TextIOWrapper.readline
2292 size: Py_ssize_t = -1
2293 /
2294[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002295
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002296static PyObject *
2297_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2298/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2299{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002300 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002301 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002302}
2303
/* Seek and Tell */
2305
2306typedef struct {
2307 Py_off_t start_pos;
2308 int dec_flags;
2309 int bytes_to_feed;
2310 int chars_to_skip;
2311 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002312} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002313
2314/*
2315 To speed up cookie packing/unpacking, we store the fields in a temporary
2316 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
2317 The following macros define at which offsets in the intermediary byte
2318 string the various CookieStruct fields will be stored.
2319 */
2320
2321#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2322
Christian Heimes743e0cd2012-10-17 23:52:17 +02002323#if PY_BIG_ENDIAN
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002324/* We want the least significant byte of start_pos to also be the least
2325 significant byte of the cookie, which means that in big-endian mode we
2326 must copy the fields in reverse order. */
2327
2328# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
2329# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
2330# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
2331# define OFF_CHARS_TO_SKIP (sizeof(char))
2332# define OFF_NEED_EOF 0
2333
2334#else
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002335/* Little-endian mode: the least significant byte of start_pos will
2336 naturally end up the least significant byte of the cookie. */
2337
2338# define OFF_START_POS 0
2339# define OFF_DEC_FLAGS (sizeof(Py_off_t))
2340# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
2341# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
2342# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
2343
2344#endif
2345
2346static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002347textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002348{
2349 unsigned char buffer[COOKIE_BUF_LEN];
2350 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2351 if (cookieLong == NULL)
2352 return -1;
2353
2354 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002355 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002356 Py_DECREF(cookieLong);
2357 return -1;
2358 }
2359 Py_DECREF(cookieLong);
2360
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002361 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2362 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2363 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2364 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2365 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002366
2367 return 0;
2368}
2369
2370static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002371textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002372{
2373 unsigned char buffer[COOKIE_BUF_LEN];
2374
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002375 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2376 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2377 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2378 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2379 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002380
Christian Heimes743e0cd2012-10-17 23:52:17 +02002381 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2382 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002383}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002384
2385static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002386_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002387{
2388 PyObject *res;
2389 /* When seeking to the start of the stream, we call decoder.reset()
2390 rather than decoder.getstate().
2391 This is for a few decoders such as utf-16 for which the state value
2392 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2393 utf-16, that we are expecting a BOM).
2394 */
2395 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002396 res = _PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002397 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002398 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2399 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002400 if (res == NULL)
2401 return -1;
2402 Py_DECREF(res);
2403 return 0;
2404}
2405
Antoine Pitroue4501852009-05-14 18:55:55 +00002406static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002407_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002408{
2409 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002410 if (start_of_stream) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002411 res = _PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
Antoine Pitroue4501852009-05-14 18:55:55 +00002412 self->encoding_start_of_stream = 1;
2413 }
2414 else {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002415 res = _PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
2416 _PyLong_Zero);
Antoine Pitroue4501852009-05-14 18:55:55 +00002417 self->encoding_start_of_stream = 0;
2418 }
2419 if (res == NULL)
2420 return -1;
2421 Py_DECREF(res);
2422 return 0;
2423}
2424
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002425static int
2426_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2427{
2428 /* Same as _textiowrapper_decoder_setstate() above. */
2429 return _textiowrapper_encoder_reset(
2430 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2431}
2432
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002433/*[clinic input]
2434_io.TextIOWrapper.seek
2435 cookie as cookieObj: object
2436 whence: int = 0
2437 /
2438[clinic start generated code]*/
2439
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002440static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002441_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2442/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002443{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002444 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002445 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002446 PyObject *res;
2447 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002448 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002449
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002450 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002451 CHECK_CLOSED(self);
2452
2453 Py_INCREF(cookieObj);
2454
2455 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002456 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002457 goto fail;
2458 }
2459
ngie-eign848037c2019-03-02 23:28:26 -08002460 switch (whence) {
2461 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002462 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002463 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464 if (cmp < 0)
2465 goto fail;
2466
2467 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002468 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002469 goto fail;
2470 }
2471
2472 /* Seeking to the current position should attempt to
2473 * sync the underlying buffer with the current position.
2474 */
2475 Py_DECREF(cookieObj);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002476 cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002477 if (cookieObj == NULL)
2478 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002479 break;
2480
ngie-eign848037c2019-03-02 23:28:26 -08002481 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002482 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002483 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002484 if (cmp < 0)
2485 goto fail;
2486
2487 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002488 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489 goto fail;
2490 }
2491
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002492 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002493 if (res == NULL)
2494 goto fail;
2495 Py_DECREF(res);
2496
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002497 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002498 Py_CLEAR(self->snapshot);
2499 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002500 res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002501 if (res == NULL)
2502 goto fail;
2503 Py_DECREF(res);
2504 }
2505
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002506 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002507 Py_CLEAR(cookieObj);
2508 if (res == NULL)
2509 goto fail;
2510 if (self->encoder) {
2511 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002512 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002513 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2514 Py_DECREF(res);
2515 goto fail;
2516 }
2517 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002518 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002519
ngie-eign848037c2019-03-02 23:28:26 -08002520 case SEEK_SET:
2521 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002522
ngie-eign848037c2019-03-02 23:28:26 -08002523 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002524 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002525 "invalid whence (%d, should be %d, %d or %d)", whence,
2526 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002527 goto fail;
2528 }
2529
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002530 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002531 if (cmp < 0)
2532 goto fail;
2533
2534 if (cmp == 1) {
2535 PyErr_Format(PyExc_ValueError,
2536 "negative seek position %R", cookieObj);
2537 goto fail;
2538 }
2539
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002540 res = _PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541 if (res == NULL)
2542 goto fail;
2543 Py_DECREF(res);
2544
2545 /* The strategy of seek() is to go back to the safe start point
2546 * and replay the effect of read(chars_to_skip) from there.
2547 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002548 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549 goto fail;
2550
2551 /* Seek back to the safe start point. */
2552 posobj = PyLong_FromOff_t(cookie.start_pos);
2553 if (posobj == NULL)
2554 goto fail;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002555 res = _PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556 Py_DECREF(posobj);
2557 if (res == NULL)
2558 goto fail;
2559 Py_DECREF(res);
2560
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002561 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002562 Py_CLEAR(self->snapshot);
2563
2564 /* Restore the decoder to its state from the safe start point. */
2565 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002566 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567 goto fail;
2568 }
2569
2570 if (cookie.chars_to_skip) {
2571 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002572 PyObject *input_chunk = _PyObject_CallMethodId(
2573 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002574 PyObject *decoded;
2575
2576 if (input_chunk == NULL)
2577 goto fail;
2578
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002579 if (!PyBytes_Check(input_chunk)) {
2580 PyErr_Format(PyExc_TypeError,
2581 "underlying read() should have returned a bytes "
2582 "object, not '%.200s'",
2583 Py_TYPE(input_chunk)->tp_name);
2584 Py_DECREF(input_chunk);
2585 goto fail;
2586 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002587
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002588 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2589 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590 goto fail;
2591 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002592 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002594 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2595 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002596
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002597 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002598 goto fail;
2599
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002600 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002601
2602 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002603 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002604 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002605 goto fail;
2606 }
2607 self->decoded_chars_used = cookie.chars_to_skip;
2608 }
2609 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002610 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2611 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002613 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002614 }
2615
Antoine Pitroue4501852009-05-14 18:55:55 +00002616 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2617 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002618 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002619 goto fail;
2620 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002621 return cookieObj;
2622 fail:
2623 Py_XDECREF(cookieObj);
2624 return NULL;
2625
2626}
2627
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002628/*[clinic input]
2629_io.TextIOWrapper.tell
2630[clinic start generated code]*/
2631
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002632static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002633_io_TextIOWrapper_tell_impl(textio *self)
2634/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002635{
2636 PyObject *res;
2637 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002638 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002639 PyObject *next_input;
2640 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002641 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002642 PyObject *saved_state = NULL;
2643 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002644 Py_ssize_t dec_buffer_len;
2645 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002646
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002647 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002648 CHECK_CLOSED(self);
2649
2650 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002651 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002652 goto fail;
2653 }
2654 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002655 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002656 "telling position disabled by next() call");
2657 goto fail;
2658 }
2659
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002660 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002662 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002663 if (res == NULL)
2664 goto fail;
2665 Py_DECREF(res);
2666
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002667 posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002668 if (posobj == NULL)
2669 goto fail;
2670
2671 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002672 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673 return posobj;
2674 }
2675
2676#if defined(HAVE_LARGEFILE_SUPPORT)
2677 cookie.start_pos = PyLong_AsLongLong(posobj);
2678#else
2679 cookie.start_pos = PyLong_AsLong(posobj);
2680#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002681 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682 if (PyErr_Occurred())
2683 goto fail;
2684
2685 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002686 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002687 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002688 goto fail;
2689
2690 assert (PyBytes_Check(next_input));
2691
2692 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2693
2694 /* How many decoded characters have been used up since the snapshot? */
2695 if (self->decoded_chars_used == 0) {
2696 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002697 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002698 }
2699
2700 chars_to_skip = self->decoded_chars_used;
2701
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002702 /* Decoder state will be restored at the end */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002703 saved_state = _PyObject_CallMethodNoArgs(self->decoder,
2704 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002705 if (saved_state == NULL)
2706 goto fail;
2707
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002708#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002709 PyObject *dec_buffer; \
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002710 PyObject *_state = _PyObject_CallMethodNoArgs(self->decoder, \
2711 _PyIO_str_getstate); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002712 if (_state == NULL) \
2713 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002714 if (!PyTuple_Check(_state)) { \
2715 PyErr_SetString(PyExc_TypeError, \
2716 "illegal decoder state"); \
2717 Py_DECREF(_state); \
2718 goto fail; \
2719 } \
2720 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2721 &dec_buffer, &dec_flags)) \
2722 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002723 Py_DECREF(_state); \
2724 goto fail; \
2725 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002726 if (!PyBytes_Check(dec_buffer)) { \
2727 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002728 "illegal decoder state: the first item should be a " \
2729 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002730 Py_TYPE(dec_buffer)->tp_name); \
2731 Py_DECREF(_state); \
2732 goto fail; \
2733 } \
2734 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002735 Py_DECREF(_state); \
2736 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002737
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002738#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002739 PyObject *_decoded = _PyObject_CallMethodId( \
2740 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002741 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002742 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002743 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002744 Py_DECREF(_decoded); \
2745 } while (0)
2746
2747 /* Fast search for an acceptable start point, close to our
2748 current pos */
2749 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2750 skip_back = 1;
2751 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2752 input = PyBytes_AS_STRING(next_input);
2753 while (skip_bytes > 0) {
2754 /* Decode up to temptative start point */
2755 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2756 goto fail;
2757 DECODER_DECODE(input, skip_bytes, chars_decoded);
2758 if (chars_decoded <= chars_to_skip) {
2759 DECODER_GETSTATE();
2760 if (dec_buffer_len == 0) {
2761 /* Before pos and no bytes buffered in decoder => OK */
2762 cookie.dec_flags = dec_flags;
2763 chars_to_skip -= chars_decoded;
2764 break;
2765 }
2766 /* Skip back by buffered amount and reset heuristic */
2767 skip_bytes -= dec_buffer_len;
2768 skip_back = 1;
2769 }
2770 else {
2771 /* We're too far ahead, skip back a bit */
2772 skip_bytes -= skip_back;
2773 skip_back *= 2;
2774 }
2775 }
2776 if (skip_bytes <= 0) {
2777 skip_bytes = 0;
2778 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2779 goto fail;
2780 }
2781
2782 /* Note our initial start point. */
2783 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002784 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002785 if (chars_to_skip == 0)
2786 goto finally;
2787
2788 /* We should be close to the desired position. Now feed the decoder one
2789 * byte at a time until we reach the `chars_to_skip` target.
2790 * As we go, note the nearest "safe start point" before the current
2791 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002792 * can safely start from there and advance to this location).
2793 */
2794 chars_decoded = 0;
2795 input = PyBytes_AS_STRING(next_input);
2796 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002797 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002798 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002799 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002800
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002801 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002802 /* We got n chars for 1 byte */
2803 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002804 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002805 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002806
2807 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2808 /* Decoder buffer is empty, so this is a safe start point. */
2809 cookie.start_pos += cookie.bytes_to_feed;
2810 chars_to_skip -= chars_decoded;
2811 cookie.dec_flags = dec_flags;
2812 cookie.bytes_to_feed = 0;
2813 chars_decoded = 0;
2814 }
2815 if (chars_decoded >= chars_to_skip)
2816 break;
2817 input++;
2818 }
2819 if (input == input_end) {
2820 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002821 PyObject *decoded = _PyObject_CallMethodId(
2822 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002823 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002824 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002825 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002826 Py_DECREF(decoded);
2827 cookie.need_eof = 1;
2828
2829 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002830 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002831 "can't reconstruct logical file position");
2832 goto fail;
2833 }
2834 }
2835
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002836finally:
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002837 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002838 Py_DECREF(saved_state);
2839 if (res == NULL)
2840 return NULL;
2841 Py_DECREF(res);
2842
2843 /* The returned cookie corresponds to the last safe start point. */
2844 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002845 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002846
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002847fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002848 if (saved_state) {
2849 PyObject *type, *value, *traceback;
2850 PyErr_Fetch(&type, &value, &traceback);
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002851 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002852 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002853 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002854 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002855 }
2856 return NULL;
2857}
2858
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002859/*[clinic input]
2860_io.TextIOWrapper.truncate
2861 pos: object = None
2862 /
2863[clinic start generated code]*/
2864
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002865static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002866_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2867/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002868{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002869 PyObject *res;
2870
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002871 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002872
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002873 res = _PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002874 if (res == NULL)
2875 return NULL;
2876 Py_DECREF(res);
2877
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002878 return _PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002879}
2880
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002881static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002882textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002883{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002884 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002885 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002886
2887 CHECK_INITIALIZED(self);
2888
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002889 res = PyUnicode_FromString("<_io.TextIOWrapper");
2890 if (res == NULL)
2891 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002892
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002893 status = Py_ReprEnter((PyObject *)self);
2894 if (status != 0) {
2895 if (status > 0) {
2896 PyErr_Format(PyExc_RuntimeError,
2897 "reentrant call inside %s.__repr__",
2898 Py_TYPE(self)->tp_name);
2899 }
2900 goto error;
2901 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002902 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2903 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002904 goto error;
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002905 }
2906 /* Ignore ValueError raised if the underlying stream was detached */
2907 PyErr_Clear();
Antoine Pitrou716c4442009-05-23 19:04:03 +00002908 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002909 if (nameobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002910 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002911 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002912 if (s == NULL)
2913 goto error;
2914 PyUnicode_AppendAndDel(&res, s);
2915 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002916 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002917 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002918 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2919 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002920 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002921 if (modeobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002922 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2923 Py_DECREF(modeobj);
2924 if (s == NULL)
2925 goto error;
2926 PyUnicode_AppendAndDel(&res, s);
2927 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002928 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002929 }
2930 s = PyUnicode_FromFormat("%U encoding=%R>",
2931 res, self->encoding);
2932 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002933 if (status == 0) {
2934 Py_ReprLeave((PyObject *)self);
2935 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002936 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002937
2938 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002939 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002940 if (status == 0) {
2941 Py_ReprLeave((PyObject *)self);
2942 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002943 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002944}
2945
2946
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002947/* Inquiries */
2948
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002949/*[clinic input]
2950_io.TextIOWrapper.fileno
2951[clinic start generated code]*/
2952
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002953static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002954_io_TextIOWrapper_fileno_impl(textio *self)
2955/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002956{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002957 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002958 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002959}
2960
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002961/*[clinic input]
2962_io.TextIOWrapper.seekable
2963[clinic start generated code]*/
2964
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002965static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002966_io_TextIOWrapper_seekable_impl(textio *self)
2967/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002968{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002969 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002970 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002971}
2972
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002973/*[clinic input]
2974_io.TextIOWrapper.readable
2975[clinic start generated code]*/
2976
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002977static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002978_io_TextIOWrapper_readable_impl(textio *self)
2979/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002980{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002981 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002982 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002983}
2984
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002985/*[clinic input]
2986_io.TextIOWrapper.writable
2987[clinic start generated code]*/
2988
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002989static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002990_io_TextIOWrapper_writable_impl(textio *self)
2991/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002992{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002993 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002994 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002995}
2996
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002997/*[clinic input]
2998_io.TextIOWrapper.isatty
2999[clinic start generated code]*/
3000
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003001static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003002_io_TextIOWrapper_isatty_impl(textio *self)
3003/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003004{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003005 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003006 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003007}
3008
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003009/*[clinic input]
3010_io.TextIOWrapper.flush
3011[clinic start generated code]*/
3012
Antoine Pitrou243757e2010-11-05 21:15:39 +00003013static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003014_io_TextIOWrapper_flush_impl(textio *self)
3015/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003016{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003017 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003018 CHECK_CLOSED(self);
3019 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003020 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003021 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003022 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003023}
3024
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003025/*[clinic input]
3026_io.TextIOWrapper.close
3027[clinic start generated code]*/
3028
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003029static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003030_io_TextIOWrapper_close_impl(textio *self)
3031/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003032{
3033 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003034 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003035 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003036
Antoine Pitrou6be88762010-05-03 16:48:20 +00003037 res = textiowrapper_closed_get(self, NULL);
3038 if (res == NULL)
3039 return NULL;
3040 r = PyObject_IsTrue(res);
3041 Py_DECREF(res);
3042 if (r < 0)
3043 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003044
Antoine Pitrou6be88762010-05-03 16:48:20 +00003045 if (r > 0) {
3046 Py_RETURN_NONE; /* stream already closed */
3047 }
3048 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003049 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003050 if (self->finalizing) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02003051 res = _PyObject_CallMethodIdOneArg(self->buffer,
3052 &PyId__dealloc_warn,
3053 (PyObject *)self);
Antoine Pitroue033e062010-10-29 10:38:18 +00003054 if (res)
3055 Py_DECREF(res);
3056 else
3057 PyErr_Clear();
3058 }
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003059 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson68623612012-12-20 11:53:11 -06003060 if (res == NULL)
3061 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003062 else
3063 Py_DECREF(res);
3064
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003065 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
Benjamin Peterson68623612012-12-20 11:53:11 -06003066 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003067 _PyErr_ChainExceptions(exc, val, tb);
3068 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003069 }
3070 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003071 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003072}
3073
3074static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003075textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003076{
3077 PyObject *line;
3078
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003079 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003080
3081 self->telling = 0;
3082 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
3083 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003084 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003085 }
3086 else {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003087 line = _PyObject_CallMethodNoArgs((PyObject *)self,
3088 _PyIO_str_readline);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003089 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003090 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003091 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003092 "not '%.200s'", Py_TYPE(line)->tp_name);
3093 Py_DECREF(line);
3094 return NULL;
3095 }
3096 }
3097
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003098 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003099 return NULL;
3100
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003101 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003102 /* Reached EOF or would have blocked */
3103 Py_DECREF(line);
3104 Py_CLEAR(self->snapshot);
3105 self->telling = self->seekable;
3106 return NULL;
3107 }
3108
3109 return line;
3110}
3111
3112static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003113textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003114{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003115 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003116 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003117}
3118
3119static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003120textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003121{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003122 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003123 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3124}
3125
3126static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003127textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003128{
3129 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003130 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003131 if (self->decoder == NULL ||
3132 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3133 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003134 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003135 }
3136 return res;
3137}
3138
3139static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003140textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003141{
3142 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003143 Py_INCREF(self->errors);
3144 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003145}
3146
3147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003148textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003149{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003150 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003151 return PyLong_FromSsize_t(self->chunk_size);
3152}
3153
3154static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003155textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003156{
3157 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003158 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003159 if (arg == NULL) {
3160 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3161 return -1;
3162 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003163 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003164 if (n == -1 && PyErr_Occurred())
3165 return -1;
3166 if (n <= 0) {
3167 PyErr_SetString(PyExc_ValueError,
3168 "a strictly positive integer is required");
3169 return -1;
3170 }
3171 self->chunk_size = n;
3172 return 0;
3173}
3174
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003175#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003176
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003177static PyMethodDef incrementalnewlinedecoder_methods[] = {
3178 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3179 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3180 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3181 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3182 {NULL}
3183};
3184
3185static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3186 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3187 {NULL}
3188};
3189
3190PyTypeObject PyIncrementalNewlineDecoder_Type = {
3191 PyVarObject_HEAD_INIT(NULL, 0)
3192 "_io.IncrementalNewlineDecoder", /*tp_name*/
3193 sizeof(nldecoder_object), /*tp_basicsize*/
3194 0, /*tp_itemsize*/
3195 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003196 0, /*tp_vectorcall_offset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003197 0, /*tp_getattr*/
3198 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003199 0, /*tp_as_async*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003200 0, /*tp_repr*/
3201 0, /*tp_as_number*/
3202 0, /*tp_as_sequence*/
3203 0, /*tp_as_mapping*/
3204 0, /*tp_hash */
3205 0, /*tp_call*/
3206 0, /*tp_str*/
3207 0, /*tp_getattro*/
3208 0, /*tp_setattro*/
3209 0, /*tp_as_buffer*/
3210 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3211 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3212 0, /* tp_traverse */
3213 0, /* tp_clear */
3214 0, /* tp_richcompare */
3215 0, /*tp_weaklistoffset*/
3216 0, /* tp_iter */
3217 0, /* tp_iternext */
3218 incrementalnewlinedecoder_methods, /* tp_methods */
3219 0, /* tp_members */
3220 incrementalnewlinedecoder_getset, /* tp_getset */
3221 0, /* tp_base */
3222 0, /* tp_dict */
3223 0, /* tp_descr_get */
3224 0, /* tp_descr_set */
3225 0, /* tp_dictoffset */
3226 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3227 0, /* tp_alloc */
3228 PyType_GenericNew, /* tp_new */
3229};
3230
3231
3232static PyMethodDef textiowrapper_methods[] = {
3233 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003234 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003235 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3236 _IO_TEXTIOWRAPPER_READ_METHODDEF
3237 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3238 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3239 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3240
3241 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3242 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3243 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3244 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3245 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003246
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003247 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3248 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3249 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003250 {NULL, NULL}
3251};
3252
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003253static PyMemberDef textiowrapper_members[] = {
3254 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3255 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3256 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003257 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003258 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003259 {NULL}
3260};
3261
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003262static PyGetSetDef textiowrapper_getset[] = {
3263 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3264 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003265/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3266*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003267 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3268 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3269 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3270 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003271 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003272};
3273
3274PyTypeObject PyTextIOWrapper_Type = {
3275 PyVarObject_HEAD_INIT(NULL, 0)
3276 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003277 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003278 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003279 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003280 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003281 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003282 0, /*tps_etattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003283 0, /*tp_as_async*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003284 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003285 0, /*tp_as_number*/
3286 0, /*tp_as_sequence*/
3287 0, /*tp_as_mapping*/
3288 0, /*tp_hash */
3289 0, /*tp_call*/
3290 0, /*tp_str*/
3291 0, /*tp_getattro*/
3292 0, /*tp_setattro*/
3293 0, /*tp_as_buffer*/
3294 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrouada319b2019-05-29 22:12:38 +02003295 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003296 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003297 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3298 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003299 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003300 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003301 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003302 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3303 textiowrapper_methods, /* tp_methods */
3304 textiowrapper_members, /* tp_members */
3305 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003306 0, /* tp_base */
3307 0, /* tp_dict */
3308 0, /* tp_descr_get */
3309 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003310 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003311 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003312 0, /* tp_alloc */
3313 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003314 0, /* tp_free */
3315 0, /* tp_is_gc */
3316 0, /* tp_bases */
3317 0, /* tp_mro */
3318 0, /* tp_cache */
3319 0, /* tp_subclasses */
3320 0, /* tp_weaklist */
3321 0, /* tp_del */
3322 0, /* tp_version_tag */
3323 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003324};