blob: eb05ae1a16eb03adbdb01c0e1bcd769621d10bc3 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020011#include "pycore_interp.h" // PyInterpreterState.fs_codec
Victor Stinner37834132020-10-27 17:12:53 +010012#include "pycore_long.h" // _PyLong_GetZero()
Victor Stinner710e8262020-10-31 01:02:09 +010013#include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
Victor Stinnerbcda8f12018-11-21 22:27:47 +010014#include "pycore_object.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020015#include "pycore_pystate.h" // _PyInterpreterState_GET()
16#include "structmember.h" // PyMemberDef
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017#include "_iomodule.h"
18
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030019/*[clinic input]
20module _io
21class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
22class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
23[clinic start generated code]*/
24/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
25
/* Static identifier objects (referenced as &PyId_<name>) for attribute and
   method names used in this file, e.g. &PyId_strict and &PyId_setstate. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020045
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046/* TextIOBase */
47
/* Docstring of the _io._TextIOBase type (used as its tp_doc slot). */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
55
56static PyObject *
57_unsupported(const char *message)
58{
Antoine Pitrou712cb732013-12-21 15:51:54 +010059 _PyIO_State *state = IO_STATE();
60 if (state != NULL)
61 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000062 return NULL;
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000066 "Separate the underlying buffer from the TextIOBase and return it.\n"
67 "\n"
68 "After the underlying buffer has been detached, the TextIO is in an\n"
69 "unusable state.\n"
70 );
71
72static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053073textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000074{
75 return _unsupported("detach");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read at most n characters from stream.\n"
80 "\n"
81 "Read from underlying buffer until we have n characters or we hit EOF.\n"
82 "If n is negative or omitted, read until EOF.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("read");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Read until newline or EOF.\n"
93 "\n"
94 "Returns an empty string if EOF is hit immediately.\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("readline");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Write string to stream.\n"
105 "Returns the number of characters written (which is always equal to\n"
106 "the length of the string).\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 return _unsupported("write");
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Encoding of the text stream.\n"
117 "\n"
118 "Subclasses should override.\n"
119 );
120
121static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000122textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000123{
124 Py_RETURN_NONE;
125}
126
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000127PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000128 "Line endings translated so far.\n"
129 "\n"
130 "Only line endings translated during reading are considered.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000142 "The error setting of the decoder or encoder.\n"
143 "\n"
144 "Subclasses should override.\n"
145 );
146
147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000148textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000149{
150 Py_RETURN_NONE;
151}
152
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000153
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000154static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530155 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
157 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
158 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000159 {NULL, NULL}
160};
161
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000162static PyGetSetDef textiobase_getset[] = {
163 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
164 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
165 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000166 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000167};
168
169PyTypeObject PyTextIOBase_Type = {
170 PyVarObject_HEAD_INIT(NULL, 0)
171 "_io._TextIOBase", /*tp_name*/
172 0, /*tp_basicsize*/
173 0, /*tp_itemsize*/
174 0, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200175 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000176 0, /*tp_getattr*/
177 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200178 0, /*tp_as_async*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000179 0, /*tp_repr*/
180 0, /*tp_as_number*/
181 0, /*tp_as_sequence*/
182 0, /*tp_as_mapping*/
183 0, /*tp_hash */
184 0, /*tp_call*/
185 0, /*tp_str*/
186 0, /*tp_getattro*/
187 0, /*tp_setattro*/
188 0, /*tp_as_buffer*/
Antoine Pitrouada319b2019-05-29 22:12:38 +0200189 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000190 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000191 0, /* tp_traverse */
192 0, /* tp_clear */
193 0, /* tp_richcompare */
194 0, /* tp_weaklistoffset */
195 0, /* tp_iter */
196 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 &PyIOBase_Type, /* tp_base */
201 0, /* tp_dict */
202 0, /* tp_descr_get */
203 0, /* tp_descr_set */
204 0, /* tp_dictoffset */
205 0, /* tp_init */
206 0, /* tp_alloc */
207 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200208 0, /* tp_free */
209 0, /* tp_is_gc */
210 0, /* tp_bases */
211 0, /* tp_mro */
212 0, /* tp_cache */
213 0, /* tp_subclasses */
214 0, /* tp_weaklist */
215 0, /* tp_del */
216 0, /* tp_version_tag */
217 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218};
219
220
221/* IncrementalNewlineDecoder */
222
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped incremental decoder.  Py_None means decode() input is used
       as-is (it must already be str); NULL until __init__ has run. */
    PyObject *decoder;
    /* Error handler name given to __init__ ("strict" when omitted). */
    PyObject *errors;
    /* Set when a trailing '\r' was held back from the last non-final
       decode(); it is prepended to the next chunk. */
    unsigned int pendingcr: 1;
    /* Non-zero: translate "\r\n" and "\r" to "\n" while decoding. */
    unsigned int translate: 1;
    /* Bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF recorded so far. */
    unsigned int seennl: 3;
} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300232/*[clinic input]
233_io.IncrementalNewlineDecoder.__init__
234 decoder: object
235 translate: int
236 errors: object(c_default="NULL") = "strict"
237
238Codec used when reading a file in universal newlines mode.
239
240It wraps another incremental decoder, translating \r\n and \r into \n.
241It also records the types of newlines encountered. When used with
242translate=False, it ensures that the newline sequence is returned in
243one piece. When used with decoder=None, it expects unicode strings as
244decode input and translates newlines without first invoking an external
245decoder.
246[clinic start generated code]*/
247
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000248static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300249_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
250 PyObject *decoder, int translate,
251 PyObject *errors)
252/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254 self->decoder = decoder;
255 Py_INCREF(decoder);
256
257 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900258 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259 if (self->errors == NULL)
260 return -1;
261 }
262 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000263 self->errors = errors;
264 }
INADA Naoki507434f2017-12-21 09:59:53 +0900265 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266
Xiang Zhangb08746b2018-10-31 19:49:16 +0800267 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 self->seennl = 0;
269 self->pendingcr = 0;
270
271 return 0;
272}
273
274static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000275incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000276{
277 Py_CLEAR(self->decoder);
278 Py_CLEAR(self->errors);
279 Py_TYPE(self)->tp_free((PyObject *)self);
280}
281
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200282static int
283check_decoded(PyObject *decoded)
284{
285 if (decoded == NULL)
286 return -1;
287 if (!PyUnicode_Check(decoded)) {
288 PyErr_Format(PyExc_TypeError,
289 "decoder should return a string result, not '%.200s'",
290 Py_TYPE(decoded)->tp_name);
291 Py_DECREF(decoded);
292 return -1;
293 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200294 if (PyUnicode_READY(decoded) < 0) {
295 Py_DECREF(decoded);
296 return -1;
297 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200298 return 0;
299}
300
/* Bit flags stored in nldecoder_object.seennl: which newline conventions
   have been encountered while decoding. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
305
/* Decode `input` and apply newline handling.

   myself: an IncrementalNewlineDecoder instance (nldecoder_object).
   input:  passed to the wrapped decoder's decode() method; when the wrapped
           decoder is None it is used directly and must be a str.
   final:  non-zero when no more input follows; a held-back '\r' is then
           emitted instead of being kept pending.

   Returns a new reference to the resulting str, or NULL with an exception
   set.  Updates self->pendingcr and self->seennl as a side effect. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` is used as the per-character byte width here. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; self->seennl is left untouched. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* For wider kinds the matching byte may belong to a
                       different character, so confirm character by
                       character. */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): no bounds check here; presumably
                           relies on the string data ending in a NUL
                           character at index len -- confirm. */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* May read index len when '\r' is the last character
                       (same presumed trailing-NUL assumption as above). */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            /* These two redeclare (shadow) the outer `kind` and `in_str`
               with the same values. */
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never lengthens the text, so len characters
               of space are enough. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character sat at index len, i.e.
                   past the end of the text. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old output was already released above, so return
               directly instead of going through `error`. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
506
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300507/*[clinic input]
508_io.IncrementalNewlineDecoder.decode
509 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200510 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000512
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300513static PyObject *
514_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
515 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200516/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300517{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000518 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
519}
520
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300521/*[clinic input]
522_io.IncrementalNewlineDecoder.getstate
523[clinic start generated code]*/
524
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300526_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
527/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528{
529 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700530 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000531
532 if (self->decoder != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100533 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200534 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000535 if (state == NULL)
536 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300537 if (!PyTuple_Check(state)) {
538 PyErr_SetString(PyExc_TypeError,
539 "illegal decoder state");
540 Py_DECREF(state);
541 return NULL;
542 }
543 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
544 &buffer, &flag))
545 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000546 Py_DECREF(state);
547 return NULL;
548 }
549 Py_INCREF(buffer);
550 Py_DECREF(state);
551 }
552 else {
553 buffer = PyBytes_FromString("");
554 flag = 0;
555 }
556 flag <<= 1;
557 if (self->pendingcr)
558 flag |= 1;
559 return Py_BuildValue("NK", buffer, flag);
560}
561
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300562/*[clinic input]
563_io.IncrementalNewlineDecoder.setstate
564 state: object
565 /
566[clinic start generated code]*/
567
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000568static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300569_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
570 PyObject *state)
571/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700574 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575
Oren Milman1d1d3e92017-08-20 18:35:36 +0300576 if (!PyTuple_Check(state)) {
577 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300579 }
580 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
581 &buffer, &flag))
582 {
583 return NULL;
584 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585
Victor Stinner7d7e7752014-06-17 23:31:25 +0200586 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000587 flag >>= 1;
588
589 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200590 return _PyObject_CallMethodId(self->decoder,
591 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 else
593 Py_RETURN_NONE;
594}
595
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300596/*[clinic input]
597_io.IncrementalNewlineDecoder.reset
598[clinic start generated code]*/
599
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000600static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300601_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
602/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603{
604 self->seennl = 0;
605 self->pendingcr = 0;
606 if (self->decoder != Py_None)
Petr Viktorinffd97532020-02-11 17:46:57 +0100607 return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000608 else
609 Py_RETURN_NONE;
610}
611
612static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614{
615 switch (self->seennl) {
616 case SEEN_CR:
617 return PyUnicode_FromString("\r");
618 case SEEN_LF:
619 return PyUnicode_FromString("\n");
620 case SEEN_CRLF:
621 return PyUnicode_FromString("\r\n");
622 case SEEN_CR | SEEN_LF:
623 return Py_BuildValue("ss", "\r", "\n");
624 case SEEN_CR | SEEN_CRLF:
625 return Py_BuildValue("ss", "\r", "\r\n");
626 case SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("ss", "\n", "\r\n");
628 case SEEN_CR | SEEN_LF | SEEN_CRLF:
629 return Py_BuildValue("sss", "\r", "\n", "\r\n");
630 default:
631 Py_RETURN_NONE;
632 }
633
634}
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636/* TextIOWrapper */
637
/* Signature of a specialized encoding function, stored in
   textio.encodefunc: takes two objects and returns an object. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
640
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000699
Zackery Spytz23db9352018-06-29 04:14:58 -0600700static void
701textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
702
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703/* A couple of specialized cases in order to bypass the slow incremental
704 encoding methods for the most popular encodings. */
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
INADA Naoki507434f2017-12-21 09:59:53 +0900709 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710}
711
712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100715 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900716 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717}
718
719static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000720utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100722 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900723 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724}
725
726static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000727utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728{
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 if (!self->encoding_start_of_stream) {
730 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200731#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000734 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000736 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100737 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900738 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739}
740
Antoine Pitroue4501852009-05-14 18:55:55 +0000741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000743{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100744 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900745 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000746}
747
748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000750{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100751 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900752 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000757{
758 if (!self->encoding_start_of_stream) {
759 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200760#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000761 return utf32be_encode(self, text);
762#else
763 return utf32le_encode(self, text);
764#endif
765 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100766 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900767 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000768}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
INADA Naoki507434f2017-12-21 09:59:53 +0900773 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000774}
775
776static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000777latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778{
INADA Naoki507434f2017-12-21 09:59:53 +0900779 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780}
781
Inada Naokibfba8c32019-05-16 15:03:20 +0900782// Return true when encoding can be skipped when text is ascii.
783static inline int
784is_asciicompat_encoding(encodefunc_t f)
785{
786 return f == (encodefunc_t) ascii_encode
787 || f == (encodefunc_t) latin1_encode
788 || f == (encodefunc_t) utf8_encode;
789}
790
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791/* Map normalized encoding names onto the specialized encoding funcs */
792
793typedef struct {
794 const char *name;
795 encodefunc_t encodefunc;
796} encodefuncentry;
797
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200798static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799 {"ascii", (encodefunc_t) ascii_encode},
800 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000801 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802 {"utf-16-be", (encodefunc_t) utf16be_encode},
803 {"utf-16-le", (encodefunc_t) utf16le_encode},
804 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000805 {"utf-32-be", (encodefunc_t) utf32be_encode},
806 {"utf-32-le", (encodefunc_t) utf32le_encode},
807 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000808 {NULL, NULL}
809};
810
INADA Naoki507434f2017-12-21 09:59:53 +0900811static int
812validate_newline(const char *newline)
813{
814 if (newline && newline[0] != '\0'
815 && !(newline[0] == '\n' && newline[1] == '\0')
816 && !(newline[0] == '\r' && newline[1] == '\0')
817 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
818 PyErr_Format(PyExc_ValueError,
819 "illegal newline value: %s", newline);
820 return -1;
821 }
822 return 0;
823}
824
825static int
826set_newline(textio *self, const char *newline)
827{
828 PyObject *old = self->readnl;
829 if (newline == NULL) {
830 self->readnl = NULL;
831 }
832 else {
833 self->readnl = PyUnicode_FromString(newline);
834 if (self->readnl == NULL) {
835 self->readnl = old;
836 return -1;
837 }
838 }
839 self->readuniversal = (newline == NULL || newline[0] == '\0');
840 self->readtranslate = (newline == NULL);
841 self->writetranslate = (newline == NULL || newline[0] != '\0');
842 if (!self->readuniversal && self->readnl != NULL) {
843 // validate_newline() accepts only ASCII newlines.
844 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
845 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
846 if (strcmp(self->writenl, "\n") == 0) {
847 self->writenl = NULL;
848 }
849 }
850 else {
851#ifdef MS_WINDOWS
852 self->writenl = "\r\n";
853#else
854 self->writenl = NULL;
855#endif
856 }
857 Py_XDECREF(old);
858 return 0;
859}
860
861static int
862_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
863 const char *errors)
864{
865 PyObject *res;
866 int r;
867
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200868 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
INADA Naoki507434f2017-12-21 09:59:53 +0900869 if (res == NULL)
870 return -1;
871
872 r = PyObject_IsTrue(res);
873 Py_DECREF(res);
874 if (r == -1)
875 return -1;
876
877 if (r != 1)
878 return 0;
879
880 Py_CLEAR(self->decoder);
881 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
882 if (self->decoder == NULL)
883 return -1;
884
885 if (self->readuniversal) {
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300886 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
INADA Naoki507434f2017-12-21 09:59:53 +0900887 (PyObject *)&PyIncrementalNewlineDecoder_Type,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300888 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
INADA Naoki507434f2017-12-21 09:59:53 +0900889 if (incrementalDecoder == NULL)
890 return -1;
891 Py_CLEAR(self->decoder);
892 self->decoder = incrementalDecoder;
893 }
894
895 return 0;
896}
897
898static PyObject*
899_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
900{
901 PyObject *chars;
902
Andy Lesterdffe4c02020-03-04 07:15:20 -0600903 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
INADA Naoki507434f2017-12-21 09:59:53 +0900904 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
905 else
906 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
907 eof ? Py_True : Py_False, NULL);
908
909 if (check_decoded(chars) < 0)
910 // check_decoded already decreases refcount
911 return NULL;
912
913 return chars;
914}
915
916static int
917_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
918 const char *errors)
919{
920 PyObject *res;
921 int r;
922
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200923 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
INADA Naoki507434f2017-12-21 09:59:53 +0900924 if (res == NULL)
925 return -1;
926
927 r = PyObject_IsTrue(res);
928 Py_DECREF(res);
929 if (r == -1)
930 return -1;
931
932 if (r != 1)
933 return 0;
934
935 Py_CLEAR(self->encoder);
936 self->encodefunc = NULL;
937 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
938 if (self->encoder == NULL)
939 return -1;
940
941 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200942 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
943 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900944 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200945 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900946 const encodefuncentry *e = encodefuncs;
947 while (e->name != NULL) {
948 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
949 self->encodefunc = e->encodefunc;
950 break;
951 }
952 e++;
953 }
954 }
955 Py_XDECREF(res);
956
957 return 0;
958}
959
960static int
961_textiowrapper_fix_encoder_state(textio *self)
962{
963 if (!self->seekable || !self->encoder) {
964 return 0;
965 }
966
967 self->encoding_start_of_stream = 1;
968
Petr Viktorinffd97532020-02-11 17:46:57 +0100969 PyObject *cookieObj = PyObject_CallMethodNoArgs(
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200970 self->buffer, _PyIO_str_tell);
INADA Naoki507434f2017-12-21 09:59:53 +0900971 if (cookieObj == NULL) {
972 return -1;
973 }
974
Victor Stinner37834132020-10-27 17:12:53 +0100975 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
INADA Naoki507434f2017-12-21 09:59:53 +0900976 Py_DECREF(cookieObj);
977 if (cmp < 0) {
978 return -1;
979 }
980
981 if (cmp == 0) {
982 self->encoding_start_of_stream = 0;
Petr Viktorinffd97532020-02-11 17:46:57 +0100983 PyObject *res = PyObject_CallMethodOneArg(
Victor Stinner37834132020-10-27 17:12:53 +0100984 self->encoder, _PyIO_str_setstate, _PyLong_GetZero());
INADA Naoki507434f2017-12-21 09:59:53 +0900985 if (res == NULL) {
986 return -1;
987 }
988 Py_DECREF(res);
989 }
990
991 return 0;
992}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000993
Victor Stinner22eb6892019-06-26 00:51:05 +0200994static int
995io_check_errors(PyObject *errors)
996{
997 assert(errors != NULL && errors != Py_None);
998
Victor Stinner81a7be32020-04-14 15:14:01 +0200999 PyInterpreterState *interp = _PyInterpreterState_GET();
Victor Stinner22eb6892019-06-26 00:51:05 +02001000#ifndef Py_DEBUG
1001 /* In release mode, only check in development mode (-X dev) */
Victor Stinnerda7933e2020-04-13 03:04:28 +02001002 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001003 return 0;
1004 }
1005#else
1006 /* Always check in debug mode */
1007#endif
1008
1009 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1010 before_PyUnicode_InitEncodings() is called. */
Victor Stinner3d17c042020-05-14 01:48:38 +02001011 if (!interp->unicode.fs_codec.encoding) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001012 return 0;
1013 }
1014
1015 Py_ssize_t name_length;
1016 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1017 if (name == NULL) {
1018 return -1;
1019 }
1020 if (strlen(name) != (size_t)name_length) {
1021 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1022 return -1;
1023 }
1024 PyObject *handler = PyCodec_LookupError(name);
1025 if (handler != NULL) {
1026 Py_DECREF(handler);
1027 return 0;
1028 }
1029 return -1;
1030}
1031
1032
1033
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001034/*[clinic input]
1035_io.TextIOWrapper.__init__
1036 buffer: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001037 encoding: str(accept={str, NoneType}) = None
INADA Naoki507434f2017-12-21 09:59:53 +09001038 errors: object = None
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001039 newline: str(accept={str, NoneType}) = None
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001040 line_buffering: bool(accept={int}) = False
1041 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001043Character and line based layer over a BufferedIOBase object, buffer.
1044
1045encoding gives the name of the encoding that the stream will be
1046decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1047
1048errors determines the strictness of encoding and decoding (see
1049help(codecs.Codec) or the documentation for codecs.register) and
1050defaults to "strict".
1051
1052newline controls how line endings are handled. It can be None, '',
1053'\n', '\r', and '\r\n'. It works as follows:
1054
1055* On input, if newline is None, universal newlines mode is
1056 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1057 these are translated into '\n' before being returned to the
1058 caller. If it is '', universal newline mode is enabled, but line
1059 endings are returned to the caller untranslated. If it has any of
1060 the other legal values, input lines are only terminated by the given
1061 string, and the line ending is returned to the caller untranslated.
1062
1063* On output, if newline is None, any '\n' characters written are
1064 translated to the system default line separator, os.linesep. If
1065 newline is '' or '\n', no translation takes place. If newline is any
1066 of the other legal values, any '\n' characters written are translated
1067 to the given string.
1068
1069If line_buffering is True, a call to flush is implied when a call to
1070write contains a newline character.
1071[clinic start generated code]*/
1072
1073static int
1074_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001075 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001076 const char *newline, int line_buffering,
1077 int write_through)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001078/*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001079{
1080 PyObject *raw, *codec_info = NULL;
1081 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001082 PyObject *res;
1083 int r;
1084
1085 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001086 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087
Inada Naokibec8c782021-04-02 17:38:59 +09001088 if (encoding == NULL) {
1089 PyInterpreterState *interp = _PyInterpreterState_GET();
1090 if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1091 if (PyErr_WarnEx(PyExc_EncodingWarning,
1092 "'encoding' argument not specified", 1)) {
1093 return -1;
1094 }
1095 }
1096 }
1097 else if (strcmp(encoding, "locale") == 0) {
1098 encoding = NULL;
1099 }
1100
INADA Naoki507434f2017-12-21 09:59:53 +09001101 if (errors == Py_None) {
1102 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001103 if (errors == NULL) {
1104 return -1;
1105 }
INADA Naoki507434f2017-12-21 09:59:53 +09001106 }
1107 else if (!PyUnicode_Check(errors)) {
1108 // Check 'errors' argument here because Argument Clinic doesn't support
1109 // 'str(accept={str, NoneType})' converter.
1110 PyErr_Format(
1111 PyExc_TypeError,
1112 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001113 Py_TYPE(errors)->tp_name);
INADA Naoki507434f2017-12-21 09:59:53 +09001114 return -1;
1115 }
Victor Stinner22eb6892019-06-26 00:51:05 +02001116 else if (io_check_errors(errors)) {
1117 return -1;
1118 }
INADA Naoki507434f2017-12-21 09:59:53 +09001119
1120 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121 return -1;
1122 }
1123
1124 Py_CLEAR(self->buffer);
1125 Py_CLEAR(self->encoding);
1126 Py_CLEAR(self->encoder);
1127 Py_CLEAR(self->decoder);
1128 Py_CLEAR(self->readnl);
1129 Py_CLEAR(self->decoded_chars);
1130 Py_CLEAR(self->pending_bytes);
1131 Py_CLEAR(self->snapshot);
1132 Py_CLEAR(self->errors);
1133 Py_CLEAR(self->raw);
1134 self->decoded_chars_used = 0;
1135 self->pending_bytes_count = 0;
1136 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001137 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001138
1139 if (encoding == NULL) {
1140 /* Try os.device_encoding(fileno) */
1141 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001142 state = IO_STATE();
1143 if (state == NULL)
1144 goto error;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001145 fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001146 /* Ignore only AttributeError and UnsupportedOperation */
1147 if (fileno == NULL) {
1148 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1149 PyErr_ExceptionMatches(state->unsupported_operation)) {
1150 PyErr_Clear();
1151 }
1152 else {
1153 goto error;
1154 }
1155 }
1156 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001157 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001158 Py_DECREF(fileno);
1159 if (fd == -1 && PyErr_Occurred()) {
1160 goto error;
1161 }
1162
1163 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001164 if (self->encoding == NULL)
1165 goto error;
1166 else if (!PyUnicode_Check(self->encoding))
1167 Py_CLEAR(self->encoding);
1168 }
1169 }
1170 if (encoding == NULL && self->encoding == NULL) {
Victor Stinner82458b62020-11-01 20:59:35 +01001171 self->encoding = _Py_GetLocaleEncodingObject();
Antoine Pitrou932ff832013-08-01 21:04:50 +02001172 if (self->encoding == NULL) {
Victor Stinner710e8262020-10-31 01:02:09 +01001173 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001174 }
Victor Stinner710e8262020-10-31 01:02:09 +01001175 assert(PyUnicode_Check(self->encoding));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001176 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001177 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001178 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001179 if (encoding == NULL)
1180 goto error;
1181 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001182 else if (encoding != NULL) {
1183 self->encoding = PyUnicode_FromString(encoding);
1184 if (self->encoding == NULL)
1185 goto error;
1186 }
1187 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001188 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001189 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001190 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001191 }
1192
Nick Coghlana9b15242014-02-04 22:11:18 +10001193 /* Check we have been asked for a real text encoding */
1194 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1195 if (codec_info == NULL) {
1196 Py_CLEAR(self->encoding);
1197 goto error;
1198 }
1199
1200 /* XXX: Failures beyond this point have the potential to leak elements
1201 * of the partially constructed object (like self->encoding)
1202 */
1203
INADA Naoki507434f2017-12-21 09:59:53 +09001204 Py_INCREF(errors);
1205 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001206 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001207 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001208 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001209 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001210 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001211 }
1212
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001213 self->buffer = buffer;
1214 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001215
INADA Naoki507434f2017-12-21 09:59:53 +09001216 /* Build the decoder object */
1217 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1218 goto error;
1219
1220 /* Build the encoder object */
1221 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1222 goto error;
1223
1224 /* Finished sorting out the codec details */
1225 Py_CLEAR(codec_info);
1226
Andy Lesterdffe4c02020-03-04 07:15:20 -06001227 if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1228 Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1229 Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001230 {
1231 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1232 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001233 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001234 if (raw != NULL) {
Andy Lesterdffe4c02020-03-04 07:15:20 -06001235 if (Py_IS_TYPE(raw, &PyFileIO_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001236 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001237 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001238 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001239 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001240 }
1241
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001242 res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001243 if (res == NULL)
1244 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001245 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001247 if (r < 0)
1248 goto error;
1249 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001250
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001251 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1252 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001253 goto error;
1254 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001255 Py_XDECREF(res);
1256 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001257
Antoine Pitroue4501852009-05-14 18:55:55 +00001258 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001259 if (_textiowrapper_fix_encoder_state(self) < 0) {
1260 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001261 }
1262
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001263 self->ok = 1;
1264 return 0;
1265
1266 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001267 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268 return -1;
1269}
1270
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001271/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1272 * -1 on error.
1273 */
1274static int
1275convert_optional_bool(PyObject *obj, int default_value)
1276{
1277 long v;
1278 if (obj == Py_None) {
1279 v = default_value;
1280 }
1281 else {
1282 v = PyLong_AsLong(obj);
1283 if (v == -1 && PyErr_Occurred())
1284 return -1;
1285 }
1286 return v != 0;
1287}
1288
INADA Naoki507434f2017-12-21 09:59:53 +09001289static int
1290textiowrapper_change_encoding(textio *self, PyObject *encoding,
1291 PyObject *errors, int newline_changed)
1292{
1293 /* Use existing settings where new settings are not specified */
1294 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1295 return 0; // no change
1296 }
1297
1298 if (encoding == Py_None) {
1299 encoding = self->encoding;
1300 if (errors == Py_None) {
1301 errors = self->errors;
1302 }
1303 }
1304 else if (errors == Py_None) {
1305 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001306 if (errors == NULL) {
1307 return -1;
1308 }
INADA Naoki507434f2017-12-21 09:59:53 +09001309 }
1310
1311 const char *c_errors = PyUnicode_AsUTF8(errors);
1312 if (c_errors == NULL) {
1313 return -1;
1314 }
1315
1316 // Create new encoder & decoder
1317 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1318 PyUnicode_AsUTF8(encoding), "codecs.open()");
1319 if (codec_info == NULL) {
1320 return -1;
1321 }
1322 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1323 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1324 Py_DECREF(codec_info);
1325 return -1;
1326 }
1327 Py_DECREF(codec_info);
1328
1329 Py_INCREF(encoding);
1330 Py_INCREF(errors);
1331 Py_SETREF(self->encoding, encoding);
1332 Py_SETREF(self->errors, errors);
1333
1334 return _textiowrapper_fix_encoder_state(self);
1335}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001336
1337/*[clinic input]
1338_io.TextIOWrapper.reconfigure
1339 *
INADA Naoki507434f2017-12-21 09:59:53 +09001340 encoding: object = None
1341 errors: object = None
1342 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001343 line_buffering as line_buffering_obj: object = None
1344 write_through as write_through_obj: object = None
1345
1346Reconfigure the text stream with new parameters.
1347
1348This also does an implicit stream flush.
1349
1350[clinic start generated code]*/
1351
1352static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001353_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1354 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001355 PyObject *line_buffering_obj,
1356 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001357/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001358{
1359 int line_buffering;
1360 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001361 const char *newline = NULL;
1362
1363 /* Check if something is in the read buffer */
1364 if (self->decoded_chars != NULL) {
1365 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001366 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001367 "of stream after the first read");
1368 return NULL;
1369 }
1370 }
1371
1372 if (newline_obj != NULL && newline_obj != Py_None) {
1373 newline = PyUnicode_AsUTF8(newline_obj);
1374 if (newline == NULL || validate_newline(newline) < 0) {
1375 return NULL;
1376 }
1377 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001378
1379 line_buffering = convert_optional_bool(line_buffering_obj,
1380 self->line_buffering);
1381 write_through = convert_optional_bool(write_through_obj,
1382 self->write_through);
1383 if (line_buffering < 0 || write_through < 0) {
1384 return NULL;
1385 }
INADA Naoki507434f2017-12-21 09:59:53 +09001386
Petr Viktorinffd97532020-02-11 17:46:57 +01001387 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001388 if (res == NULL) {
1389 return NULL;
1390 }
INADA Naoki507434f2017-12-21 09:59:53 +09001391 Py_DECREF(res);
1392 self->b2cratio = 0;
1393
1394 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1395 return NULL;
1396 }
1397
1398 if (textiowrapper_change_encoding(
1399 self, encoding, errors, newline_obj != NULL) < 0) {
1400 return NULL;
1401 }
1402
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001403 self->line_buffering = line_buffering;
1404 self->write_through = write_through;
1405 Py_RETURN_NONE;
1406}
1407
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001408static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001409textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001410{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001411 self->ok = 0;
1412 Py_CLEAR(self->buffer);
1413 Py_CLEAR(self->encoding);
1414 Py_CLEAR(self->encoder);
1415 Py_CLEAR(self->decoder);
1416 Py_CLEAR(self->readnl);
1417 Py_CLEAR(self->decoded_chars);
1418 Py_CLEAR(self->pending_bytes);
1419 Py_CLEAR(self->snapshot);
1420 Py_CLEAR(self->errors);
1421 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001422
1423 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001424 return 0;
1425}
1426
1427static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001428textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001429{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001430 self->finalizing = 1;
1431 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001432 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001433 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001434 _PyObject_GC_UNTRACK(self);
1435 if (self->weakreflist != NULL)
1436 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001437 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001438 Py_TYPE(self)->tp_free((PyObject *)self);
1439}
1440
1441static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001442textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001443{
1444 Py_VISIT(self->buffer);
1445 Py_VISIT(self->encoding);
1446 Py_VISIT(self->encoder);
1447 Py_VISIT(self->decoder);
1448 Py_VISIT(self->readnl);
1449 Py_VISIT(self->decoded_chars);
1450 Py_VISIT(self->pending_bytes);
1451 Py_VISIT(self->snapshot);
1452 Py_VISIT(self->errors);
1453 Py_VISIT(self->raw);
1454
1455 Py_VISIT(self->dict);
1456 return 0;
1457}
1458
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001459static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001460textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001461
/* Make the enclosing function return NULL with ValueError set when the
   stream is closed.

   This macro takes some shortcuts to make the common case faster: for an
   exact TextIOWrapper with a cached raw file object, the raw object's
   closed state is read directly; without one, the `closed` getter is
   called.  Any other type goes through _PyIOBase_check_closed().

   `self` is parenthesized wherever a member is accessed so that the macro
   also expands correctly for non-trivial argument expressions. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1488
/* Make the enclosing function return NULL with ValueError set when the
   wrapper has not been successfully initialised (ok <= 0).
   The bare `if` statement shape is kept so existing call sites expand
   exactly as before; `self` is parenthesized for macro hygiene. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1495
/* Make the enclosing function return NULL with ValueError set when the
   wrapper is uninitialised or its buffer has been detached.
   Expands to two statements (CHECK_INITIALIZED plus an `if`), so it must
   not be used as the sole body of an unbraced if/else.  `self` is
   parenthesized for macro hygiene. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1503
/* Variant of CHECK_ATTACHED for functions returning int: makes the
   enclosing function return -1 with ValueError set when the wrapper is
   uninitialised or its buffer has been detached.  `self` is parenthesized
   for macro hygiene. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1514
1515
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001516/*[clinic input]
1517_io.TextIOWrapper.detach
1518[clinic start generated code]*/
1519
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001520static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001521_io_TextIOWrapper_detach_impl(textio *self)
1522/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001523{
1524 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001525 CHECK_ATTACHED(self);
Petr Viktorinffd97532020-02-11 17:46:57 +01001526 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001527 if (res == NULL)
1528 return NULL;
1529 Py_DECREF(res);
1530 buffer = self->buffer;
1531 self->buffer = NULL;
1532 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001533 return buffer;
1534}
1535
Antoine Pitrou24f36292009-03-28 22:16:42 +00001536/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001537 underlying buffered object, though. */
1538static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001539_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001540{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001541 if (self->pending_bytes == NULL)
1542 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001543
Inada Naokibfba8c32019-05-16 15:03:20 +09001544 PyObject *pending = self->pending_bytes;
1545 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001546
Inada Naokibfba8c32019-05-16 15:03:20 +09001547 if (PyBytes_Check(pending)) {
1548 b = pending;
1549 Py_INCREF(b);
1550 }
1551 else if (PyUnicode_Check(pending)) {
1552 assert(PyUnicode_IS_ASCII(pending));
1553 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1554 b = PyBytes_FromStringAndSize(
1555 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1556 if (b == NULL) {
1557 return -1;
1558 }
1559 }
1560 else {
1561 assert(PyList_Check(pending));
1562 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1563 if (b == NULL) {
1564 return -1;
1565 }
1566
1567 char *buf = PyBytes_AsString(b);
1568 Py_ssize_t pos = 0;
1569
1570 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1571 PyObject *obj = PyList_GET_ITEM(pending, i);
1572 char *src;
1573 Py_ssize_t len;
1574 if (PyUnicode_Check(obj)) {
1575 assert(PyUnicode_IS_ASCII(obj));
1576 src = PyUnicode_DATA(obj);
1577 len = PyUnicode_GET_LENGTH(obj);
1578 }
1579 else {
1580 assert(PyBytes_Check(obj));
1581 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1582 Py_DECREF(b);
1583 return -1;
1584 }
1585 }
1586 memcpy(buf + pos, src, len);
1587 pos += len;
1588 }
1589 assert(pos == self->pending_bytes_count);
1590 }
1591
1592 self->pending_bytes_count = 0;
1593 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001594 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001595
1596 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001597 do {
Petr Viktorinffd97532020-02-11 17:46:57 +01001598 ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001599 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001600 Py_DECREF(b);
Inada Naoki01806d52021-02-22 08:29:30 +09001601 // NOTE: We cleared buffer but we don't know how many bytes are actually written
1602 // when an error occurred.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 if (ret == NULL)
1604 return -1;
1605 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001606 return 0;
1607}
1608
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001609/*[clinic input]
1610_io.TextIOWrapper.write
1611 text: unicode
1612 /
1613[clinic start generated code]*/
1614
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001615static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001616_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1617/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001618{
1619 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620 PyObject *b;
1621 Py_ssize_t textlen;
1622 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001623 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001624
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001625 if (PyUnicode_READY(text) == -1)
1626 return NULL;
1627
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001628 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 CHECK_CLOSED(self);
1630
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001631 if (self->encoder == NULL)
1632 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001633
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001634 Py_INCREF(text);
1635
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001636 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001637
1638 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001639 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640 haslf = 1;
1641
1642 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001643 PyObject *newtext = _PyObject_CallMethodId(
1644 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645 Py_DECREF(text);
1646 if (newtext == NULL)
1647 return NULL;
1648 text = newtext;
1649 }
1650
Antoine Pitroue96ec682011-07-23 21:46:35 +02001651 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001652 text_needflush = 1;
1653 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001654 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001655 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 needflush = 1;
1657
1658 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001659 if (self->encodefunc != NULL) {
Inada Naoki01806d52021-02-22 08:29:30 +09001660 if (PyUnicode_IS_ASCII(text) &&
1661 // See bpo-43260
1662 PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1663 is_asciicompat_encoding(self->encodefunc)) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001664 b = text;
1665 Py_INCREF(b);
1666 }
1667 else {
1668 b = (*self->encodefunc)((PyObject *) self, text);
1669 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001670 self->encoding_start_of_stream = 0;
1671 }
Inada Naoki01806d52021-02-22 08:29:30 +09001672 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01001673 b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
Inada Naoki01806d52021-02-22 08:29:30 +09001674 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001675
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676 Py_DECREF(text);
1677 if (b == NULL)
1678 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001679 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001680 PyErr_Format(PyExc_TypeError,
1681 "encoder should return a bytes object, not '%.200s'",
1682 Py_TYPE(b)->tp_name);
1683 Py_DECREF(b);
1684 return NULL;
1685 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001686
Inada Naokibfba8c32019-05-16 15:03:20 +09001687 Py_ssize_t bytes_len;
1688 if (b == text) {
1689 bytes_len = PyUnicode_GET_LENGTH(b);
1690 }
1691 else {
1692 bytes_len = PyBytes_GET_SIZE(b);
1693 }
1694
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001695 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001696 self->pending_bytes_count = 0;
1697 self->pending_bytes = b;
1698 }
Inada Naoki01806d52021-02-22 08:29:30 +09001699 else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1700 // Prevent to concatenate more than chunk_size data.
1701 if (_textiowrapper_writeflush(self) < 0) {
1702 Py_DECREF(b);
1703 return NULL;
1704 }
1705 self->pending_bytes = b;
1706 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001707 else if (!PyList_CheckExact(self->pending_bytes)) {
1708 PyObject *list = PyList_New(2);
1709 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 Py_DECREF(b);
1711 return NULL;
1712 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001713 PyList_SET_ITEM(list, 0, self->pending_bytes);
1714 PyList_SET_ITEM(list, 1, b);
1715 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001716 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001717 else {
1718 if (PyList_Append(self->pending_bytes, b) < 0) {
1719 Py_DECREF(b);
1720 return NULL;
1721 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001724
1725 self->pending_bytes_count += bytes_len;
Inada Naoki01806d52021-02-22 08:29:30 +09001726 if (self->pending_bytes_count >= self->chunk_size || needflush ||
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001727 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001728 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001729 return NULL;
1730 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001731
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001732 if (needflush) {
Petr Viktorinffd97532020-02-11 17:46:57 +01001733 ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001734 if (ret == NULL)
1735 return NULL;
1736 Py_DECREF(ret);
1737 }
1738
Zackery Spytz23db9352018-06-29 04:14:58 -06001739 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 Py_CLEAR(self->snapshot);
1741
1742 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001743 ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001744 if (ret == NULL)
1745 return NULL;
1746 Py_DECREF(ret);
1747 }
1748
1749 return PyLong_FromSsize_t(textlen);
1750}
1751
1752/* Steal a reference to chars and store it in the decoded_char buffer;
1753 */
1754static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001755textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001757 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001758 self->decoded_chars_used = 0;
1759}
1760
1761static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001762textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001763{
1764 PyObject *chars;
1765 Py_ssize_t avail;
1766
1767 if (self->decoded_chars == NULL)
1768 return PyUnicode_FromStringAndSize(NULL, 0);
1769
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001770 /* decoded_chars is guaranteed to be "ready". */
1771 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001772 - self->decoded_chars_used);
1773
1774 assert(avail >= 0);
1775
1776 if (n < 0 || n > avail)
1777 n = avail;
1778
1779 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001780 chars = PyUnicode_Substring(self->decoded_chars,
1781 self->decoded_chars_used,
1782 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001783 if (chars == NULL)
1784 return NULL;
1785 }
1786 else {
1787 chars = self->decoded_chars;
1788 Py_INCREF(chars);
1789 }
1790
1791 self->decoded_chars_used += n;
1792 return chars;
1793}
1794
1795/* Read and decode the next chunk of data from the BufferedReader.
1796 */
1797static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001798textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001799{
1800 PyObject *dec_buffer = NULL;
1801 PyObject *dec_flags = NULL;
1802 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001803 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001804 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001805 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 int eof;
1807
1808 /* The return value is True unless EOF was reached. The decoded string is
1809 * placed in self._decoded_chars (replacing its previous value). The
1810 * entire input chunk is sent to the decoder, though some of it may remain
1811 * buffered in the decoder, yet to be converted.
1812 */
1813
1814 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001815 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816 return -1;
1817 }
1818
1819 if (self->telling) {
1820 /* To prepare for tell(), we need to snapshot a point in the file
1821 * where the decoder's input buffer is empty.
1822 */
Petr Viktorinffd97532020-02-11 17:46:57 +01001823 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001824 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001825 if (state == NULL)
1826 return -1;
1827 /* Given this, we know there was a valid snapshot point
1828 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1829 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001830 if (!PyTuple_Check(state)) {
1831 PyErr_SetString(PyExc_TypeError,
1832 "illegal decoder state");
1833 Py_DECREF(state);
1834 return -1;
1835 }
1836 if (!PyArg_ParseTuple(state,
1837 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1838 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001839 Py_DECREF(state);
1840 return -1;
1841 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001842
1843 if (!PyBytes_Check(dec_buffer)) {
1844 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001845 "illegal decoder state: the first item should be a "
1846 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001847 Py_TYPE(dec_buffer)->tp_name);
1848 Py_DECREF(state);
1849 return -1;
1850 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001851 Py_INCREF(dec_buffer);
1852 Py_INCREF(dec_flags);
1853 Py_DECREF(state);
1854 }
1855
1856 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001857 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001858 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001859 }
1860 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001861 if (chunk_size == NULL)
1862 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001863
Petr Viktorinffd97532020-02-11 17:46:57 +01001864 input_chunk = PyObject_CallMethodOneArg(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001865 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001866 chunk_size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001867 Py_DECREF(chunk_size);
1868 if (input_chunk == NULL)
1869 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001870
1871 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001872 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001873 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001874 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1875 Py_TYPE(input_chunk)->tp_name);
1876 goto fail;
1877 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001878
Antoine Pitroub8503892014-04-29 10:14:02 +02001879 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001880 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001881
INADA Naoki507434f2017-12-21 09:59:53 +09001882 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1883 PyBuffer_Release(&input_chunk_buf);
1884 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001885 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001886
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001887 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001888 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001889 if (nchars > 0)
1890 self->b2cratio = (double) nbytes / nchars;
1891 else
1892 self->b2cratio = 0.0;
1893 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001894 eof = 0;
1895
1896 if (self->telling) {
1897 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1898 * next input to be decoded is dec_buffer + input_chunk.
1899 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001900 PyObject *next_input = dec_buffer;
1901 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001902 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001903 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001904 goto fail;
1905 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001906 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1907 if (snapshot == NULL) {
1908 dec_flags = NULL;
1909 goto fail;
1910 }
1911 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001912 }
1913 Py_DECREF(input_chunk);
1914
1915 return (eof == 0);
1916
1917 fail:
1918 Py_XDECREF(dec_buffer);
1919 Py_XDECREF(dec_flags);
1920 Py_XDECREF(input_chunk);
1921 return -1;
1922}
1923
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001924/*[clinic input]
1925_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001926 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001927 /
1928[clinic start generated code]*/
1929
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001931_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001932/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001933{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001934 PyObject *result = NULL, *chunks = NULL;
1935
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001936 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001937 CHECK_CLOSED(self);
1938
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001939 if (self->decoder == NULL)
1940 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001941
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001942 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001943 return NULL;
1944
1945 if (n < 0) {
1946 /* Read everything */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001947 PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001948 PyObject *decoded;
1949 if (bytes == NULL)
1950 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001951
Andy Lesterdffe4c02020-03-04 07:15:20 -06001952 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
Victor Stinnerfd821132011-05-25 22:01:33 +02001953 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1954 bytes, 1);
1955 else
1956 decoded = PyObject_CallMethodObjArgs(
1957 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001958 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001959 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001960 goto fail;
1961
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001962 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963
1964 if (result == NULL) {
1965 Py_DECREF(decoded);
1966 return NULL;
1967 }
1968
1969 PyUnicode_AppendAndDel(&result, decoded);
1970 if (result == NULL)
1971 goto fail;
1972
Zackery Spytz23db9352018-06-29 04:14:58 -06001973 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974 Py_CLEAR(self->snapshot);
1975 return result;
1976 }
1977 else {
1978 int res = 1;
1979 Py_ssize_t remaining = n;
1980
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001981 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001982 if (result == NULL)
1983 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001984 if (PyUnicode_READY(result) == -1)
1985 goto fail;
1986 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001987
1988 /* Keep reading chunks until we have n characters to return */
1989 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001990 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001991 if (res < 0) {
1992 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1993 when EINTR occurs so we needn't do it ourselves. */
1994 if (_PyIO_trap_eintr()) {
1995 continue;
1996 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001997 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001998 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 if (res == 0) /* EOF */
2000 break;
2001 if (chunks == NULL) {
2002 chunks = PyList_New(0);
2003 if (chunks == NULL)
2004 goto fail;
2005 }
Antoine Pitroue5324562011-11-19 00:39:01 +01002006 if (PyUnicode_GET_LENGTH(result) > 0 &&
2007 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008 goto fail;
2009 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002010 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002011 if (result == NULL)
2012 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002013 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002014 }
2015 if (chunks != NULL) {
2016 if (result != NULL && PyList_Append(chunks, result) < 0)
2017 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03002018 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002019 if (result == NULL)
2020 goto fail;
2021 Py_CLEAR(chunks);
2022 }
2023 return result;
2024 }
2025 fail:
2026 Py_XDECREF(result);
2027 Py_XDECREF(chunks);
2028 return NULL;
2029}
2030
2031
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002032/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002033 that is to the NUL character. Otherwise the function will produce
2034 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002035static const char *
2036find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002037{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002038 if (kind == PyUnicode_1BYTE_KIND) {
2039 assert(ch < 256);
Andy Lestere6be9b52020-02-11 20:28:35 -06002040 return (char *) memchr((const void *) s, (char) ch, end - s);
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002041 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002042 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002043 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002044 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002045 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002046 return s;
2047 if (s == end)
2048 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002049 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002050 }
2051}
2052
2053Py_ssize_t
2054_PyIO_find_line_ending(
2055 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002056 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002057{
Andy Lestere6be9b52020-02-11 20:28:35 -06002058 Py_ssize_t len = (end - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002059
2060 if (translated) {
2061 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002062 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002064 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002065 else {
2066 *consumed = len;
2067 return -1;
2068 }
2069 }
2070 else if (universal) {
2071 /* Universal newline search. Find any of \r, \r\n, \n
2072 * The decoder ensures that \r\n are not split in two pieces
2073 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002074 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002075 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002076 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002078 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002079 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002080 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 if (s >= end) {
2082 *consumed = len;
2083 return -1;
2084 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002085 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002086 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002087 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002088 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002089 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002090 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002091 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002093 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002094 }
2095 }
2096 }
2097 else {
2098 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002099 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002100 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002101 /* Assume that readnl is an ASCII character. */
2102 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002104 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002106 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002107 *consumed = len;
2108 return -1;
2109 }
2110 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002111 const char *s = start;
2112 const char *e = end - (readnl_len - 1)*kind;
2113 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 if (e < s)
2115 e = s;
2116 while (s < e) {
2117 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002118 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119 if (pos == NULL || pos >= e)
2120 break;
2121 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002122 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123 break;
2124 }
2125 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002126 return (pos - start)/kind + readnl_len;
2127 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002128 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002129 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002130 if (pos == NULL)
2131 *consumed = len;
2132 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002133 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002134 return -1;
2135 }
2136 }
2137}
2138
2139static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002140_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002141{
2142 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2143 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2144 int res;
2145
2146 CHECK_CLOSED(self);
2147
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002148 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002149 return NULL;
2150
2151 chunked = 0;
2152
2153 while (1) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002154 const char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002155 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002156 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157 Py_ssize_t consumed = 0;
2158
2159 /* First, get some data if necessary */
2160 res = 1;
2161 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002162 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002163 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002164 if (res < 0) {
2165 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2166 when EINTR occurs so we needn't do it ourselves. */
2167 if (_PyIO_trap_eintr()) {
2168 continue;
2169 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002170 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002171 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002172 if (res == 0)
2173 break;
2174 }
2175 if (res == 0) {
2176 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002177 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002178 Py_CLEAR(self->snapshot);
2179 start = endpos = offset_to_buffer = 0;
2180 break;
2181 }
2182
2183 if (remaining == NULL) {
2184 line = self->decoded_chars;
2185 start = self->decoded_chars_used;
2186 offset_to_buffer = 0;
2187 Py_INCREF(line);
2188 }
2189 else {
2190 assert(self->decoded_chars_used == 0);
2191 line = PyUnicode_Concat(remaining, self->decoded_chars);
2192 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002193 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002194 Py_CLEAR(remaining);
2195 if (line == NULL)
2196 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002197 if (PyUnicode_READY(line) == -1)
2198 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002199 }
2200
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002201 ptr = PyUnicode_DATA(line);
2202 line_len = PyUnicode_GET_LENGTH(line);
2203 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002204
2205 endpos = _PyIO_find_line_ending(
2206 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002207 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002208 ptr + kind * start,
2209 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002210 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002211 if (endpos >= 0) {
2212 endpos += start;
2213 if (limit >= 0 && (endpos - start) + chunked >= limit)
2214 endpos = start + limit - chunked;
2215 break;
2216 }
2217
2218 /* We can put aside up to `endpos` */
2219 endpos = consumed + start;
2220 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2221 /* Didn't find line ending, but reached length limit */
2222 endpos = start + limit - chunked;
2223 break;
2224 }
2225
2226 if (endpos > start) {
2227 /* No line ending seen yet - put aside current data */
2228 PyObject *s;
2229 if (chunks == NULL) {
2230 chunks = PyList_New(0);
2231 if (chunks == NULL)
2232 goto error;
2233 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002234 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235 if (s == NULL)
2236 goto error;
2237 if (PyList_Append(chunks, s) < 0) {
2238 Py_DECREF(s);
2239 goto error;
2240 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002241 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002242 Py_DECREF(s);
2243 }
2244 /* There may be some remaining bytes we'll have to prepend to the
2245 next chunk of data */
2246 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002247 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002248 if (remaining == NULL)
2249 goto error;
2250 }
2251 Py_CLEAR(line);
2252 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002253 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002254 }
2255
2256 if (line != NULL) {
2257 /* Our line ends in the current buffer */
2258 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002259 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2260 PyObject *s = PyUnicode_Substring(line, start, endpos);
2261 Py_CLEAR(line);
2262 if (s == NULL)
2263 goto error;
2264 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002265 }
2266 }
2267 if (remaining != NULL) {
2268 if (chunks == NULL) {
2269 chunks = PyList_New(0);
2270 if (chunks == NULL)
2271 goto error;
2272 }
2273 if (PyList_Append(chunks, remaining) < 0)
2274 goto error;
2275 Py_CLEAR(remaining);
2276 }
2277 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002278 if (line != NULL) {
2279 if (PyList_Append(chunks, line) < 0)
2280 goto error;
2281 Py_DECREF(line);
2282 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002283 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2284 if (line == NULL)
2285 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002286 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002287 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002288 if (line == NULL) {
2289 Py_INCREF(_PyIO_empty_str);
2290 line = _PyIO_empty_str;
2291 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002292
2293 return line;
2294
2295 error:
2296 Py_XDECREF(chunks);
2297 Py_XDECREF(remaining);
2298 Py_XDECREF(line);
2299 return NULL;
2300}
2301
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002302/*[clinic input]
2303_io.TextIOWrapper.readline
2304 size: Py_ssize_t = -1
2305 /
2306[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002307
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002308static PyObject *
2309_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2310/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2311{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002312 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002313 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002314}
2315
2316/* Seek and Tell */
2317
2318typedef struct {
2319 Py_off_t start_pos;
2320 int dec_flags;
2321 int bytes_to_feed;
2322 int chars_to_skip;
2323 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002324} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002325
/*
   To speed up cookie packing/unpacking, the cookie_type fields are copied
   through a temporary byte buffer which is converted with
   _PyLong_FromByteArray() (packing) or _PyLong_AsByteArray() (unpacking).
   The macros below give the total size of that buffer and the offset at
   which each cookie_type field is stored in it.  Each offset is written as
   "previous offset + size of the previous field".
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* The least significant byte of start_pos must also be the least
   significant byte of the cookie, so on big-endian machines the fields are
   laid out in reverse order (need_eof first, start_pos last). */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian machines: with start_pos stored first, its least
   significant byte naturally is the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2357
2358static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002359textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002360{
2361 unsigned char buffer[COOKIE_BUF_LEN];
2362 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2363 if (cookieLong == NULL)
2364 return -1;
2365
2366 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002367 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002368 Py_DECREF(cookieLong);
2369 return -1;
2370 }
2371 Py_DECREF(cookieLong);
2372
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002373 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2374 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2375 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2376 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2377 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002378
2379 return 0;
2380}
2381
2382static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002383textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002384{
2385 unsigned char buffer[COOKIE_BUF_LEN];
2386
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002387 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2388 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2389 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2390 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2391 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002392
Christian Heimes743e0cd2012-10-17 23:52:17 +02002393 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2394 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002395}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396
2397static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002398_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399{
2400 PyObject *res;
2401 /* When seeking to the start of the stream, we call decoder.reset()
2402 rather than decoder.getstate().
2403 This is for a few decoders such as utf-16 for which the state value
2404 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2405 utf-16, that we are expecting a BOM).
2406 */
2407 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
Petr Viktorinffd97532020-02-11 17:46:57 +01002408 res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002409 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002410 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2411 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002412 if (res == NULL)
2413 return -1;
2414 Py_DECREF(res);
2415 return 0;
2416}
2417
Antoine Pitroue4501852009-05-14 18:55:55 +00002418static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002419_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002420{
2421 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002422 if (start_of_stream) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002423 res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
Antoine Pitroue4501852009-05-14 18:55:55 +00002424 self->encoding_start_of_stream = 1;
2425 }
2426 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01002427 res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
Victor Stinner37834132020-10-27 17:12:53 +01002428 _PyLong_GetZero());
Antoine Pitroue4501852009-05-14 18:55:55 +00002429 self->encoding_start_of_stream = 0;
2430 }
2431 if (res == NULL)
2432 return -1;
2433 Py_DECREF(res);
2434 return 0;
2435}
2436
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002437static int
2438_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2439{
2440 /* Same as _textiowrapper_decoder_setstate() above. */
2441 return _textiowrapper_encoder_reset(
2442 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2443}
2444
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002445/*[clinic input]
2446_io.TextIOWrapper.seek
2447 cookie as cookieObj: object
2448 whence: int = 0
2449 /
2450[clinic start generated code]*/
2451
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002452static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002453_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2454/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002455{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002456 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002457 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002458 PyObject *res;
2459 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002460 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002461
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002462 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463 CHECK_CLOSED(self);
2464
2465 Py_INCREF(cookieObj);
2466
2467 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002468 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002469 goto fail;
2470 }
2471
Victor Stinner37834132020-10-27 17:12:53 +01002472 PyObject *zero = _PyLong_GetZero(); // borrowed reference
2473
ngie-eign848037c2019-03-02 23:28:26 -08002474 switch (whence) {
2475 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002476 /* seek relative to current position */
Victor Stinner37834132020-10-27 17:12:53 +01002477 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478 if (cmp < 0)
2479 goto fail;
2480
2481 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002482 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002483 goto fail;
2484 }
2485
2486 /* Seeking to the current position should attempt to
2487 * sync the underlying buffer with the current position.
2488 */
2489 Py_DECREF(cookieObj);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002490 cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002491 if (cookieObj == NULL)
2492 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002493 break;
2494
ngie-eign848037c2019-03-02 23:28:26 -08002495 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002496 /* seek relative to end of file */
Victor Stinner37834132020-10-27 17:12:53 +01002497 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002498 if (cmp < 0)
2499 goto fail;
2500
2501 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002502 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503 goto fail;
2504 }
2505
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002506 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507 if (res == NULL)
2508 goto fail;
2509 Py_DECREF(res);
2510
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002511 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002512 Py_CLEAR(self->snapshot);
2513 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002514 res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002515 if (res == NULL)
2516 goto fail;
2517 Py_DECREF(res);
2518 }
2519
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002520 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002521 Py_CLEAR(cookieObj);
2522 if (res == NULL)
2523 goto fail;
2524 if (self->encoder) {
2525 /* If seek() == 0, we are at the start of stream, otherwise not */
Victor Stinner37834132020-10-27 17:12:53 +01002526 cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002527 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2528 Py_DECREF(res);
2529 goto fail;
2530 }
2531 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002533
ngie-eign848037c2019-03-02 23:28:26 -08002534 case SEEK_SET:
2535 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002536
ngie-eign848037c2019-03-02 23:28:26 -08002537 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002538 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002539 "invalid whence (%d, should be %d, %d or %d)", whence,
2540 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541 goto fail;
2542 }
2543
Victor Stinner37834132020-10-27 17:12:53 +01002544 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002545 if (cmp < 0)
2546 goto fail;
2547
2548 if (cmp == 1) {
2549 PyErr_Format(PyExc_ValueError,
2550 "negative seek position %R", cookieObj);
2551 goto fail;
2552 }
2553
Petr Viktorinffd97532020-02-11 17:46:57 +01002554 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002555 if (res == NULL)
2556 goto fail;
2557 Py_DECREF(res);
2558
2559 /* The strategy of seek() is to go back to the safe start point
2560 * and replay the effect of read(chars_to_skip) from there.
2561 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002562 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563 goto fail;
2564
2565 /* Seek back to the safe start point. */
2566 posobj = PyLong_FromOff_t(cookie.start_pos);
2567 if (posobj == NULL)
2568 goto fail;
Petr Viktorinffd97532020-02-11 17:46:57 +01002569 res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002570 Py_DECREF(posobj);
2571 if (res == NULL)
2572 goto fail;
2573 Py_DECREF(res);
2574
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002575 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576 Py_CLEAR(self->snapshot);
2577
2578 /* Restore the decoder to its state from the safe start point. */
2579 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002580 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002581 goto fail;
2582 }
2583
2584 if (cookie.chars_to_skip) {
2585 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002586 PyObject *input_chunk = _PyObject_CallMethodId(
2587 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002588 PyObject *decoded;
2589
2590 if (input_chunk == NULL)
2591 goto fail;
2592
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002593 if (!PyBytes_Check(input_chunk)) {
2594 PyErr_Format(PyExc_TypeError,
2595 "underlying read() should have returned a bytes "
2596 "object, not '%.200s'",
2597 Py_TYPE(input_chunk)->tp_name);
2598 Py_DECREF(input_chunk);
2599 goto fail;
2600 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002601
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002602 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2603 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002604 goto fail;
2605 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002606 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002607
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002608 decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2609 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002610
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002611 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612 goto fail;
2613
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002614 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002615
2616 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002617 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002618 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619 goto fail;
2620 }
2621 self->decoded_chars_used = cookie.chars_to_skip;
2622 }
2623 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002624 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2625 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002626 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002627 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002628 }
2629
Antoine Pitroue4501852009-05-14 18:55:55 +00002630 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2631 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002632 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002633 goto fail;
2634 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002635 return cookieObj;
2636 fail:
2637 Py_XDECREF(cookieObj);
2638 return NULL;
2639
2640}
2641
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002642/*[clinic input]
2643_io.TextIOWrapper.tell
2644[clinic start generated code]*/
2645
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002646static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002647_io_TextIOWrapper_tell_impl(textio *self)
2648/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002649{
2650 PyObject *res;
2651 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002652 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002653 PyObject *next_input;
2654 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002655 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002656 PyObject *saved_state = NULL;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002657 const char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002658 Py_ssize_t dec_buffer_len;
2659 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002661 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002662 CHECK_CLOSED(self);
2663
2664 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002665 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002666 goto fail;
2667 }
2668 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002669 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002670 "telling position disabled by next() call");
2671 goto fail;
2672 }
2673
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002674 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002675 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002676 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002677 if (res == NULL)
2678 goto fail;
2679 Py_DECREF(res);
2680
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002681 posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682 if (posobj == NULL)
2683 goto fail;
2684
2685 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002686 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002687 return posobj;
2688 }
2689
2690#if defined(HAVE_LARGEFILE_SUPPORT)
2691 cookie.start_pos = PyLong_AsLongLong(posobj);
2692#else
2693 cookie.start_pos = PyLong_AsLong(posobj);
2694#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002695 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696 if (PyErr_Occurred())
2697 goto fail;
2698
2699 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002700 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002701 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002702 goto fail;
2703
2704 assert (PyBytes_Check(next_input));
2705
2706 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2707
2708 /* How many decoded characters have been used up since the snapshot? */
2709 if (self->decoded_chars_used == 0) {
2710 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002711 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002712 }
2713
2714 chars_to_skip = self->decoded_chars_used;
2715
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002716 /* Decoder state will be restored at the end */
Petr Viktorinffd97532020-02-11 17:46:57 +01002717 saved_state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002718 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719 if (saved_state == NULL)
2720 goto fail;
2721
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002722#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002723 PyObject *dec_buffer; \
Petr Viktorinffd97532020-02-11 17:46:57 +01002724 PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002725 _PyIO_str_getstate); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002726 if (_state == NULL) \
2727 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002728 if (!PyTuple_Check(_state)) { \
2729 PyErr_SetString(PyExc_TypeError, \
2730 "illegal decoder state"); \
2731 Py_DECREF(_state); \
2732 goto fail; \
2733 } \
2734 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2735 &dec_buffer, &dec_flags)) \
2736 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002737 Py_DECREF(_state); \
2738 goto fail; \
2739 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002740 if (!PyBytes_Check(dec_buffer)) { \
2741 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002742 "illegal decoder state: the first item should be a " \
2743 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002744 Py_TYPE(dec_buffer)->tp_name); \
2745 Py_DECREF(_state); \
2746 goto fail; \
2747 } \
2748 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002749 Py_DECREF(_state); \
2750 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002751
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002752#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002753 PyObject *_decoded = _PyObject_CallMethodId( \
2754 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002755 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002756 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002757 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002758 Py_DECREF(_decoded); \
2759 } while (0)
2760
2761 /* Fast search for an acceptable start point, close to our
2762 current pos */
2763 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2764 skip_back = 1;
2765 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2766 input = PyBytes_AS_STRING(next_input);
2767 while (skip_bytes > 0) {
2768 /* Decode up to temptative start point */
2769 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2770 goto fail;
2771 DECODER_DECODE(input, skip_bytes, chars_decoded);
2772 if (chars_decoded <= chars_to_skip) {
2773 DECODER_GETSTATE();
2774 if (dec_buffer_len == 0) {
2775 /* Before pos and no bytes buffered in decoder => OK */
2776 cookie.dec_flags = dec_flags;
2777 chars_to_skip -= chars_decoded;
2778 break;
2779 }
2780 /* Skip back by buffered amount and reset heuristic */
2781 skip_bytes -= dec_buffer_len;
2782 skip_back = 1;
2783 }
2784 else {
2785 /* We're too far ahead, skip back a bit */
2786 skip_bytes -= skip_back;
2787 skip_back *= 2;
2788 }
2789 }
2790 if (skip_bytes <= 0) {
2791 skip_bytes = 0;
2792 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2793 goto fail;
2794 }
2795
2796 /* Note our initial start point. */
2797 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002798 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002799 if (chars_to_skip == 0)
2800 goto finally;
2801
2802 /* We should be close to the desired position. Now feed the decoder one
2803 * byte at a time until we reach the `chars_to_skip` target.
2804 * As we go, note the nearest "safe start point" before the current
2805 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002806 * can safely start from there and advance to this location).
2807 */
2808 chars_decoded = 0;
2809 input = PyBytes_AS_STRING(next_input);
2810 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002811 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002812 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002813 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002814
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002815 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002816 /* We got n chars for 1 byte */
2817 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002818 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002819 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002820
2821 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2822 /* Decoder buffer is empty, so this is a safe start point. */
2823 cookie.start_pos += cookie.bytes_to_feed;
2824 chars_to_skip -= chars_decoded;
2825 cookie.dec_flags = dec_flags;
2826 cookie.bytes_to_feed = 0;
2827 chars_decoded = 0;
2828 }
2829 if (chars_decoded >= chars_to_skip)
2830 break;
2831 input++;
2832 }
2833 if (input == input_end) {
2834 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002835 PyObject *decoded = _PyObject_CallMethodId(
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002836 self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002837 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002838 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002839 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002840 Py_DECREF(decoded);
2841 cookie.need_eof = 1;
2842
2843 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002844 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002845 "can't reconstruct logical file position");
2846 goto fail;
2847 }
2848 }
2849
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002850finally:
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002851 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002852 Py_DECREF(saved_state);
2853 if (res == NULL)
2854 return NULL;
2855 Py_DECREF(res);
2856
2857 /* The returned cookie corresponds to the last safe start point. */
2858 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002859 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002860
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002861fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002862 if (saved_state) {
2863 PyObject *type, *value, *traceback;
2864 PyErr_Fetch(&type, &value, &traceback);
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002865 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002866 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002867 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002868 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002869 }
2870 return NULL;
2871}
2872
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002873/*[clinic input]
2874_io.TextIOWrapper.truncate
2875 pos: object = None
2876 /
2877[clinic start generated code]*/
2878
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002879static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002880_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2881/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002882{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002883 PyObject *res;
2884
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002885 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002886
Petr Viktorinffd97532020-02-11 17:46:57 +01002887 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002888 if (res == NULL)
2889 return NULL;
2890 Py_DECREF(res);
2891
Petr Viktorinffd97532020-02-11 17:46:57 +01002892 return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002893}
2894
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002895static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002896textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002897{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002898 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002899 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002900
2901 CHECK_INITIALIZED(self);
2902
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002903 res = PyUnicode_FromString("<_io.TextIOWrapper");
2904 if (res == NULL)
2905 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002906
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002907 status = Py_ReprEnter((PyObject *)self);
2908 if (status != 0) {
2909 if (status > 0) {
2910 PyErr_Format(PyExc_RuntimeError,
2911 "reentrant call inside %s.__repr__",
2912 Py_TYPE(self)->tp_name);
2913 }
2914 goto error;
2915 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002916 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2917 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002918 goto error;
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002919 }
2920 /* Ignore ValueError raised if the underlying stream was detached */
2921 PyErr_Clear();
Antoine Pitrou716c4442009-05-23 19:04:03 +00002922 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002923 if (nameobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002924 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002925 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002926 if (s == NULL)
2927 goto error;
2928 PyUnicode_AppendAndDel(&res, s);
2929 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002930 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002931 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002932 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2933 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002934 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002935 if (modeobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002936 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2937 Py_DECREF(modeobj);
2938 if (s == NULL)
2939 goto error;
2940 PyUnicode_AppendAndDel(&res, s);
2941 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002942 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002943 }
2944 s = PyUnicode_FromFormat("%U encoding=%R>",
2945 res, self->encoding);
2946 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002947 if (status == 0) {
2948 Py_ReprLeave((PyObject *)self);
2949 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002950 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002951
2952 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002953 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002954 if (status == 0) {
2955 Py_ReprLeave((PyObject *)self);
2956 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002957 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002958}
2959
2960
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002961/* Inquiries */
2962
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002963/*[clinic input]
2964_io.TextIOWrapper.fileno
2965[clinic start generated code]*/
2966
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002967static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002968_io_TextIOWrapper_fileno_impl(textio *self)
2969/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002970{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002971 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002972 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002973}
2974
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002975/*[clinic input]
2976_io.TextIOWrapper.seekable
2977[clinic start generated code]*/
2978
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002979static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002980_io_TextIOWrapper_seekable_impl(textio *self)
2981/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002982{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002983 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002984 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002985}
2986
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002987/*[clinic input]
2988_io.TextIOWrapper.readable
2989[clinic start generated code]*/
2990
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002991static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002992_io_TextIOWrapper_readable_impl(textio *self)
2993/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002994{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002995 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002996 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002997}
2998
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002999/*[clinic input]
3000_io.TextIOWrapper.writable
3001[clinic start generated code]*/
3002
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003003static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003004_io_TextIOWrapper_writable_impl(textio *self)
3005/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003006{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003007 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003008 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003009}
3010
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003011/*[clinic input]
3012_io.TextIOWrapper.isatty
3013[clinic start generated code]*/
3014
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003015static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003016_io_TextIOWrapper_isatty_impl(textio *self)
3017/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003018{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003019 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003020 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003021}
3022
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003023/*[clinic input]
3024_io.TextIOWrapper.flush
3025[clinic start generated code]*/
3026
Antoine Pitrou243757e2010-11-05 21:15:39 +00003027static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003028_io_TextIOWrapper_flush_impl(textio *self)
3029/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003030{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003031 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003032 CHECK_CLOSED(self);
3033 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003034 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003035 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003036 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003037}
3038
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003039/*[clinic input]
3040_io.TextIOWrapper.close
3041[clinic start generated code]*/
3042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003043static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003044_io_TextIOWrapper_close_impl(textio *self)
3045/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003046{
3047 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003048 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003049 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003050
Antoine Pitrou6be88762010-05-03 16:48:20 +00003051 res = textiowrapper_closed_get(self, NULL);
3052 if (res == NULL)
3053 return NULL;
3054 r = PyObject_IsTrue(res);
3055 Py_DECREF(res);
3056 if (r < 0)
3057 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003058
Antoine Pitrou6be88762010-05-03 16:48:20 +00003059 if (r > 0) {
3060 Py_RETURN_NONE; /* stream already closed */
3061 }
3062 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003063 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003064 if (self->finalizing) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02003065 res = _PyObject_CallMethodIdOneArg(self->buffer,
3066 &PyId__dealloc_warn,
3067 (PyObject *)self);
Antoine Pitroue033e062010-10-29 10:38:18 +00003068 if (res)
3069 Py_DECREF(res);
3070 else
3071 PyErr_Clear();
3072 }
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003073 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson68623612012-12-20 11:53:11 -06003074 if (res == NULL)
3075 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003076 else
3077 Py_DECREF(res);
3078
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003079 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
Benjamin Peterson68623612012-12-20 11:53:11 -06003080 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003081 _PyErr_ChainExceptions(exc, val, tb);
3082 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003083 }
3084 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003085 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003086}
3087
3088static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003089textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003090{
3091 PyObject *line;
3092
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003093 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003094
3095 self->telling = 0;
Andy Lesterdffe4c02020-03-04 07:15:20 -06003096 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003097 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003098 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003099 }
3100 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01003101 line = PyObject_CallMethodNoArgs((PyObject *)self,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003102 _PyIO_str_readline);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003103 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003104 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003105 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003106 "not '%.200s'", Py_TYPE(line)->tp_name);
3107 Py_DECREF(line);
3108 return NULL;
3109 }
3110 }
3111
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003112 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003113 return NULL;
3114
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003115 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003116 /* Reached EOF or would have blocked */
3117 Py_DECREF(line);
3118 Py_CLEAR(self->snapshot);
3119 self->telling = self->seekable;
3120 return NULL;
3121 }
3122
3123 return line;
3124}
3125
3126static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003127textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003128{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003129 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003130 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003131}
3132
3133static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003134textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003135{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003136 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003137 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3138}
3139
3140static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003141textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003142{
3143 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003144 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003145 if (self->decoder == NULL ||
3146 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3147 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003148 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003149 }
3150 return res;
3151}
3152
3153static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003154textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003155{
3156 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003157 Py_INCREF(self->errors);
3158 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003159}
3160
3161static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003162textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003163{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003164 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003165 return PyLong_FromSsize_t(self->chunk_size);
3166}
3167
3168static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003169textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003170{
3171 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003172 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003173 if (arg == NULL) {
3174 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3175 return -1;
3176 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003177 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003178 if (n == -1 && PyErr_Occurred())
3179 return -1;
3180 if (n <= 0) {
3181 PyErr_SetString(PyExc_ValueError,
3182 "a strictly positive integer is required");
3183 return -1;
3184 }
3185 self->chunk_size = n;
3186 return 0;
3187}
3188
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003189#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003190
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003191static PyMethodDef incrementalnewlinedecoder_methods[] = {
3192 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3193 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3194 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3195 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3196 {NULL}
3197};
3198
3199static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3200 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3201 {NULL}
3202};
3203
3204PyTypeObject PyIncrementalNewlineDecoder_Type = {
3205 PyVarObject_HEAD_INIT(NULL, 0)
3206 "_io.IncrementalNewlineDecoder", /*tp_name*/
3207 sizeof(nldecoder_object), /*tp_basicsize*/
3208 0, /*tp_itemsize*/
3209 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003210 0, /*tp_vectorcall_offset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003211 0, /*tp_getattr*/
3212 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003213 0, /*tp_as_async*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003214 0, /*tp_repr*/
3215 0, /*tp_as_number*/
3216 0, /*tp_as_sequence*/
3217 0, /*tp_as_mapping*/
3218 0, /*tp_hash */
3219 0, /*tp_call*/
3220 0, /*tp_str*/
3221 0, /*tp_getattro*/
3222 0, /*tp_setattro*/
3223 0, /*tp_as_buffer*/
3224 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3225 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3226 0, /* tp_traverse */
3227 0, /* tp_clear */
3228 0, /* tp_richcompare */
3229 0, /*tp_weaklistoffset*/
3230 0, /* tp_iter */
3231 0, /* tp_iternext */
3232 incrementalnewlinedecoder_methods, /* tp_methods */
3233 0, /* tp_members */
3234 incrementalnewlinedecoder_getset, /* tp_getset */
3235 0, /* tp_base */
3236 0, /* tp_dict */
3237 0, /* tp_descr_get */
3238 0, /* tp_descr_set */
3239 0, /* tp_dictoffset */
3240 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3241 0, /* tp_alloc */
3242 PyType_GenericNew, /* tp_new */
3243};
3244
3245
3246static PyMethodDef textiowrapper_methods[] = {
3247 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003248 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003249 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3250 _IO_TEXTIOWRAPPER_READ_METHODDEF
3251 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3252 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3253 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3254
3255 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3256 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3257 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3258 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3259 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003260
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003261 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3262 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3263 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003264 {NULL, NULL}
3265};
3266
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003267static PyMemberDef textiowrapper_members[] = {
3268 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3269 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3270 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003271 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003272 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003273 {NULL}
3274};
3275
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003276static PyGetSetDef textiowrapper_getset[] = {
3277 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3278 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003279/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3280*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003281 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3282 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3283 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3284 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003285 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003286};
3287
3288PyTypeObject PyTextIOWrapper_Type = {
3289 PyVarObject_HEAD_INIT(NULL, 0)
3290 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003291 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003292 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003293 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003294 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003295 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003296 0, /*tps_etattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003297 0, /*tp_as_async*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003298 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003299 0, /*tp_as_number*/
3300 0, /*tp_as_sequence*/
3301 0, /*tp_as_mapping*/
3302 0, /*tp_hash */
3303 0, /*tp_call*/
3304 0, /*tp_str*/
3305 0, /*tp_getattro*/
3306 0, /*tp_setattro*/
3307 0, /*tp_as_buffer*/
3308 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrouada319b2019-05-29 22:12:38 +02003309 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003310 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003311 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3312 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003313 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003314 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003315 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003316 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3317 textiowrapper_methods, /* tp_methods */
3318 textiowrapper_members, /* tp_members */
3319 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003320 0, /* tp_base */
3321 0, /* tp_dict */
3322 0, /* tp_descr_get */
3323 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003324 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003325 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003326 0, /* tp_alloc */
3327 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003328 0, /* tp_free */
3329 0, /* tp_is_gc */
3330 0, /* tp_bases */
3331 0, /* tp_mro */
3332 0, /* tp_cache */
3333 0, /* tp_subclasses */
3334 0, /* tp_weaklist */
3335 0, /* tp_del */
3336 0, /* tp_version_tag */
3337 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003338};