blob: 6f89a879c9c2bf233cf85d00b2331d894c43b1af [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020011#include "pycore_interp.h" // PyInterpreterState.fs_codec
Victor Stinner37834132020-10-27 17:12:53 +010012#include "pycore_long.h" // _PyLong_GetZero()
Victor Stinner710e8262020-10-31 01:02:09 +010013#include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
Victor Stinnerbcda8f12018-11-21 22:27:47 +010014#include "pycore_object.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020015#include "pycore_pystate.h" // _PyInterpreterState_GET()
16#include "structmember.h" // PyMemberDef
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017#include "_iomodule.h"
18
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030019/*[clinic input]
20module _io
21class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
22class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
23[clinic start generated code]*/
24/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
25
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020026_Py_IDENTIFIER(close);
27_Py_IDENTIFIER(_dealloc_warn);
28_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020029_Py_IDENTIFIER(fileno);
30_Py_IDENTIFIER(flush);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020031_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020032_Py_IDENTIFIER(mode);
33_Py_IDENTIFIER(name);
34_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020035_Py_IDENTIFIER(read);
36_Py_IDENTIFIER(readable);
37_Py_IDENTIFIER(replace);
38_Py_IDENTIFIER(reset);
39_Py_IDENTIFIER(seek);
40_Py_IDENTIFIER(seekable);
41_Py_IDENTIFIER(setstate);
INADA Naoki507434f2017-12-21 09:59:53 +090042_Py_IDENTIFIER(strict);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020043_Py_IDENTIFIER(tell);
44_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020045
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046/* TextIOBase */
47
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000048PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000049 "Base class for text I/O.\n"
50 "\n"
51 "This class provides a character and line based interface to stream\n"
52 "I/O. There is no readinto method because Python's character strings\n"
53 "are immutable. There is no public constructor.\n"
54 );
55
56static PyObject *
57_unsupported(const char *message)
58{
Antoine Pitrou712cb732013-12-21 15:51:54 +010059 _PyIO_State *state = IO_STATE();
60 if (state != NULL)
61 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000062 return NULL;
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000066 "Separate the underlying buffer from the TextIOBase and return it.\n"
67 "\n"
68 "After the underlying buffer has been detached, the TextIO is in an\n"
69 "unusable state.\n"
70 );
71
72static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053073textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000074{
75 return _unsupported("detach");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read at most n characters from stream.\n"
80 "\n"
81 "Read from underlying buffer until we have n characters or we hit EOF.\n"
82 "If n is negative or omitted, read until EOF.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("read");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Read until newline or EOF.\n"
93 "\n"
94 "Returns an empty string if EOF is hit immediately.\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("readline");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Write string to stream.\n"
105 "Returns the number of characters written (which is always equal to\n"
106 "the length of the string).\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 return _unsupported("write");
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Encoding of the text stream.\n"
117 "\n"
118 "Subclasses should override.\n"
119 );
120
121static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000122textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000123{
124 Py_RETURN_NONE;
125}
126
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000127PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000128 "Line endings translated so far.\n"
129 "\n"
130 "Only line endings translated during reading are considered.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000142 "The error setting of the decoder or encoder.\n"
143 "\n"
144 "Subclasses should override.\n"
145 );
146
147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000148textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000149{
150 Py_RETURN_NONE;
151}
152
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000153
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000154static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530155 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
157 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
158 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000159 {NULL, NULL}
160};
161
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000162static PyGetSetDef textiobase_getset[] = {
163 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
164 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
165 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000166 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000167};
168
169PyTypeObject PyTextIOBase_Type = {
170 PyVarObject_HEAD_INIT(NULL, 0)
171 "_io._TextIOBase", /*tp_name*/
172 0, /*tp_basicsize*/
173 0, /*tp_itemsize*/
174 0, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200175 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000176 0, /*tp_getattr*/
177 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200178 0, /*tp_as_async*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000179 0, /*tp_repr*/
180 0, /*tp_as_number*/
181 0, /*tp_as_sequence*/
182 0, /*tp_as_mapping*/
183 0, /*tp_hash */
184 0, /*tp_call*/
185 0, /*tp_str*/
186 0, /*tp_getattro*/
187 0, /*tp_setattro*/
188 0, /*tp_as_buffer*/
Antoine Pitrouada319b2019-05-29 22:12:38 +0200189 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000190 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000191 0, /* tp_traverse */
192 0, /* tp_clear */
193 0, /* tp_richcompare */
194 0, /* tp_weaklistoffset */
195 0, /* tp_iter */
196 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 &PyIOBase_Type, /* tp_base */
201 0, /* tp_dict */
202 0, /* tp_descr_get */
203 0, /* tp_descr_set */
204 0, /* tp_dictoffset */
205 0, /* tp_init */
206 0, /* tp_alloc */
207 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200208 0, /* tp_free */
209 0, /* tp_is_gc */
210 0, /* tp_bases */
211 0, /* tp_mro */
212 0, /* tp_cache */
213 0, /* tp_subclasses */
214 0, /* tp_weaklist */
215 0, /* tp_del */
216 0, /* tp_version_tag */
217 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218};
219
220
221/* IncrementalNewlineDecoder */
222
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000223typedef struct {
224 PyObject_HEAD
225 PyObject *decoder;
226 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200227 unsigned int pendingcr: 1;
228 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000229 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000230} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300232/*[clinic input]
233_io.IncrementalNewlineDecoder.__init__
234 decoder: object
235 translate: int
236 errors: object(c_default="NULL") = "strict"
237
238Codec used when reading a file in universal newlines mode.
239
240It wraps another incremental decoder, translating \r\n and \r into \n.
241It also records the types of newlines encountered. When used with
242translate=False, it ensures that the newline sequence is returned in
243one piece. When used with decoder=None, it expects unicode strings as
244decode input and translates newlines without first invoking an external
245decoder.
246[clinic start generated code]*/
247
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000248static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300249_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
250 PyObject *decoder, int translate,
251 PyObject *errors)
252/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254 self->decoder = decoder;
255 Py_INCREF(decoder);
256
257 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900258 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259 if (self->errors == NULL)
260 return -1;
261 }
262 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000263 self->errors = errors;
264 }
INADA Naoki507434f2017-12-21 09:59:53 +0900265 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266
Xiang Zhangb08746b2018-10-31 19:49:16 +0800267 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 self->seennl = 0;
269 self->pendingcr = 0;
270
271 return 0;
272}
273
274static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000275incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000276{
277 Py_CLEAR(self->decoder);
278 Py_CLEAR(self->errors);
279 Py_TYPE(self)->tp_free((PyObject *)self);
280}
281
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200282static int
283check_decoded(PyObject *decoded)
284{
285 if (decoded == NULL)
286 return -1;
287 if (!PyUnicode_Check(decoded)) {
288 PyErr_Format(PyExc_TypeError,
289 "decoder should return a string result, not '%.200s'",
290 Py_TYPE(decoded)->tp_name);
291 Py_DECREF(decoded);
292 return -1;
293 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200294 if (PyUnicode_READY(decoded) < 0) {
295 Py_DECREF(decoded);
296 return -1;
297 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200298 return 0;
299}
300
/* Bit flags recording which newline conventions have been encountered. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
305
306PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200307_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000308 PyObject *input, int final)
309{
310 PyObject *output;
311 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200312 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000313
314 if (self->decoder == NULL) {
315 PyErr_SetString(PyExc_ValueError,
316 "IncrementalNewlineDecoder.__init__ not called");
317 return NULL;
318 }
319
320 /* decode input (with the eventual \r from a previous pass) */
321 if (self->decoder != Py_None) {
322 output = PyObject_CallMethodObjArgs(self->decoder,
323 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
324 }
325 else {
326 output = input;
327 Py_INCREF(output);
328 }
329
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200330 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000331 return NULL;
332
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000334 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 /* Prefix output with CR */
336 int kind;
337 PyObject *modified;
338 char *out;
339
340 modified = PyUnicode_New(output_len + 1,
341 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 if (modified == NULL)
343 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200344 kind = PyUnicode_KIND(modified);
345 out = PyUnicode_DATA(modified);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300346 PyUnicode_WRITE(kind, out, 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200347 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000348 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200349 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000350 self->pendingcr = 0;
351 output_len++;
352 }
353
354 /* retain last \r even when not translating data:
355 * then readline() is sure to get \r\n in one pass
356 */
357 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000358 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200359 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
360 {
361 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
362 if (modified == NULL)
363 goto error;
364 Py_DECREF(output);
365 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000366 self->pendingcr = 1;
367 }
368 }
369
370 /* Record which newlines are read and do newline translation if desired,
371 all in one pass. */
372 {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300373 const void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000374 Py_ssize_t len;
375 int seennl = self->seennl;
376 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000378
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200379 in_str = PyUnicode_DATA(output);
380 len = PyUnicode_GET_LENGTH(output);
381 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382
383 if (len == 0)
384 return output;
385
386 /* If, up to now, newlines are consistently \n, do a quick check
387 for the \r *byte* with the libc's optimized memchr.
388 */
389 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200390 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000391 }
392
Antoine Pitrou66913e22009-03-06 23:40:56 +0000393 if (only_lf) {
394 /* If not already seen, quick scan for a possible "\n" character.
395 (there's nothing else to be done, even when in translation mode)
396 */
397 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200398 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100399 if (kind == PyUnicode_1BYTE_KIND)
400 seennl |= SEEN_LF;
401 else {
402 Py_ssize_t i = 0;
403 for (;;) {
404 Py_UCS4 c;
405 /* Fast loop for non-control characters */
406 while (PyUnicode_READ(kind, in_str, i) > '\n')
407 i++;
408 c = PyUnicode_READ(kind, in_str, i++);
409 if (c == '\n') {
410 seennl |= SEEN_LF;
411 break;
412 }
413 if (i >= len)
414 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000415 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000416 }
417 }
418 /* Finished: we have scanned for newlines, and none of them
419 need translating */
420 }
421 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200422 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000423 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000424 if (seennl == SEEN_ALL)
425 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000426 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000428 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 while (PyUnicode_READ(kind, in_str, i) > '\r')
430 i++;
431 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000432 if (c == '\n')
433 seennl |= SEEN_LF;
434 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200437 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000438 }
439 else
440 seennl |= SEEN_CR;
441 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000443 break;
444 if (seennl == SEEN_ALL)
445 break;
446 }
447 endscan:
448 ;
449 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000450 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 void *translated;
452 int kind = PyUnicode_KIND(output);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300453 const void *in_str = PyUnicode_DATA(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 Py_ssize_t in, out;
455 /* XXX: Previous in-place translation here is disabled as
456 resizing is not possible anymore */
457 /* We could try to optimize this so that we only do a copy
458 when there is something to translate. On the other hand,
459 we already know there is a \r byte, so chances are high
460 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200461 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 if (translated == NULL) {
463 PyErr_NoMemory();
464 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000467 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000469 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200470 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
471 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200473 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000474 seennl |= SEEN_LF;
475 continue;
476 }
477 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 in++;
480 seennl |= SEEN_CRLF;
481 }
482 else
483 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200484 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000485 continue;
486 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200487 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000488 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200489 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000490 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 Py_DECREF(output);
492 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100493 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200494 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200495 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000496 }
497 self->seennl |= seennl;
498 }
499
500 return output;
501
502 error:
503 Py_DECREF(output);
504 return NULL;
505}
506
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300507/*[clinic input]
508_io.IncrementalNewlineDecoder.decode
509 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200510 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000512
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300513static PyObject *
514_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
515 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200516/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300517{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000518 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
519}
520
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300521/*[clinic input]
522_io.IncrementalNewlineDecoder.getstate
523[clinic start generated code]*/
524
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300526_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
527/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528{
529 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700530 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000531
532 if (self->decoder != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100533 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200534 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000535 if (state == NULL)
536 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300537 if (!PyTuple_Check(state)) {
538 PyErr_SetString(PyExc_TypeError,
539 "illegal decoder state");
540 Py_DECREF(state);
541 return NULL;
542 }
543 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
544 &buffer, &flag))
545 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000546 Py_DECREF(state);
547 return NULL;
548 }
549 Py_INCREF(buffer);
550 Py_DECREF(state);
551 }
552 else {
553 buffer = PyBytes_FromString("");
554 flag = 0;
555 }
556 flag <<= 1;
557 if (self->pendingcr)
558 flag |= 1;
559 return Py_BuildValue("NK", buffer, flag);
560}
561
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300562/*[clinic input]
563_io.IncrementalNewlineDecoder.setstate
564 state: object
565 /
566[clinic start generated code]*/
567
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000568static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300569_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
570 PyObject *state)
571/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700574 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575
Oren Milman1d1d3e92017-08-20 18:35:36 +0300576 if (!PyTuple_Check(state)) {
577 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300579 }
580 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
581 &buffer, &flag))
582 {
583 return NULL;
584 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585
Victor Stinner7d7e7752014-06-17 23:31:25 +0200586 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000587 flag >>= 1;
588
589 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200590 return _PyObject_CallMethodId(self->decoder,
591 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 else
593 Py_RETURN_NONE;
594}
595
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300596/*[clinic input]
597_io.IncrementalNewlineDecoder.reset
598[clinic start generated code]*/
599
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000600static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300601_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
602/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603{
604 self->seennl = 0;
605 self->pendingcr = 0;
606 if (self->decoder != Py_None)
Petr Viktorinffd97532020-02-11 17:46:57 +0100607 return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000608 else
609 Py_RETURN_NONE;
610}
611
612static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614{
615 switch (self->seennl) {
616 case SEEN_CR:
617 return PyUnicode_FromString("\r");
618 case SEEN_LF:
619 return PyUnicode_FromString("\n");
620 case SEEN_CRLF:
621 return PyUnicode_FromString("\r\n");
622 case SEEN_CR | SEEN_LF:
623 return Py_BuildValue("ss", "\r", "\n");
624 case SEEN_CR | SEEN_CRLF:
625 return Py_BuildValue("ss", "\r", "\r\n");
626 case SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("ss", "\n", "\r\n");
628 case SEEN_CR | SEEN_LF | SEEN_CRLF:
629 return Py_BuildValue("sss", "\r", "\n", "\r\n");
630 default:
631 Py_RETURN_NONE;
632 }
633
634}
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636/* TextIOWrapper */
637
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000638typedef PyObject *
639 (*encodefunc_t)(PyObject *, PyObject *);
640
641typedef struct
642{
643 PyObject_HEAD
644 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000645 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000646 Py_ssize_t chunk_size;
647 PyObject *buffer;
648 PyObject *encoding;
649 PyObject *encoder;
650 PyObject *decoder;
651 PyObject *readnl;
652 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900653 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200655 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000656 char readuniversal;
657 char readtranslate;
658 char writetranslate;
659 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200660 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200662 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000663 /* Specialized encoding func (see below) */
664 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000665 /* Whether or not it's the start of the stream */
666 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000667
668 /* Reads and writes are internally buffered in order to speed things up.
669 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000670
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000671 Please also note that text to be written is first encoded before being
672 buffered. This is necessary so that encoding errors are immediately
673 reported to the caller, but it unfortunately means that the
674 IncrementalEncoder (whose encode() method is always written in Python)
675 becomes a bottleneck for small writes.
676 */
677 PyObject *decoded_chars; /* buffer for text returned from decoder */
678 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
Inada Naokibfba8c32019-05-16 15:03:20 +0900679 PyObject *pending_bytes; // data waiting to be written.
680 // ascii unicode, bytes, or list of them.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000681 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000682
Oren Milman13614e32017-08-24 19:51:24 +0300683 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000684 * dec_flags is the second (integer) item of the decoder state and
685 * next_input is the chunk of input bytes that comes next after the
686 * snapshot point. We use this to reconstruct decoder states in tell().
687 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000688 PyObject *snapshot;
689 /* Bytes-to-characters ratio for the current chunk. Serves as input for
690 the heuristic in tell(). */
691 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000692
693 /* Cache raw object if it's a FileIO object */
694 PyObject *raw;
695
696 PyObject *weakreflist;
697 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000698} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000699
Zackery Spytz23db9352018-06-29 04:14:58 -0600700static void
701textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
702
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703/* A couple of specialized cases in order to bypass the slow incremental
704 encoding methods for the most popular encodings. */
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
INADA Naoki507434f2017-12-21 09:59:53 +0900709 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710}
711
712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100715 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900716 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717}
718
719static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000720utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100722 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900723 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724}
725
726static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000727utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728{
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 if (!self->encoding_start_of_stream) {
730 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200731#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000734 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000736 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100737 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900738 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739}
740
Antoine Pitroue4501852009-05-14 18:55:55 +0000741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000743{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100744 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900745 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000746}
747
748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000750{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100751 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900752 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000757{
758 if (!self->encoding_start_of_stream) {
759 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200760#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000761 return utf32be_encode(self, text);
762#else
763 return utf32le_encode(self, text);
764#endif
765 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100766 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900767 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000768}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
INADA Naoki507434f2017-12-21 09:59:53 +0900773 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000774}
775
776static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000777latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778{
INADA Naoki507434f2017-12-21 09:59:53 +0900779 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780}
781
Inada Naokibfba8c32019-05-16 15:03:20 +0900782// Return true when encoding can be skipped when text is ascii.
783static inline int
784is_asciicompat_encoding(encodefunc_t f)
785{
786 return f == (encodefunc_t) ascii_encode
787 || f == (encodefunc_t) latin1_encode
788 || f == (encodefunc_t) utf8_encode;
789}
790
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791/* Map normalized encoding names onto the specialized encoding funcs */
792
793typedef struct {
794 const char *name;
795 encodefunc_t encodefunc;
796} encodefuncentry;
797
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200798static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799 {"ascii", (encodefunc_t) ascii_encode},
800 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000801 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802 {"utf-16-be", (encodefunc_t) utf16be_encode},
803 {"utf-16-le", (encodefunc_t) utf16le_encode},
804 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000805 {"utf-32-be", (encodefunc_t) utf32be_encode},
806 {"utf-32-le", (encodefunc_t) utf32le_encode},
807 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000808 {NULL, NULL}
809};
810
INADA Naoki507434f2017-12-21 09:59:53 +0900811static int
812validate_newline(const char *newline)
813{
814 if (newline && newline[0] != '\0'
815 && !(newline[0] == '\n' && newline[1] == '\0')
816 && !(newline[0] == '\r' && newline[1] == '\0')
817 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
818 PyErr_Format(PyExc_ValueError,
819 "illegal newline value: %s", newline);
820 return -1;
821 }
822 return 0;
823}
824
825static int
826set_newline(textio *self, const char *newline)
827{
828 PyObject *old = self->readnl;
829 if (newline == NULL) {
830 self->readnl = NULL;
831 }
832 else {
833 self->readnl = PyUnicode_FromString(newline);
834 if (self->readnl == NULL) {
835 self->readnl = old;
836 return -1;
837 }
838 }
839 self->readuniversal = (newline == NULL || newline[0] == '\0');
840 self->readtranslate = (newline == NULL);
841 self->writetranslate = (newline == NULL || newline[0] != '\0');
842 if (!self->readuniversal && self->readnl != NULL) {
843 // validate_newline() accepts only ASCII newlines.
844 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
845 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
846 if (strcmp(self->writenl, "\n") == 0) {
847 self->writenl = NULL;
848 }
849 }
850 else {
851#ifdef MS_WINDOWS
852 self->writenl = "\r\n";
853#else
854 self->writenl = NULL;
855#endif
856 }
857 Py_XDECREF(old);
858 return 0;
859}
860
861static int
862_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
863 const char *errors)
864{
865 PyObject *res;
866 int r;
867
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200868 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
INADA Naoki507434f2017-12-21 09:59:53 +0900869 if (res == NULL)
870 return -1;
871
872 r = PyObject_IsTrue(res);
873 Py_DECREF(res);
874 if (r == -1)
875 return -1;
876
877 if (r != 1)
878 return 0;
879
880 Py_CLEAR(self->decoder);
881 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
882 if (self->decoder == NULL)
883 return -1;
884
885 if (self->readuniversal) {
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300886 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
INADA Naoki507434f2017-12-21 09:59:53 +0900887 (PyObject *)&PyIncrementalNewlineDecoder_Type,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300888 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
INADA Naoki507434f2017-12-21 09:59:53 +0900889 if (incrementalDecoder == NULL)
890 return -1;
891 Py_CLEAR(self->decoder);
892 self->decoder = incrementalDecoder;
893 }
894
895 return 0;
896}
897
898static PyObject*
899_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
900{
901 PyObject *chars;
902
Andy Lesterdffe4c02020-03-04 07:15:20 -0600903 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
INADA Naoki507434f2017-12-21 09:59:53 +0900904 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
905 else
906 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
907 eof ? Py_True : Py_False, NULL);
908
909 if (check_decoded(chars) < 0)
910 // check_decoded already decreases refcount
911 return NULL;
912
913 return chars;
914}
915
916static int
917_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
918 const char *errors)
919{
920 PyObject *res;
921 int r;
922
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200923 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
INADA Naoki507434f2017-12-21 09:59:53 +0900924 if (res == NULL)
925 return -1;
926
927 r = PyObject_IsTrue(res);
928 Py_DECREF(res);
929 if (r == -1)
930 return -1;
931
932 if (r != 1)
933 return 0;
934
935 Py_CLEAR(self->encoder);
936 self->encodefunc = NULL;
937 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
938 if (self->encoder == NULL)
939 return -1;
940
941 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200942 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
943 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900944 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200945 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900946 const encodefuncentry *e = encodefuncs;
947 while (e->name != NULL) {
948 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
949 self->encodefunc = e->encodefunc;
950 break;
951 }
952 e++;
953 }
954 }
955 Py_XDECREF(res);
956
957 return 0;
958}
959
960static int
961_textiowrapper_fix_encoder_state(textio *self)
962{
963 if (!self->seekable || !self->encoder) {
964 return 0;
965 }
966
967 self->encoding_start_of_stream = 1;
968
Petr Viktorinffd97532020-02-11 17:46:57 +0100969 PyObject *cookieObj = PyObject_CallMethodNoArgs(
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200970 self->buffer, _PyIO_str_tell);
INADA Naoki507434f2017-12-21 09:59:53 +0900971 if (cookieObj == NULL) {
972 return -1;
973 }
974
Victor Stinner37834132020-10-27 17:12:53 +0100975 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
INADA Naoki507434f2017-12-21 09:59:53 +0900976 Py_DECREF(cookieObj);
977 if (cmp < 0) {
978 return -1;
979 }
980
981 if (cmp == 0) {
982 self->encoding_start_of_stream = 0;
Petr Viktorinffd97532020-02-11 17:46:57 +0100983 PyObject *res = PyObject_CallMethodOneArg(
Victor Stinner37834132020-10-27 17:12:53 +0100984 self->encoder, _PyIO_str_setstate, _PyLong_GetZero());
INADA Naoki507434f2017-12-21 09:59:53 +0900985 if (res == NULL) {
986 return -1;
987 }
988 Py_DECREF(res);
989 }
990
991 return 0;
992}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000993
Victor Stinner22eb6892019-06-26 00:51:05 +0200994static int
995io_check_errors(PyObject *errors)
996{
997 assert(errors != NULL && errors != Py_None);
998
Victor Stinner81a7be32020-04-14 15:14:01 +0200999 PyInterpreterState *interp = _PyInterpreterState_GET();
Victor Stinner22eb6892019-06-26 00:51:05 +02001000#ifndef Py_DEBUG
1001 /* In release mode, only check in development mode (-X dev) */
Victor Stinnerda7933e2020-04-13 03:04:28 +02001002 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001003 return 0;
1004 }
1005#else
1006 /* Always check in debug mode */
1007#endif
1008
1009 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1010 before_PyUnicode_InitEncodings() is called. */
Victor Stinner3d17c042020-05-14 01:48:38 +02001011 if (!interp->unicode.fs_codec.encoding) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001012 return 0;
1013 }
1014
1015 Py_ssize_t name_length;
1016 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1017 if (name == NULL) {
1018 return -1;
1019 }
1020 if (strlen(name) != (size_t)name_length) {
1021 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1022 return -1;
1023 }
1024 PyObject *handler = PyCodec_LookupError(name);
1025 if (handler != NULL) {
1026 Py_DECREF(handler);
1027 return 0;
1028 }
1029 return -1;
1030}
1031
1032
1033
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001034/*[clinic input]
1035_io.TextIOWrapper.__init__
1036 buffer: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001037 encoding: str(accept={str, NoneType}) = None
INADA Naoki507434f2017-12-21 09:59:53 +09001038 errors: object = None
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001039 newline: str(accept={str, NoneType}) = None
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001040 line_buffering: bool(accept={int}) = False
1041 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001043Character and line based layer over a BufferedIOBase object, buffer.
1044
1045encoding gives the name of the encoding that the stream will be
1046decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1047
1048errors determines the strictness of encoding and decoding (see
1049help(codecs.Codec) or the documentation for codecs.register) and
1050defaults to "strict".
1051
1052newline controls how line endings are handled. It can be None, '',
1053'\n', '\r', and '\r\n'. It works as follows:
1054
1055* On input, if newline is None, universal newlines mode is
1056 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1057 these are translated into '\n' before being returned to the
1058 caller. If it is '', universal newline mode is enabled, but line
1059 endings are returned to the caller untranslated. If it has any of
1060 the other legal values, input lines are only terminated by the given
1061 string, and the line ending is returned to the caller untranslated.
1062
1063* On output, if newline is None, any '\n' characters written are
1064 translated to the system default line separator, os.linesep. If
1065 newline is '' or '\n', no translation takes place. If newline is any
1066 of the other legal values, any '\n' characters written are translated
1067 to the given string.
1068
1069If line_buffering is True, a call to flush is implied when a call to
1070write contains a newline character.
1071[clinic start generated code]*/
1072
1073static int
1074_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001075 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001076 const char *newline, int line_buffering,
1077 int write_through)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001078/*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001079{
1080 PyObject *raw, *codec_info = NULL;
1081 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001082 PyObject *res;
1083 int r;
1084
1085 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001086 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087
INADA Naoki507434f2017-12-21 09:59:53 +09001088 if (errors == Py_None) {
1089 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001090 if (errors == NULL) {
1091 return -1;
1092 }
INADA Naoki507434f2017-12-21 09:59:53 +09001093 }
1094 else if (!PyUnicode_Check(errors)) {
1095 // Check 'errors' argument here because Argument Clinic doesn't support
1096 // 'str(accept={str, NoneType})' converter.
1097 PyErr_Format(
1098 PyExc_TypeError,
1099 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001100 Py_TYPE(errors)->tp_name);
INADA Naoki507434f2017-12-21 09:59:53 +09001101 return -1;
1102 }
Victor Stinner22eb6892019-06-26 00:51:05 +02001103 else if (io_check_errors(errors)) {
1104 return -1;
1105 }
INADA Naoki507434f2017-12-21 09:59:53 +09001106
1107 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001108 return -1;
1109 }
1110
1111 Py_CLEAR(self->buffer);
1112 Py_CLEAR(self->encoding);
1113 Py_CLEAR(self->encoder);
1114 Py_CLEAR(self->decoder);
1115 Py_CLEAR(self->readnl);
1116 Py_CLEAR(self->decoded_chars);
1117 Py_CLEAR(self->pending_bytes);
1118 Py_CLEAR(self->snapshot);
1119 Py_CLEAR(self->errors);
1120 Py_CLEAR(self->raw);
1121 self->decoded_chars_used = 0;
1122 self->pending_bytes_count = 0;
1123 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001124 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125
1126 if (encoding == NULL) {
Inada Naoki48274832021-03-29 12:28:14 +09001127 PyInterpreterState *interp = _PyInterpreterState_GET();
1128 if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1129 PyErr_WarnEx(PyExc_EncodingWarning,
1130 "'encoding' argument not specified", 1);
1131 }
1132 }
1133 else if (strcmp(encoding, "locale") == 0) {
1134 encoding = NULL;
1135 }
1136
1137 if (encoding == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001138 /* Try os.device_encoding(fileno) */
1139 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001140 state = IO_STATE();
1141 if (state == NULL)
1142 goto error;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001143 fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001144 /* Ignore only AttributeError and UnsupportedOperation */
1145 if (fileno == NULL) {
1146 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1147 PyErr_ExceptionMatches(state->unsupported_operation)) {
1148 PyErr_Clear();
1149 }
1150 else {
1151 goto error;
1152 }
1153 }
1154 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001155 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001156 Py_DECREF(fileno);
1157 if (fd == -1 && PyErr_Occurred()) {
1158 goto error;
1159 }
1160
1161 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001162 if (self->encoding == NULL)
1163 goto error;
1164 else if (!PyUnicode_Check(self->encoding))
1165 Py_CLEAR(self->encoding);
1166 }
1167 }
1168 if (encoding == NULL && self->encoding == NULL) {
Victor Stinner82458b62020-11-01 20:59:35 +01001169 self->encoding = _Py_GetLocaleEncodingObject();
Antoine Pitrou932ff832013-08-01 21:04:50 +02001170 if (self->encoding == NULL) {
Victor Stinner710e8262020-10-31 01:02:09 +01001171 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001172 }
Victor Stinner710e8262020-10-31 01:02:09 +01001173 assert(PyUnicode_Check(self->encoding));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001174 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001175 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001176 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001177 if (encoding == NULL)
1178 goto error;
1179 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001180 else if (encoding != NULL) {
1181 self->encoding = PyUnicode_FromString(encoding);
1182 if (self->encoding == NULL)
1183 goto error;
1184 }
1185 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001186 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001187 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001188 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001189 }
1190
Nick Coghlana9b15242014-02-04 22:11:18 +10001191 /* Check we have been asked for a real text encoding */
1192 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1193 if (codec_info == NULL) {
1194 Py_CLEAR(self->encoding);
1195 goto error;
1196 }
1197
1198 /* XXX: Failures beyond this point have the potential to leak elements
1199 * of the partially constructed object (like self->encoding)
1200 */
1201
INADA Naoki507434f2017-12-21 09:59:53 +09001202 Py_INCREF(errors);
1203 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001204 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001205 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001206 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001207 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001208 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001209 }
1210
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001211 self->buffer = buffer;
1212 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001213
INADA Naoki507434f2017-12-21 09:59:53 +09001214 /* Build the decoder object */
1215 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1216 goto error;
1217
1218 /* Build the encoder object */
1219 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1220 goto error;
1221
1222 /* Finished sorting out the codec details */
1223 Py_CLEAR(codec_info);
1224
Andy Lesterdffe4c02020-03-04 07:15:20 -06001225 if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1226 Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1227 Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001228 {
1229 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1230 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001231 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001232 if (raw != NULL) {
Andy Lesterdffe4c02020-03-04 07:15:20 -06001233 if (Py_IS_TYPE(raw, &PyFileIO_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001234 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001235 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001236 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001237 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001238 }
1239
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001240 res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001241 if (res == NULL)
1242 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001243 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001244 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001245 if (r < 0)
1246 goto error;
1247 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001249 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1250 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001251 goto error;
1252 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001253 Py_XDECREF(res);
1254 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001255
Antoine Pitroue4501852009-05-14 18:55:55 +00001256 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001257 if (_textiowrapper_fix_encoder_state(self) < 0) {
1258 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001259 }
1260
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001261 self->ok = 1;
1262 return 0;
1263
1264 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001265 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266 return -1;
1267}
1268
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001269/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1270 * -1 on error.
1271 */
1272static int
1273convert_optional_bool(PyObject *obj, int default_value)
1274{
1275 long v;
1276 if (obj == Py_None) {
1277 v = default_value;
1278 }
1279 else {
1280 v = PyLong_AsLong(obj);
1281 if (v == -1 && PyErr_Occurred())
1282 return -1;
1283 }
1284 return v != 0;
1285}
1286
INADA Naoki507434f2017-12-21 09:59:53 +09001287static int
1288textiowrapper_change_encoding(textio *self, PyObject *encoding,
1289 PyObject *errors, int newline_changed)
1290{
1291 /* Use existing settings where new settings are not specified */
1292 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1293 return 0; // no change
1294 }
1295
1296 if (encoding == Py_None) {
1297 encoding = self->encoding;
1298 if (errors == Py_None) {
1299 errors = self->errors;
1300 }
1301 }
1302 else if (errors == Py_None) {
1303 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001304 if (errors == NULL) {
1305 return -1;
1306 }
INADA Naoki507434f2017-12-21 09:59:53 +09001307 }
1308
1309 const char *c_errors = PyUnicode_AsUTF8(errors);
1310 if (c_errors == NULL) {
1311 return -1;
1312 }
1313
1314 // Create new encoder & decoder
1315 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1316 PyUnicode_AsUTF8(encoding), "codecs.open()");
1317 if (codec_info == NULL) {
1318 return -1;
1319 }
1320 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1321 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1322 Py_DECREF(codec_info);
1323 return -1;
1324 }
1325 Py_DECREF(codec_info);
1326
1327 Py_INCREF(encoding);
1328 Py_INCREF(errors);
1329 Py_SETREF(self->encoding, encoding);
1330 Py_SETREF(self->errors, errors);
1331
1332 return _textiowrapper_fix_encoder_state(self);
1333}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001334
1335/*[clinic input]
1336_io.TextIOWrapper.reconfigure
1337 *
INADA Naoki507434f2017-12-21 09:59:53 +09001338 encoding: object = None
1339 errors: object = None
1340 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001341 line_buffering as line_buffering_obj: object = None
1342 write_through as write_through_obj: object = None
1343
1344Reconfigure the text stream with new parameters.
1345
1346This also does an implicit stream flush.
1347
1348[clinic start generated code]*/
1349
1350static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001351_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1352 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001353 PyObject *line_buffering_obj,
1354 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001355/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001356{
1357 int line_buffering;
1358 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001359 const char *newline = NULL;
1360
1361 /* Check if something is in the read buffer */
1362 if (self->decoded_chars != NULL) {
1363 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001364 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001365 "of stream after the first read");
1366 return NULL;
1367 }
1368 }
1369
1370 if (newline_obj != NULL && newline_obj != Py_None) {
1371 newline = PyUnicode_AsUTF8(newline_obj);
1372 if (newline == NULL || validate_newline(newline) < 0) {
1373 return NULL;
1374 }
1375 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001376
1377 line_buffering = convert_optional_bool(line_buffering_obj,
1378 self->line_buffering);
1379 write_through = convert_optional_bool(write_through_obj,
1380 self->write_through);
1381 if (line_buffering < 0 || write_through < 0) {
1382 return NULL;
1383 }
INADA Naoki507434f2017-12-21 09:59:53 +09001384
Petr Viktorinffd97532020-02-11 17:46:57 +01001385 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001386 if (res == NULL) {
1387 return NULL;
1388 }
INADA Naoki507434f2017-12-21 09:59:53 +09001389 Py_DECREF(res);
1390 self->b2cratio = 0;
1391
1392 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1393 return NULL;
1394 }
1395
1396 if (textiowrapper_change_encoding(
1397 self, encoding, errors, newline_obj != NULL) < 0) {
1398 return NULL;
1399 }
1400
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001401 self->line_buffering = line_buffering;
1402 self->write_through = write_through;
1403 Py_RETURN_NONE;
1404}
1405
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001406static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001407textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001408{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001409 self->ok = 0;
1410 Py_CLEAR(self->buffer);
1411 Py_CLEAR(self->encoding);
1412 Py_CLEAR(self->encoder);
1413 Py_CLEAR(self->decoder);
1414 Py_CLEAR(self->readnl);
1415 Py_CLEAR(self->decoded_chars);
1416 Py_CLEAR(self->pending_bytes);
1417 Py_CLEAR(self->snapshot);
1418 Py_CLEAR(self->errors);
1419 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001420
1421 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001422 return 0;
1423}
1424
1425static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001426textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001427{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001428 self->finalizing = 1;
1429 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001430 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001431 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001432 _PyObject_GC_UNTRACK(self);
1433 if (self->weakreflist != NULL)
1434 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001435 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001436 Py_TYPE(self)->tp_free((PyObject *)self);
1437}
1438
1439static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001440textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001441{
1442 Py_VISIT(self->buffer);
1443 Py_VISIT(self->encoding);
1444 Py_VISIT(self->encoder);
1445 Py_VISIT(self->decoder);
1446 Py_VISIT(self->readnl);
1447 Py_VISIT(self->decoded_chars);
1448 Py_VISIT(self->pending_bytes);
1449 Py_VISIT(self->snapshot);
1450 Py_VISIT(self->errors);
1451 Py_VISIT(self->raw);
1452
1453 Py_VISIT(self->dict);
1454 return 0;
1455}
1456
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001457static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001458textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001459
/* Make the calling function return NULL (with an exception set) when the
   stream is closed.  This macro takes some shortcuts to make the common
   case faster: for an exact TextIOWrapper with a cached raw FileIO object
   the closed state is read directly from that object; otherwise the
   `closed` attribute is consulted. */
#define CHECK_CLOSED(self) \
    do { \
        if (!Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) { \
                return NULL; \
            } \
        } \
        else { \
            int _is_closed; \
            if (self->raw == NULL) { \
                PyObject *_closed_obj = textiowrapper_closed_get(self, NULL); \
                if (_closed_obj == NULL) { \
                    return NULL; \
                } \
                _is_closed = PyObject_IsTrue(_closed_obj); \
                Py_DECREF(_closed_obj); \
                if (_is_closed < 0) { \
                    return NULL; \
                } \
            } \
            else { \
                _is_closed = _PyFileIO_closed(self->raw); \
            } \
            if (_is_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1486
/* Make the calling function return NULL with ValueError set when the
   object has not been successfully initialized (self->ok <= 0).
   Expands to a bare `if` statement, not a do/while block. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }
1493
/* Make the calling function return NULL with ValueError set when the
   object is uninitialized or its underlying buffer has been detached.
   Expands to two consecutive statements, not a do/while block. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }
1501
/* Variant of CHECK_ATTACHED for functions returning int: makes the caller
   return -1 with ValueError set when the object is uninitialized or its
   underlying buffer has been detached. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } \
    else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1512
1513
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001514/*[clinic input]
1515_io.TextIOWrapper.detach
1516[clinic start generated code]*/
1517
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001518static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001519_io_TextIOWrapper_detach_impl(textio *self)
1520/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001521{
1522 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001523 CHECK_ATTACHED(self);
Petr Viktorinffd97532020-02-11 17:46:57 +01001524 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001525 if (res == NULL)
1526 return NULL;
1527 Py_DECREF(res);
1528 buffer = self->buffer;
1529 self->buffer = NULL;
1530 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001531 return buffer;
1532}
1533
Antoine Pitrou24f36292009-03-28 22:16:42 +00001534/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001535 underlying buffered object, though. */
1536static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001537_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001539 if (self->pending_bytes == NULL)
1540 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001541
Inada Naokibfba8c32019-05-16 15:03:20 +09001542 PyObject *pending = self->pending_bytes;
1543 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001544
Inada Naokibfba8c32019-05-16 15:03:20 +09001545 if (PyBytes_Check(pending)) {
1546 b = pending;
1547 Py_INCREF(b);
1548 }
1549 else if (PyUnicode_Check(pending)) {
1550 assert(PyUnicode_IS_ASCII(pending));
1551 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1552 b = PyBytes_FromStringAndSize(
1553 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1554 if (b == NULL) {
1555 return -1;
1556 }
1557 }
1558 else {
1559 assert(PyList_Check(pending));
1560 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1561 if (b == NULL) {
1562 return -1;
1563 }
1564
1565 char *buf = PyBytes_AsString(b);
1566 Py_ssize_t pos = 0;
1567
1568 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1569 PyObject *obj = PyList_GET_ITEM(pending, i);
1570 char *src;
1571 Py_ssize_t len;
1572 if (PyUnicode_Check(obj)) {
1573 assert(PyUnicode_IS_ASCII(obj));
1574 src = PyUnicode_DATA(obj);
1575 len = PyUnicode_GET_LENGTH(obj);
1576 }
1577 else {
1578 assert(PyBytes_Check(obj));
1579 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1580 Py_DECREF(b);
1581 return -1;
1582 }
1583 }
1584 memcpy(buf + pos, src, len);
1585 pos += len;
1586 }
1587 assert(pos == self->pending_bytes_count);
1588 }
1589
1590 self->pending_bytes_count = 0;
1591 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001592 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001593
1594 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001595 do {
Petr Viktorinffd97532020-02-11 17:46:57 +01001596 ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001597 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001598 Py_DECREF(b);
Inada Naoki01806d52021-02-22 08:29:30 +09001599 // NOTE: We cleared buffer but we don't know how many bytes are actually written
1600 // when an error occurred.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001601 if (ret == NULL)
1602 return -1;
1603 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604 return 0;
1605}
1606
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001607/*[clinic input]
1608_io.TextIOWrapper.write
1609 text: unicode
1610 /
1611[clinic start generated code]*/
1612
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001614_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1615/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616{
1617 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001618 PyObject *b;
1619 Py_ssize_t textlen;
1620 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001621 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001622
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001623 if (PyUnicode_READY(text) == -1)
1624 return NULL;
1625
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001626 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001627 CHECK_CLOSED(self);
1628
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001629 if (self->encoder == NULL)
1630 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001631
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 Py_INCREF(text);
1633
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001634 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001635
1636 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001637 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001638 haslf = 1;
1639
1640 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001641 PyObject *newtext = _PyObject_CallMethodId(
1642 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643 Py_DECREF(text);
1644 if (newtext == NULL)
1645 return NULL;
1646 text = newtext;
1647 }
1648
Antoine Pitroue96ec682011-07-23 21:46:35 +02001649 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001650 text_needflush = 1;
1651 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001652 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001653 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001654 needflush = 1;
1655
1656 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001657 if (self->encodefunc != NULL) {
Inada Naoki01806d52021-02-22 08:29:30 +09001658 if (PyUnicode_IS_ASCII(text) &&
1659 // See bpo-43260
1660 PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1661 is_asciicompat_encoding(self->encodefunc)) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001662 b = text;
1663 Py_INCREF(b);
1664 }
1665 else {
1666 b = (*self->encodefunc)((PyObject *) self, text);
1667 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001668 self->encoding_start_of_stream = 0;
1669 }
Inada Naoki01806d52021-02-22 08:29:30 +09001670 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01001671 b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
Inada Naoki01806d52021-02-22 08:29:30 +09001672 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001673
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674 Py_DECREF(text);
1675 if (b == NULL)
1676 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001677 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001678 PyErr_Format(PyExc_TypeError,
1679 "encoder should return a bytes object, not '%.200s'",
1680 Py_TYPE(b)->tp_name);
1681 Py_DECREF(b);
1682 return NULL;
1683 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001684
Inada Naokibfba8c32019-05-16 15:03:20 +09001685 Py_ssize_t bytes_len;
1686 if (b == text) {
1687 bytes_len = PyUnicode_GET_LENGTH(b);
1688 }
1689 else {
1690 bytes_len = PyBytes_GET_SIZE(b);
1691 }
1692
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001694 self->pending_bytes_count = 0;
1695 self->pending_bytes = b;
1696 }
Inada Naoki01806d52021-02-22 08:29:30 +09001697 else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1698 // Prevent to concatenate more than chunk_size data.
1699 if (_textiowrapper_writeflush(self) < 0) {
1700 Py_DECREF(b);
1701 return NULL;
1702 }
1703 self->pending_bytes = b;
1704 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001705 else if (!PyList_CheckExact(self->pending_bytes)) {
1706 PyObject *list = PyList_New(2);
1707 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001708 Py_DECREF(b);
1709 return NULL;
1710 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001711 PyList_SET_ITEM(list, 0, self->pending_bytes);
1712 PyList_SET_ITEM(list, 1, b);
1713 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001715 else {
1716 if (PyList_Append(self->pending_bytes, b) < 0) {
1717 Py_DECREF(b);
1718 return NULL;
1719 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001722
1723 self->pending_bytes_count += bytes_len;
Inada Naoki01806d52021-02-22 08:29:30 +09001724 if (self->pending_bytes_count >= self->chunk_size || needflush ||
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001725 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001726 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001727 return NULL;
1728 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001729
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 if (needflush) {
Petr Viktorinffd97532020-02-11 17:46:57 +01001731 ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001732 if (ret == NULL)
1733 return NULL;
1734 Py_DECREF(ret);
1735 }
1736
Zackery Spytz23db9352018-06-29 04:14:58 -06001737 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001738 Py_CLEAR(self->snapshot);
1739
1740 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001741 ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001742 if (ret == NULL)
1743 return NULL;
1744 Py_DECREF(ret);
1745 }
1746
1747 return PyLong_FromSsize_t(textlen);
1748}
1749
1750/* Steal a reference to chars and store it in the decoded_char buffer;
1751 */
1752static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001753textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001754{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001755 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756 self->decoded_chars_used = 0;
1757}
1758
1759static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001760textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761{
1762 PyObject *chars;
1763 Py_ssize_t avail;
1764
1765 if (self->decoded_chars == NULL)
1766 return PyUnicode_FromStringAndSize(NULL, 0);
1767
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001768 /* decoded_chars is guaranteed to be "ready". */
1769 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001770 - self->decoded_chars_used);
1771
1772 assert(avail >= 0);
1773
1774 if (n < 0 || n > avail)
1775 n = avail;
1776
1777 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001778 chars = PyUnicode_Substring(self->decoded_chars,
1779 self->decoded_chars_used,
1780 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001781 if (chars == NULL)
1782 return NULL;
1783 }
1784 else {
1785 chars = self->decoded_chars;
1786 Py_INCREF(chars);
1787 }
1788
1789 self->decoded_chars_used += n;
1790 return chars;
1791}
1792
1793/* Read and decode the next chunk of data from the BufferedReader.
1794 */
1795static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001796textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001797{
1798 PyObject *dec_buffer = NULL;
1799 PyObject *dec_flags = NULL;
1800 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001801 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001802 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001803 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001804 int eof;
1805
1806 /* The return value is True unless EOF was reached. The decoded string is
1807 * placed in self._decoded_chars (replacing its previous value). The
1808 * entire input chunk is sent to the decoder, though some of it may remain
1809 * buffered in the decoder, yet to be converted.
1810 */
1811
1812 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001813 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001814 return -1;
1815 }
1816
1817 if (self->telling) {
1818 /* To prepare for tell(), we need to snapshot a point in the file
1819 * where the decoder's input buffer is empty.
1820 */
Petr Viktorinffd97532020-02-11 17:46:57 +01001821 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001822 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001823 if (state == NULL)
1824 return -1;
1825 /* Given this, we know there was a valid snapshot point
1826 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1827 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001828 if (!PyTuple_Check(state)) {
1829 PyErr_SetString(PyExc_TypeError,
1830 "illegal decoder state");
1831 Py_DECREF(state);
1832 return -1;
1833 }
1834 if (!PyArg_ParseTuple(state,
1835 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1836 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001837 Py_DECREF(state);
1838 return -1;
1839 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001840
1841 if (!PyBytes_Check(dec_buffer)) {
1842 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001843 "illegal decoder state: the first item should be a "
1844 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001845 Py_TYPE(dec_buffer)->tp_name);
1846 Py_DECREF(state);
1847 return -1;
1848 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001849 Py_INCREF(dec_buffer);
1850 Py_INCREF(dec_flags);
1851 Py_DECREF(state);
1852 }
1853
1854 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001855 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001856 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001857 }
1858 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001859 if (chunk_size == NULL)
1860 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001861
Petr Viktorinffd97532020-02-11 17:46:57 +01001862 input_chunk = PyObject_CallMethodOneArg(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001863 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001864 chunk_size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001865 Py_DECREF(chunk_size);
1866 if (input_chunk == NULL)
1867 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001868
1869 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001870 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001871 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001872 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1873 Py_TYPE(input_chunk)->tp_name);
1874 goto fail;
1875 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001876
Antoine Pitroub8503892014-04-29 10:14:02 +02001877 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001878 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001879
INADA Naoki507434f2017-12-21 09:59:53 +09001880 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1881 PyBuffer_Release(&input_chunk_buf);
1882 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001883 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001884
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001885 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001886 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001887 if (nchars > 0)
1888 self->b2cratio = (double) nbytes / nchars;
1889 else
1890 self->b2cratio = 0.0;
1891 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001892 eof = 0;
1893
1894 if (self->telling) {
1895 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1896 * next input to be decoded is dec_buffer + input_chunk.
1897 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001898 PyObject *next_input = dec_buffer;
1899 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001900 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001901 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001902 goto fail;
1903 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001904 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1905 if (snapshot == NULL) {
1906 dec_flags = NULL;
1907 goto fail;
1908 }
1909 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001910 }
1911 Py_DECREF(input_chunk);
1912
1913 return (eof == 0);
1914
1915 fail:
1916 Py_XDECREF(dec_buffer);
1917 Py_XDECREF(dec_flags);
1918 Py_XDECREF(input_chunk);
1919 return -1;
1920}
1921
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001922/*[clinic input]
1923_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001924 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001925 /
1926[clinic start generated code]*/
1927
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001928static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001929_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001930/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001931{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001932 PyObject *result = NULL, *chunks = NULL;
1933
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001934 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001935 CHECK_CLOSED(self);
1936
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001937 if (self->decoder == NULL)
1938 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001939
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001940 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001941 return NULL;
1942
1943 if (n < 0) {
1944 /* Read everything */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001945 PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946 PyObject *decoded;
1947 if (bytes == NULL)
1948 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001949
Andy Lesterdffe4c02020-03-04 07:15:20 -06001950 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
Victor Stinnerfd821132011-05-25 22:01:33 +02001951 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1952 bytes, 1);
1953 else
1954 decoded = PyObject_CallMethodObjArgs(
1955 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001956 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001957 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001958 goto fail;
1959
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001960 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001961
1962 if (result == NULL) {
1963 Py_DECREF(decoded);
1964 return NULL;
1965 }
1966
1967 PyUnicode_AppendAndDel(&result, decoded);
1968 if (result == NULL)
1969 goto fail;
1970
Zackery Spytz23db9352018-06-29 04:14:58 -06001971 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001972 Py_CLEAR(self->snapshot);
1973 return result;
1974 }
1975 else {
1976 int res = 1;
1977 Py_ssize_t remaining = n;
1978
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001979 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001980 if (result == NULL)
1981 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001982 if (PyUnicode_READY(result) == -1)
1983 goto fail;
1984 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001985
1986 /* Keep reading chunks until we have n characters to return */
1987 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001988 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001989 if (res < 0) {
1990 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1991 when EINTR occurs so we needn't do it ourselves. */
1992 if (_PyIO_trap_eintr()) {
1993 continue;
1994 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001995 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001996 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001997 if (res == 0) /* EOF */
1998 break;
1999 if (chunks == NULL) {
2000 chunks = PyList_New(0);
2001 if (chunks == NULL)
2002 goto fail;
2003 }
Antoine Pitroue5324562011-11-19 00:39:01 +01002004 if (PyUnicode_GET_LENGTH(result) > 0 &&
2005 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002006 goto fail;
2007 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002008 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009 if (result == NULL)
2010 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002011 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002012 }
2013 if (chunks != NULL) {
2014 if (result != NULL && PyList_Append(chunks, result) < 0)
2015 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03002016 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002017 if (result == NULL)
2018 goto fail;
2019 Py_CLEAR(chunks);
2020 }
2021 return result;
2022 }
2023 fail:
2024 Py_XDECREF(result);
2025 Py_XDECREF(chunks);
2026 return NULL;
2027}
2028
2029
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002030/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031 that is to the NUL character. Otherwise the function will produce
2032 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002033static const char *
2034find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002035{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002036 if (kind == PyUnicode_1BYTE_KIND) {
2037 assert(ch < 256);
Andy Lestere6be9b52020-02-11 20:28:35 -06002038 return (char *) memchr((const void *) s, (char) ch, end - s);
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002039 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002040 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002041 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002042 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002043 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 return s;
2045 if (s == end)
2046 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002047 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002048 }
2049}
2050
2051Py_ssize_t
2052_PyIO_find_line_ending(
2053 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002054 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002055{
Andy Lestere6be9b52020-02-11 20:28:35 -06002056 Py_ssize_t len = (end - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002057
2058 if (translated) {
2059 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002060 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002061 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002062 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 else {
2064 *consumed = len;
2065 return -1;
2066 }
2067 }
2068 else if (universal) {
2069 /* Universal newline search. Find any of \r, \r\n, \n
2070 * The decoder ensures that \r\n are not split in two pieces
2071 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002072 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002073 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002074 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002075 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002076 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002077 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002078 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 if (s >= end) {
2080 *consumed = len;
2081 return -1;
2082 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002083 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002084 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002085 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002086 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002087 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002088 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002089 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002090 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002091 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 }
2093 }
2094 }
2095 else {
2096 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002097 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002098 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002099 /* Assume that readnl is an ASCII character. */
2100 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002101 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002102 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002104 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 *consumed = len;
2106 return -1;
2107 }
2108 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002109 const char *s = start;
2110 const char *e = end - (readnl_len - 1)*kind;
2111 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002112 if (e < s)
2113 e = s;
2114 while (s < e) {
2115 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002116 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117 if (pos == NULL || pos >= e)
2118 break;
2119 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002120 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002121 break;
2122 }
2123 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002124 return (pos - start)/kind + readnl_len;
2125 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002126 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002127 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002128 if (pos == NULL)
2129 *consumed = len;
2130 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002131 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002132 return -1;
2133 }
2134 }
2135}
2136
2137static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002138_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002139{
2140 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2141 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2142 int res;
2143
2144 CHECK_CLOSED(self);
2145
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002146 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002147 return NULL;
2148
2149 chunked = 0;
2150
2151 while (1) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002152 const char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002153 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002154 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002155 Py_ssize_t consumed = 0;
2156
2157 /* First, get some data if necessary */
2158 res = 1;
2159 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002160 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002161 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002162 if (res < 0) {
2163 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2164 when EINTR occurs so we needn't do it ourselves. */
2165 if (_PyIO_trap_eintr()) {
2166 continue;
2167 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002168 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002169 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002170 if (res == 0)
2171 break;
2172 }
2173 if (res == 0) {
2174 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002175 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002176 Py_CLEAR(self->snapshot);
2177 start = endpos = offset_to_buffer = 0;
2178 break;
2179 }
2180
2181 if (remaining == NULL) {
2182 line = self->decoded_chars;
2183 start = self->decoded_chars_used;
2184 offset_to_buffer = 0;
2185 Py_INCREF(line);
2186 }
2187 else {
2188 assert(self->decoded_chars_used == 0);
2189 line = PyUnicode_Concat(remaining, self->decoded_chars);
2190 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002191 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002192 Py_CLEAR(remaining);
2193 if (line == NULL)
2194 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002195 if (PyUnicode_READY(line) == -1)
2196 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002197 }
2198
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002199 ptr = PyUnicode_DATA(line);
2200 line_len = PyUnicode_GET_LENGTH(line);
2201 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002202
2203 endpos = _PyIO_find_line_ending(
2204 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002205 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002206 ptr + kind * start,
2207 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002208 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002209 if (endpos >= 0) {
2210 endpos += start;
2211 if (limit >= 0 && (endpos - start) + chunked >= limit)
2212 endpos = start + limit - chunked;
2213 break;
2214 }
2215
2216 /* We can put aside up to `endpos` */
2217 endpos = consumed + start;
2218 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2219 /* Didn't find line ending, but reached length limit */
2220 endpos = start + limit - chunked;
2221 break;
2222 }
2223
2224 if (endpos > start) {
2225 /* No line ending seen yet - put aside current data */
2226 PyObject *s;
2227 if (chunks == NULL) {
2228 chunks = PyList_New(0);
2229 if (chunks == NULL)
2230 goto error;
2231 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002232 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002233 if (s == NULL)
2234 goto error;
2235 if (PyList_Append(chunks, s) < 0) {
2236 Py_DECREF(s);
2237 goto error;
2238 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002239 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002240 Py_DECREF(s);
2241 }
2242 /* There may be some remaining bytes we'll have to prepend to the
2243 next chunk of data */
2244 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002245 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002246 if (remaining == NULL)
2247 goto error;
2248 }
2249 Py_CLEAR(line);
2250 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002251 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252 }
2253
2254 if (line != NULL) {
2255 /* Our line ends in the current buffer */
2256 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002257 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2258 PyObject *s = PyUnicode_Substring(line, start, endpos);
2259 Py_CLEAR(line);
2260 if (s == NULL)
2261 goto error;
2262 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002263 }
2264 }
2265 if (remaining != NULL) {
2266 if (chunks == NULL) {
2267 chunks = PyList_New(0);
2268 if (chunks == NULL)
2269 goto error;
2270 }
2271 if (PyList_Append(chunks, remaining) < 0)
2272 goto error;
2273 Py_CLEAR(remaining);
2274 }
2275 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002276 if (line != NULL) {
2277 if (PyList_Append(chunks, line) < 0)
2278 goto error;
2279 Py_DECREF(line);
2280 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002281 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2282 if (line == NULL)
2283 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002284 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002285 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002286 if (line == NULL) {
2287 Py_INCREF(_PyIO_empty_str);
2288 line = _PyIO_empty_str;
2289 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002290
2291 return line;
2292
2293 error:
2294 Py_XDECREF(chunks);
2295 Py_XDECREF(remaining);
2296 Py_XDECREF(line);
2297 return NULL;
2298}
2299
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002300/*[clinic input]
2301_io.TextIOWrapper.readline
2302 size: Py_ssize_t = -1
2303 /
2304[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002305
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002306static PyObject *
2307_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2308/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2309{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002310 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002311 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002312}
2313
2314/* Seek and Tell */
2315
2316typedef struct {
2317 Py_off_t start_pos;
2318 int dec_flags;
2319 int bytes_to_feed;
2320 int chars_to_skip;
2321 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002322} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002323
2324/*
2325 To speed up cookie packing/unpacking, we store the fields in a temporary
2326 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
2327 The following macros define at which offsets in the intermediary byte
2328 string the various CookieStruct fields will be stored.
2329 */
2330
2331#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
2332
Christian Heimes743e0cd2012-10-17 23:52:17 +02002333#if PY_BIG_ENDIAN
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002334/* We want the least significant byte of start_pos to also be the least
2335 significant byte of the cookie, which means that in big-endian mode we
2336 must copy the fields in reverse order. */
2337
2338# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
2339# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
2340# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
2341# define OFF_CHARS_TO_SKIP (sizeof(char))
2342# define OFF_NEED_EOF 0
2343
2344#else
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345/* Little-endian mode: the least significant byte of start_pos will
2346 naturally end up the least significant byte of the cookie. */
2347
2348# define OFF_START_POS 0
2349# define OFF_DEC_FLAGS (sizeof(Py_off_t))
2350# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
2351# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
2352# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
2353
2354#endif
2355
2356static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002357textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002358{
2359 unsigned char buffer[COOKIE_BUF_LEN];
2360 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2361 if (cookieLong == NULL)
2362 return -1;
2363
2364 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002365 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002366 Py_DECREF(cookieLong);
2367 return -1;
2368 }
2369 Py_DECREF(cookieLong);
2370
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002371 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2372 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2373 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2374 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2375 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002376
2377 return 0;
2378}
2379
2380static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002381textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002382{
2383 unsigned char buffer[COOKIE_BUF_LEN];
2384
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002385 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2386 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2387 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2388 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2389 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002390
Christian Heimes743e0cd2012-10-17 23:52:17 +02002391 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2392 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002393}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002394
2395static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002396_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002397{
2398 PyObject *res;
2399 /* When seeking to the start of the stream, we call decoder.reset()
2400 rather than decoder.getstate().
2401 This is for a few decoders such as utf-16 for which the state value
2402 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2403 utf-16, that we are expecting a BOM).
2404 */
2405 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
Petr Viktorinffd97532020-02-11 17:46:57 +01002406 res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002407 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002408 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2409 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002410 if (res == NULL)
2411 return -1;
2412 Py_DECREF(res);
2413 return 0;
2414}
2415
Antoine Pitroue4501852009-05-14 18:55:55 +00002416static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002417_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002418{
2419 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002420 if (start_of_stream) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002421 res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
Antoine Pitroue4501852009-05-14 18:55:55 +00002422 self->encoding_start_of_stream = 1;
2423 }
2424 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01002425 res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
Victor Stinner37834132020-10-27 17:12:53 +01002426 _PyLong_GetZero());
Antoine Pitroue4501852009-05-14 18:55:55 +00002427 self->encoding_start_of_stream = 0;
2428 }
2429 if (res == NULL)
2430 return -1;
2431 Py_DECREF(res);
2432 return 0;
2433}
2434
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002435static int
2436_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2437{
2438 /* Same as _textiowrapper_decoder_setstate() above. */
2439 return _textiowrapper_encoder_reset(
2440 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2441}
2442
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002443/*[clinic input]
2444_io.TextIOWrapper.seek
2445 cookie as cookieObj: object
2446 whence: int = 0
2447 /
2448[clinic start generated code]*/
2449
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002451_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2452/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002453{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002454 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002455 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002456 PyObject *res;
2457 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002458 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002459
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002460 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002461 CHECK_CLOSED(self);
2462
2463 Py_INCREF(cookieObj);
2464
2465 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002466 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002467 goto fail;
2468 }
2469
Victor Stinner37834132020-10-27 17:12:53 +01002470 PyObject *zero = _PyLong_GetZero(); // borrowed reference
2471
ngie-eign848037c2019-03-02 23:28:26 -08002472 switch (whence) {
2473 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002474 /* seek relative to current position */
Victor Stinner37834132020-10-27 17:12:53 +01002475 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002476 if (cmp < 0)
2477 goto fail;
2478
2479 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002480 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002481 goto fail;
2482 }
2483
2484 /* Seeking to the current position should attempt to
2485 * sync the underlying buffer with the current position.
2486 */
2487 Py_DECREF(cookieObj);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002488 cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489 if (cookieObj == NULL)
2490 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002491 break;
2492
ngie-eign848037c2019-03-02 23:28:26 -08002493 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494 /* seek relative to end of file */
Victor Stinner37834132020-10-27 17:12:53 +01002495 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002496 if (cmp < 0)
2497 goto fail;
2498
2499 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002500 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002501 goto fail;
2502 }
2503
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002504 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002505 if (res == NULL)
2506 goto fail;
2507 Py_DECREF(res);
2508
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002509 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002510 Py_CLEAR(self->snapshot);
2511 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002512 res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513 if (res == NULL)
2514 goto fail;
2515 Py_DECREF(res);
2516 }
2517
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002518 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002519 Py_CLEAR(cookieObj);
2520 if (res == NULL)
2521 goto fail;
2522 if (self->encoder) {
2523 /* If seek() == 0, we are at the start of stream, otherwise not */
Victor Stinner37834132020-10-27 17:12:53 +01002524 cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002525 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2526 Py_DECREF(res);
2527 goto fail;
2528 }
2529 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002531
ngie-eign848037c2019-03-02 23:28:26 -08002532 case SEEK_SET:
2533 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002534
ngie-eign848037c2019-03-02 23:28:26 -08002535 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002537 "invalid whence (%d, should be %d, %d or %d)", whence,
2538 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002539 goto fail;
2540 }
2541
Victor Stinner37834132020-10-27 17:12:53 +01002542 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002543 if (cmp < 0)
2544 goto fail;
2545
2546 if (cmp == 1) {
2547 PyErr_Format(PyExc_ValueError,
2548 "negative seek position %R", cookieObj);
2549 goto fail;
2550 }
2551
Petr Viktorinffd97532020-02-11 17:46:57 +01002552 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002553 if (res == NULL)
2554 goto fail;
2555 Py_DECREF(res);
2556
2557 /* The strategy of seek() is to go back to the safe start point
2558 * and replay the effect of read(chars_to_skip) from there.
2559 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002560 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002561 goto fail;
2562
2563 /* Seek back to the safe start point. */
2564 posobj = PyLong_FromOff_t(cookie.start_pos);
2565 if (posobj == NULL)
2566 goto fail;
Petr Viktorinffd97532020-02-11 17:46:57 +01002567 res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002568 Py_DECREF(posobj);
2569 if (res == NULL)
2570 goto fail;
2571 Py_DECREF(res);
2572
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002573 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002574 Py_CLEAR(self->snapshot);
2575
2576 /* Restore the decoder to its state from the safe start point. */
2577 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002578 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002579 goto fail;
2580 }
2581
2582 if (cookie.chars_to_skip) {
2583 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002584 PyObject *input_chunk = _PyObject_CallMethodId(
2585 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002586 PyObject *decoded;
2587
2588 if (input_chunk == NULL)
2589 goto fail;
2590
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002591 if (!PyBytes_Check(input_chunk)) {
2592 PyErr_Format(PyExc_TypeError,
2593 "underlying read() should have returned a bytes "
2594 "object, not '%.200s'",
2595 Py_TYPE(input_chunk)->tp_name);
2596 Py_DECREF(input_chunk);
2597 goto fail;
2598 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002599
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002600 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2601 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002602 goto fail;
2603 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002604 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002605
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002606 decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2607 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002608
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002609 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002610 goto fail;
2611
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002612 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002613
2614 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002615 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002616 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002617 goto fail;
2618 }
2619 self->decoded_chars_used = cookie.chars_to_skip;
2620 }
2621 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002622 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2623 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002624 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002625 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002626 }
2627
Antoine Pitroue4501852009-05-14 18:55:55 +00002628 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2629 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002630 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002631 goto fail;
2632 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002633 return cookieObj;
2634 fail:
2635 Py_XDECREF(cookieObj);
2636 return NULL;
2637
2638}
2639
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002640/*[clinic input]
2641_io.TextIOWrapper.tell
2642[clinic start generated code]*/
2643
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002644static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002645_io_TextIOWrapper_tell_impl(textio *self)
2646/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002647{
2648 PyObject *res;
2649 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002650 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002651 PyObject *next_input;
2652 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002653 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002654 PyObject *saved_state = NULL;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002655 const char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002656 Py_ssize_t dec_buffer_len;
2657 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002658
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002659 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660 CHECK_CLOSED(self);
2661
2662 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002663 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002664 goto fail;
2665 }
2666 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002667 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002668 "telling position disabled by next() call");
2669 goto fail;
2670 }
2671
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002672 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002674 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002675 if (res == NULL)
2676 goto fail;
2677 Py_DECREF(res);
2678
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002679 posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680 if (posobj == NULL)
2681 goto fail;
2682
2683 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002684 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002685 return posobj;
2686 }
2687
2688#if defined(HAVE_LARGEFILE_SUPPORT)
2689 cookie.start_pos = PyLong_AsLongLong(posobj);
2690#else
2691 cookie.start_pos = PyLong_AsLong(posobj);
2692#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002693 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002694 if (PyErr_Occurred())
2695 goto fail;
2696
2697 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002698 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002699 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002700 goto fail;
2701
2702 assert (PyBytes_Check(next_input));
2703
2704 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2705
2706 /* How many decoded characters have been used up since the snapshot? */
2707 if (self->decoded_chars_used == 0) {
2708 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002709 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002710 }
2711
2712 chars_to_skip = self->decoded_chars_used;
2713
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002714 /* Decoder state will be restored at the end */
Petr Viktorinffd97532020-02-11 17:46:57 +01002715 saved_state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002716 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002717 if (saved_state == NULL)
2718 goto fail;
2719
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002720#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002721 PyObject *dec_buffer; \
Petr Viktorinffd97532020-02-11 17:46:57 +01002722 PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002723 _PyIO_str_getstate); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002724 if (_state == NULL) \
2725 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002726 if (!PyTuple_Check(_state)) { \
2727 PyErr_SetString(PyExc_TypeError, \
2728 "illegal decoder state"); \
2729 Py_DECREF(_state); \
2730 goto fail; \
2731 } \
2732 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2733 &dec_buffer, &dec_flags)) \
2734 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002735 Py_DECREF(_state); \
2736 goto fail; \
2737 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002738 if (!PyBytes_Check(dec_buffer)) { \
2739 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002740 "illegal decoder state: the first item should be a " \
2741 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002742 Py_TYPE(dec_buffer)->tp_name); \
2743 Py_DECREF(_state); \
2744 goto fail; \
2745 } \
2746 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002747 Py_DECREF(_state); \
2748 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002749
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002750#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002751 PyObject *_decoded = _PyObject_CallMethodId( \
2752 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002753 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002754 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002755 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002756 Py_DECREF(_decoded); \
2757 } while (0)
2758
2759 /* Fast search for an acceptable start point, close to our
2760 current pos */
2761 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2762 skip_back = 1;
2763 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2764 input = PyBytes_AS_STRING(next_input);
2765 while (skip_bytes > 0) {
2766 /* Decode up to temptative start point */
2767 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2768 goto fail;
2769 DECODER_DECODE(input, skip_bytes, chars_decoded);
2770 if (chars_decoded <= chars_to_skip) {
2771 DECODER_GETSTATE();
2772 if (dec_buffer_len == 0) {
2773 /* Before pos and no bytes buffered in decoder => OK */
2774 cookie.dec_flags = dec_flags;
2775 chars_to_skip -= chars_decoded;
2776 break;
2777 }
2778 /* Skip back by buffered amount and reset heuristic */
2779 skip_bytes -= dec_buffer_len;
2780 skip_back = 1;
2781 }
2782 else {
2783 /* We're too far ahead, skip back a bit */
2784 skip_bytes -= skip_back;
2785 skip_back *= 2;
2786 }
2787 }
2788 if (skip_bytes <= 0) {
2789 skip_bytes = 0;
2790 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2791 goto fail;
2792 }
2793
2794 /* Note our initial start point. */
2795 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002796 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002797 if (chars_to_skip == 0)
2798 goto finally;
2799
2800 /* We should be close to the desired position. Now feed the decoder one
2801 * byte at a time until we reach the `chars_to_skip` target.
2802 * As we go, note the nearest "safe start point" before the current
2803 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002804 * can safely start from there and advance to this location).
2805 */
2806 chars_decoded = 0;
2807 input = PyBytes_AS_STRING(next_input);
2808 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002809 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002810 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002811 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002812
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002813 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002814 /* We got n chars for 1 byte */
2815 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002816 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002817 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002818
2819 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2820 /* Decoder buffer is empty, so this is a safe start point. */
2821 cookie.start_pos += cookie.bytes_to_feed;
2822 chars_to_skip -= chars_decoded;
2823 cookie.dec_flags = dec_flags;
2824 cookie.bytes_to_feed = 0;
2825 chars_decoded = 0;
2826 }
2827 if (chars_decoded >= chars_to_skip)
2828 break;
2829 input++;
2830 }
2831 if (input == input_end) {
2832 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002833 PyObject *decoded = _PyObject_CallMethodId(
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002834 self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002835 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002836 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002837 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002838 Py_DECREF(decoded);
2839 cookie.need_eof = 1;
2840
2841 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002842 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002843 "can't reconstruct logical file position");
2844 goto fail;
2845 }
2846 }
2847
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002848finally:
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002849 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002850 Py_DECREF(saved_state);
2851 if (res == NULL)
2852 return NULL;
2853 Py_DECREF(res);
2854
2855 /* The returned cookie corresponds to the last safe start point. */
2856 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002857 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002858
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002859fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002860 if (saved_state) {
2861 PyObject *type, *value, *traceback;
2862 PyErr_Fetch(&type, &value, &traceback);
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002863 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002864 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002865 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002866 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002867 }
2868 return NULL;
2869}
2870
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002871/*[clinic input]
2872_io.TextIOWrapper.truncate
2873 pos: object = None
2874 /
2875[clinic start generated code]*/
2876
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002877static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002878_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2879/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002880{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002881 PyObject *res;
2882
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002883 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002884
Petr Viktorinffd97532020-02-11 17:46:57 +01002885 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002886 if (res == NULL)
2887 return NULL;
2888 Py_DECREF(res);
2889
Petr Viktorinffd97532020-02-11 17:46:57 +01002890 return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002891}
2892
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002893static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002894textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002895{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002896 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002897 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002898
2899 CHECK_INITIALIZED(self);
2900
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002901 res = PyUnicode_FromString("<_io.TextIOWrapper");
2902 if (res == NULL)
2903 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002904
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002905 status = Py_ReprEnter((PyObject *)self);
2906 if (status != 0) {
2907 if (status > 0) {
2908 PyErr_Format(PyExc_RuntimeError,
2909 "reentrant call inside %s.__repr__",
2910 Py_TYPE(self)->tp_name);
2911 }
2912 goto error;
2913 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002914 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2915 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002916 goto error;
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002917 }
2918 /* Ignore ValueError raised if the underlying stream was detached */
2919 PyErr_Clear();
Antoine Pitrou716c4442009-05-23 19:04:03 +00002920 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002921 if (nameobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002922 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002923 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002924 if (s == NULL)
2925 goto error;
2926 PyUnicode_AppendAndDel(&res, s);
2927 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002928 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002929 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002930 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2931 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002932 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002933 if (modeobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002934 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2935 Py_DECREF(modeobj);
2936 if (s == NULL)
2937 goto error;
2938 PyUnicode_AppendAndDel(&res, s);
2939 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002940 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002941 }
2942 s = PyUnicode_FromFormat("%U encoding=%R>",
2943 res, self->encoding);
2944 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002945 if (status == 0) {
2946 Py_ReprLeave((PyObject *)self);
2947 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002948 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002949
2950 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002951 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002952 if (status == 0) {
2953 Py_ReprLeave((PyObject *)self);
2954 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002955 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002956}
2957
2958
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002959/* Inquiries */
2960
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002961/*[clinic input]
2962_io.TextIOWrapper.fileno
2963[clinic start generated code]*/
2964
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002965static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002966_io_TextIOWrapper_fileno_impl(textio *self)
2967/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002968{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002969 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002970 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002971}
2972
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002973/*[clinic input]
2974_io.TextIOWrapper.seekable
2975[clinic start generated code]*/
2976
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002977static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002978_io_TextIOWrapper_seekable_impl(textio *self)
2979/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002980{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002981 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002982 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002983}
2984
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002985/*[clinic input]
2986_io.TextIOWrapper.readable
2987[clinic start generated code]*/
2988
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002989static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002990_io_TextIOWrapper_readable_impl(textio *self)
2991/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002992{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002993 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002994 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002995}
2996
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002997/*[clinic input]
2998_io.TextIOWrapper.writable
2999[clinic start generated code]*/
3000
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003001static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003002_io_TextIOWrapper_writable_impl(textio *self)
3003/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003004{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003005 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003006 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003007}
3008
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003009/*[clinic input]
3010_io.TextIOWrapper.isatty
3011[clinic start generated code]*/
3012
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003013static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003014_io_TextIOWrapper_isatty_impl(textio *self)
3015/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003016{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003017 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003018 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003019}
3020
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003021/*[clinic input]
3022_io.TextIOWrapper.flush
3023[clinic start generated code]*/
3024
Antoine Pitrou243757e2010-11-05 21:15:39 +00003025static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003026_io_TextIOWrapper_flush_impl(textio *self)
3027/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003028{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003029 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003030 CHECK_CLOSED(self);
3031 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003032 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003033 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003034 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003035}
3036
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003037/*[clinic input]
3038_io.TextIOWrapper.close
3039[clinic start generated code]*/
3040
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003041static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003042_io_TextIOWrapper_close_impl(textio *self)
3043/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003044{
3045 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003046 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003047 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003048
Antoine Pitrou6be88762010-05-03 16:48:20 +00003049 res = textiowrapper_closed_get(self, NULL);
3050 if (res == NULL)
3051 return NULL;
3052 r = PyObject_IsTrue(res);
3053 Py_DECREF(res);
3054 if (r < 0)
3055 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003056
Antoine Pitrou6be88762010-05-03 16:48:20 +00003057 if (r > 0) {
3058 Py_RETURN_NONE; /* stream already closed */
3059 }
3060 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003061 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003062 if (self->finalizing) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02003063 res = _PyObject_CallMethodIdOneArg(self->buffer,
3064 &PyId__dealloc_warn,
3065 (PyObject *)self);
Antoine Pitroue033e062010-10-29 10:38:18 +00003066 if (res)
3067 Py_DECREF(res);
3068 else
3069 PyErr_Clear();
3070 }
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003071 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson68623612012-12-20 11:53:11 -06003072 if (res == NULL)
3073 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003074 else
3075 Py_DECREF(res);
3076
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003077 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
Benjamin Peterson68623612012-12-20 11:53:11 -06003078 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003079 _PyErr_ChainExceptions(exc, val, tb);
3080 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003081 }
3082 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003083 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003084}
3085
3086static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003087textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003088{
3089 PyObject *line;
3090
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003091 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003092
3093 self->telling = 0;
Andy Lesterdffe4c02020-03-04 07:15:20 -06003094 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003095 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003096 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003097 }
3098 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01003099 line = PyObject_CallMethodNoArgs((PyObject *)self,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003100 _PyIO_str_readline);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003101 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003102 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003103 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003104 "not '%.200s'", Py_TYPE(line)->tp_name);
3105 Py_DECREF(line);
3106 return NULL;
3107 }
3108 }
3109
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003110 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003111 return NULL;
3112
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003113 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003114 /* Reached EOF or would have blocked */
3115 Py_DECREF(line);
3116 Py_CLEAR(self->snapshot);
3117 self->telling = self->seekable;
3118 return NULL;
3119 }
3120
3121 return line;
3122}
3123
3124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003125textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003126{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003127 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003128 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003129}
3130
3131static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003132textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003133{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003134 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003135 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3136}
3137
3138static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003139textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003140{
3141 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003142 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003143 if (self->decoder == NULL ||
3144 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3145 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003146 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003147 }
3148 return res;
3149}
3150
3151static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003152textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003153{
3154 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003155 Py_INCREF(self->errors);
3156 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003157}
3158
3159static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003160textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003161{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003162 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003163 return PyLong_FromSsize_t(self->chunk_size);
3164}
3165
3166static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003167textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003168{
3169 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003170 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003171 if (arg == NULL) {
3172 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3173 return -1;
3174 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003175 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003176 if (n == -1 && PyErr_Occurred())
3177 return -1;
3178 if (n <= 0) {
3179 PyErr_SetString(PyExc_ValueError,
3180 "a strictly positive integer is required");
3181 return -1;
3182 }
3183 self->chunk_size = n;
3184 return 0;
3185}
3186
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003187#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003188
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003189static PyMethodDef incrementalnewlinedecoder_methods[] = {
3190 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3191 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3192 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3193 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3194 {NULL}
3195};
3196
3197static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3198 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3199 {NULL}
3200};
3201
3202PyTypeObject PyIncrementalNewlineDecoder_Type = {
3203 PyVarObject_HEAD_INIT(NULL, 0)
3204 "_io.IncrementalNewlineDecoder", /*tp_name*/
3205 sizeof(nldecoder_object), /*tp_basicsize*/
3206 0, /*tp_itemsize*/
3207 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003208 0, /*tp_vectorcall_offset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003209 0, /*tp_getattr*/
3210 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003211 0, /*tp_as_async*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003212 0, /*tp_repr*/
3213 0, /*tp_as_number*/
3214 0, /*tp_as_sequence*/
3215 0, /*tp_as_mapping*/
3216 0, /*tp_hash */
3217 0, /*tp_call*/
3218 0, /*tp_str*/
3219 0, /*tp_getattro*/
3220 0, /*tp_setattro*/
3221 0, /*tp_as_buffer*/
3222 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3223 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3224 0, /* tp_traverse */
3225 0, /* tp_clear */
3226 0, /* tp_richcompare */
3227 0, /*tp_weaklistoffset*/
3228 0, /* tp_iter */
3229 0, /* tp_iternext */
3230 incrementalnewlinedecoder_methods, /* tp_methods */
3231 0, /* tp_members */
3232 incrementalnewlinedecoder_getset, /* tp_getset */
3233 0, /* tp_base */
3234 0, /* tp_dict */
3235 0, /* tp_descr_get */
3236 0, /* tp_descr_set */
3237 0, /* tp_dictoffset */
3238 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3239 0, /* tp_alloc */
3240 PyType_GenericNew, /* tp_new */
3241};
3242
3243
3244static PyMethodDef textiowrapper_methods[] = {
3245 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003246 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003247 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3248 _IO_TEXTIOWRAPPER_READ_METHODDEF
3249 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3250 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3251 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3252
3253 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3254 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3255 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3256 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3257 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003258
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003259 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3260 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3261 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003262 {NULL, NULL}
3263};
3264
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003265static PyMemberDef textiowrapper_members[] = {
3266 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3267 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3268 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003269 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003270 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003271 {NULL}
3272};
3273
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003274static PyGetSetDef textiowrapper_getset[] = {
3275 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3276 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003277/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3278*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003279 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3280 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3281 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3282 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003283 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003284};
3285
3286PyTypeObject PyTextIOWrapper_Type = {
3287 PyVarObject_HEAD_INIT(NULL, 0)
3288 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003289 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003290 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003291 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003292 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003293 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003294 0, /*tps_etattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003295 0, /*tp_as_async*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003296 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003297 0, /*tp_as_number*/
3298 0, /*tp_as_sequence*/
3299 0, /*tp_as_mapping*/
3300 0, /*tp_hash */
3301 0, /*tp_call*/
3302 0, /*tp_str*/
3303 0, /*tp_getattro*/
3304 0, /*tp_setattro*/
3305 0, /*tp_as_buffer*/
3306 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrouada319b2019-05-29 22:12:38 +02003307 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003308 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003309 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3310 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003311 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003312 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003313 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003314 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3315 textiowrapper_methods, /* tp_methods */
3316 textiowrapper_members, /* tp_members */
3317 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003318 0, /* tp_base */
3319 0, /* tp_dict */
3320 0, /* tp_descr_get */
3321 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003322 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003323 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003324 0, /* tp_alloc */
3325 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003326 0, /* tp_free */
3327 0, /* tp_is_gc */
3328 0, /* tp_bases */
3329 0, /* tp_mro */
3330 0, /* tp_cache */
3331 0, /* tp_subclasses */
3332 0, /* tp_weaklist */
3333 0, /* tp_del */
3334 0, /* tp_version_tag */
3335 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003336};