blob: 699b7e94c93bbf3554c2ca38014df0c9c301bdd2 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020011#include "pycore_interp.h" // PyInterpreterState.fs_codec
Victor Stinner37834132020-10-27 17:12:53 +010012#include "pycore_long.h" // _PyLong_GetZero()
Victor Stinnerbcda8f12018-11-21 22:27:47 +010013#include "pycore_object.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "pycore_pystate.h" // _PyInterpreterState_GET()
15#include "structmember.h" // PyMemberDef
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000016#include "_iomodule.h"
17
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030018/*[clinic input]
19module _io
20class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
21class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
22[clinic start generated code]*/
23/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
24
/* Interned-on-demand identifiers (PyId_<name>) declared for this file,
   kept in alphabetical order. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020045
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046/* TextIOBase */
47
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000048PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000049 "Base class for text I/O.\n"
50 "\n"
51 "This class provides a character and line based interface to stream\n"
52 "I/O. There is no readinto method because Python's character strings\n"
53 "are immutable. There is no public constructor.\n"
54 );
55
56static PyObject *
57_unsupported(const char *message)
58{
Antoine Pitrou712cb732013-12-21 15:51:54 +010059 _PyIO_State *state = IO_STATE();
60 if (state != NULL)
61 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000062 return NULL;
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000066 "Separate the underlying buffer from the TextIOBase and return it.\n"
67 "\n"
68 "After the underlying buffer has been detached, the TextIO is in an\n"
69 "unusable state.\n"
70 );
71
72static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053073textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000074{
75 return _unsupported("detach");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read at most n characters from stream.\n"
80 "\n"
81 "Read from underlying buffer until we have n characters or we hit EOF.\n"
82 "If n is negative or omitted, read until EOF.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("read");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Read until newline or EOF.\n"
93 "\n"
94 "Returns an empty string if EOF is hit immediately.\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("readline");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Write string to stream.\n"
105 "Returns the number of characters written (which is always equal to\n"
106 "the length of the string).\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 return _unsupported("write");
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Encoding of the text stream.\n"
117 "\n"
118 "Subclasses should override.\n"
119 );
120
121static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000122textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000123{
124 Py_RETURN_NONE;
125}
126
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000127PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000128 "Line endings translated so far.\n"
129 "\n"
130 "Only line endings translated during reading are considered.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000142 "The error setting of the decoder or encoder.\n"
143 "\n"
144 "Subclasses should override.\n"
145 );
146
147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000148textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000149{
150 Py_RETURN_NONE;
151}
152
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000153
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000154static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530155 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
157 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
158 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000159 {NULL, NULL}
160};
161
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000162static PyGetSetDef textiobase_getset[] = {
163 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
164 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
165 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000166 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000167};
168
169PyTypeObject PyTextIOBase_Type = {
170 PyVarObject_HEAD_INIT(NULL, 0)
171 "_io._TextIOBase", /*tp_name*/
172 0, /*tp_basicsize*/
173 0, /*tp_itemsize*/
174 0, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200175 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000176 0, /*tp_getattr*/
177 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200178 0, /*tp_as_async*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000179 0, /*tp_repr*/
180 0, /*tp_as_number*/
181 0, /*tp_as_sequence*/
182 0, /*tp_as_mapping*/
183 0, /*tp_hash */
184 0, /*tp_call*/
185 0, /*tp_str*/
186 0, /*tp_getattro*/
187 0, /*tp_setattro*/
188 0, /*tp_as_buffer*/
Antoine Pitrouada319b2019-05-29 22:12:38 +0200189 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000190 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000191 0, /* tp_traverse */
192 0, /* tp_clear */
193 0, /* tp_richcompare */
194 0, /* tp_weaklistoffset */
195 0, /* tp_iter */
196 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 &PyIOBase_Type, /* tp_base */
201 0, /* tp_dict */
202 0, /* tp_descr_get */
203 0, /* tp_descr_set */
204 0, /* tp_dictoffset */
205 0, /* tp_init */
206 0, /* tp_alloc */
207 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200208 0, /* tp_free */
209 0, /* tp_is_gc */
210 0, /* tp_bases */
211 0, /* tp_mro */
212 0, /* tp_cache */
213 0, /* tp_subclasses */
214 0, /* tp_weaklist */
215 0, /* tp_del */
216 0, /* tp_version_tag */
217 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218};
219
220
221/* IncrementalNewlineDecoder */
222
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000223typedef struct {
224 PyObject_HEAD
225 PyObject *decoder;
226 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200227 unsigned int pendingcr: 1;
228 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000229 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000230} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300232/*[clinic input]
233_io.IncrementalNewlineDecoder.__init__
234 decoder: object
235 translate: int
236 errors: object(c_default="NULL") = "strict"
237
238Codec used when reading a file in universal newlines mode.
239
240It wraps another incremental decoder, translating \r\n and \r into \n.
241It also records the types of newlines encountered. When used with
242translate=False, it ensures that the newline sequence is returned in
243one piece. When used with decoder=None, it expects unicode strings as
244decode input and translates newlines without first invoking an external
245decoder.
246[clinic start generated code]*/
247
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000248static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300249_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
250 PyObject *decoder, int translate,
251 PyObject *errors)
252/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254 self->decoder = decoder;
255 Py_INCREF(decoder);
256
257 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900258 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259 if (self->errors == NULL)
260 return -1;
261 }
262 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000263 self->errors = errors;
264 }
INADA Naoki507434f2017-12-21 09:59:53 +0900265 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266
Xiang Zhangb08746b2018-10-31 19:49:16 +0800267 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 self->seennl = 0;
269 self->pendingcr = 0;
270
271 return 0;
272}
273
274static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000275incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000276{
277 Py_CLEAR(self->decoder);
278 Py_CLEAR(self->errors);
279 Py_TYPE(self)->tp_free((PyObject *)self);
280}
281
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200282static int
283check_decoded(PyObject *decoded)
284{
285 if (decoded == NULL)
286 return -1;
287 if (!PyUnicode_Check(decoded)) {
288 PyErr_Format(PyExc_TypeError,
289 "decoder should return a string result, not '%.200s'",
290 Py_TYPE(decoded)->tp_name);
291 Py_DECREF(decoded);
292 return -1;
293 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200294 if (PyUnicode_READY(decoded) < 0) {
295 Py_DECREF(decoded);
296 return -1;
297 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200298 return 0;
299}
300
/* Bit flags stored in nldecoder_object.seennl, one per newline flavour. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
305
306PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200307_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000308 PyObject *input, int final)
309{
310 PyObject *output;
311 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200312 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000313
314 if (self->decoder == NULL) {
315 PyErr_SetString(PyExc_ValueError,
316 "IncrementalNewlineDecoder.__init__ not called");
317 return NULL;
318 }
319
320 /* decode input (with the eventual \r from a previous pass) */
321 if (self->decoder != Py_None) {
322 output = PyObject_CallMethodObjArgs(self->decoder,
323 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
324 }
325 else {
326 output = input;
327 Py_INCREF(output);
328 }
329
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200330 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000331 return NULL;
332
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000334 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 /* Prefix output with CR */
336 int kind;
337 PyObject *modified;
338 char *out;
339
340 modified = PyUnicode_New(output_len + 1,
341 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 if (modified == NULL)
343 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200344 kind = PyUnicode_KIND(modified);
345 out = PyUnicode_DATA(modified);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300346 PyUnicode_WRITE(kind, out, 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200347 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000348 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200349 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000350 self->pendingcr = 0;
351 output_len++;
352 }
353
354 /* retain last \r even when not translating data:
355 * then readline() is sure to get \r\n in one pass
356 */
357 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000358 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200359 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
360 {
361 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
362 if (modified == NULL)
363 goto error;
364 Py_DECREF(output);
365 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000366 self->pendingcr = 1;
367 }
368 }
369
370 /* Record which newlines are read and do newline translation if desired,
371 all in one pass. */
372 {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300373 const void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000374 Py_ssize_t len;
375 int seennl = self->seennl;
376 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000378
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200379 in_str = PyUnicode_DATA(output);
380 len = PyUnicode_GET_LENGTH(output);
381 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382
383 if (len == 0)
384 return output;
385
386 /* If, up to now, newlines are consistently \n, do a quick check
387 for the \r *byte* with the libc's optimized memchr.
388 */
389 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200390 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000391 }
392
Antoine Pitrou66913e22009-03-06 23:40:56 +0000393 if (only_lf) {
394 /* If not already seen, quick scan for a possible "\n" character.
395 (there's nothing else to be done, even when in translation mode)
396 */
397 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200398 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100399 if (kind == PyUnicode_1BYTE_KIND)
400 seennl |= SEEN_LF;
401 else {
402 Py_ssize_t i = 0;
403 for (;;) {
404 Py_UCS4 c;
405 /* Fast loop for non-control characters */
406 while (PyUnicode_READ(kind, in_str, i) > '\n')
407 i++;
408 c = PyUnicode_READ(kind, in_str, i++);
409 if (c == '\n') {
410 seennl |= SEEN_LF;
411 break;
412 }
413 if (i >= len)
414 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000415 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000416 }
417 }
418 /* Finished: we have scanned for newlines, and none of them
419 need translating */
420 }
421 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200422 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000423 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000424 if (seennl == SEEN_ALL)
425 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000426 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000428 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 while (PyUnicode_READ(kind, in_str, i) > '\r')
430 i++;
431 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000432 if (c == '\n')
433 seennl |= SEEN_LF;
434 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200437 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000438 }
439 else
440 seennl |= SEEN_CR;
441 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000443 break;
444 if (seennl == SEEN_ALL)
445 break;
446 }
447 endscan:
448 ;
449 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000450 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 void *translated;
452 int kind = PyUnicode_KIND(output);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300453 const void *in_str = PyUnicode_DATA(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 Py_ssize_t in, out;
455 /* XXX: Previous in-place translation here is disabled as
456 resizing is not possible anymore */
457 /* We could try to optimize this so that we only do a copy
458 when there is something to translate. On the other hand,
459 we already know there is a \r byte, so chances are high
460 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200461 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 if (translated == NULL) {
463 PyErr_NoMemory();
464 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000467 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000469 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200470 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
471 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200473 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000474 seennl |= SEEN_LF;
475 continue;
476 }
477 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 in++;
480 seennl |= SEEN_CRLF;
481 }
482 else
483 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200484 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000485 continue;
486 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200487 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000488 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200489 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000490 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 Py_DECREF(output);
492 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100493 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200494 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200495 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000496 }
497 self->seennl |= seennl;
498 }
499
500 return output;
501
502 error:
503 Py_DECREF(output);
504 return NULL;
505}
506
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300507/*[clinic input]
508_io.IncrementalNewlineDecoder.decode
509 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200510 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000512
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300513static PyObject *
514_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
515 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200516/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300517{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000518 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
519}
520
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300521/*[clinic input]
522_io.IncrementalNewlineDecoder.getstate
523[clinic start generated code]*/
524
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300526_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
527/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528{
529 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700530 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000531
532 if (self->decoder != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100533 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200534 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000535 if (state == NULL)
536 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300537 if (!PyTuple_Check(state)) {
538 PyErr_SetString(PyExc_TypeError,
539 "illegal decoder state");
540 Py_DECREF(state);
541 return NULL;
542 }
543 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
544 &buffer, &flag))
545 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000546 Py_DECREF(state);
547 return NULL;
548 }
549 Py_INCREF(buffer);
550 Py_DECREF(state);
551 }
552 else {
553 buffer = PyBytes_FromString("");
554 flag = 0;
555 }
556 flag <<= 1;
557 if (self->pendingcr)
558 flag |= 1;
559 return Py_BuildValue("NK", buffer, flag);
560}
561
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300562/*[clinic input]
563_io.IncrementalNewlineDecoder.setstate
564 state: object
565 /
566[clinic start generated code]*/
567
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000568static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300569_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
570 PyObject *state)
571/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700574 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575
Oren Milman1d1d3e92017-08-20 18:35:36 +0300576 if (!PyTuple_Check(state)) {
577 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300579 }
580 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
581 &buffer, &flag))
582 {
583 return NULL;
584 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585
Victor Stinner7d7e7752014-06-17 23:31:25 +0200586 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000587 flag >>= 1;
588
589 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200590 return _PyObject_CallMethodId(self->decoder,
591 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 else
593 Py_RETURN_NONE;
594}
595
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300596/*[clinic input]
597_io.IncrementalNewlineDecoder.reset
598[clinic start generated code]*/
599
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000600static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300601_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
602/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603{
604 self->seennl = 0;
605 self->pendingcr = 0;
606 if (self->decoder != Py_None)
Petr Viktorinffd97532020-02-11 17:46:57 +0100607 return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000608 else
609 Py_RETURN_NONE;
610}
611
612static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614{
615 switch (self->seennl) {
616 case SEEN_CR:
617 return PyUnicode_FromString("\r");
618 case SEEN_LF:
619 return PyUnicode_FromString("\n");
620 case SEEN_CRLF:
621 return PyUnicode_FromString("\r\n");
622 case SEEN_CR | SEEN_LF:
623 return Py_BuildValue("ss", "\r", "\n");
624 case SEEN_CR | SEEN_CRLF:
625 return Py_BuildValue("ss", "\r", "\r\n");
626 case SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("ss", "\n", "\r\n");
628 case SEEN_CR | SEEN_LF | SEEN_CRLF:
629 return Py_BuildValue("sss", "\r", "\n", "\r\n");
630 default:
631 Py_RETURN_NONE;
632 }
633
634}
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636/* TextIOWrapper */
637
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000638typedef PyObject *
639 (*encodefunc_t)(PyObject *, PyObject *);
640
641typedef struct
642{
643 PyObject_HEAD
644 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000645 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000646 Py_ssize_t chunk_size;
647 PyObject *buffer;
648 PyObject *encoding;
649 PyObject *encoder;
650 PyObject *decoder;
651 PyObject *readnl;
652 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900653 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200655 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000656 char readuniversal;
657 char readtranslate;
658 char writetranslate;
659 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200660 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200662 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000663 /* Specialized encoding func (see below) */
664 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000665 /* Whether or not it's the start of the stream */
666 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000667
668 /* Reads and writes are internally buffered in order to speed things up.
669 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000670
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000671 Please also note that text to be written is first encoded before being
672 buffered. This is necessary so that encoding errors are immediately
673 reported to the caller, but it unfortunately means that the
674 IncrementalEncoder (whose encode() method is always written in Python)
675 becomes a bottleneck for small writes.
676 */
677 PyObject *decoded_chars; /* buffer for text returned from decoder */
678 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
Inada Naokibfba8c32019-05-16 15:03:20 +0900679 PyObject *pending_bytes; // data waiting to be written.
680 // ascii unicode, bytes, or list of them.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000681 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000682
Oren Milman13614e32017-08-24 19:51:24 +0300683 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000684 * dec_flags is the second (integer) item of the decoder state and
685 * next_input is the chunk of input bytes that comes next after the
686 * snapshot point. We use this to reconstruct decoder states in tell().
687 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000688 PyObject *snapshot;
689 /* Bytes-to-characters ratio for the current chunk. Serves as input for
690 the heuristic in tell(). */
691 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000692
693 /* Cache raw object if it's a FileIO object */
694 PyObject *raw;
695
696 PyObject *weakreflist;
697 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000698} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000699
Zackery Spytz23db9352018-06-29 04:14:58 -0600700static void
701textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
702
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703/* A couple of specialized cases in order to bypass the slow incremental
704 encoding methods for the most popular encodings. */
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
INADA Naoki507434f2017-12-21 09:59:53 +0900709 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710}
711
712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100715 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900716 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717}
718
719static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000720utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100722 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900723 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724}
725
726static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000727utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728{
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 if (!self->encoding_start_of_stream) {
730 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200731#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000734 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000736 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100737 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900738 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739}
740
Antoine Pitroue4501852009-05-14 18:55:55 +0000741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000743{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100744 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900745 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000746}
747
748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000750{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100751 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900752 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000757{
758 if (!self->encoding_start_of_stream) {
759 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200760#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000761 return utf32be_encode(self, text);
762#else
763 return utf32le_encode(self, text);
764#endif
765 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100766 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900767 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000768}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
INADA Naoki507434f2017-12-21 09:59:53 +0900773 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000774}
775
776static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000777latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778{
INADA Naoki507434f2017-12-21 09:59:53 +0900779 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780}
781
Inada Naokibfba8c32019-05-16 15:03:20 +0900782// Return true when encoding can be skipped when text is ascii.
783static inline int
784is_asciicompat_encoding(encodefunc_t f)
785{
786 return f == (encodefunc_t) ascii_encode
787 || f == (encodefunc_t) latin1_encode
788 || f == (encodefunc_t) utf8_encode;
789}
790
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791/* Map normalized encoding names onto the specialized encoding funcs */
792
793typedef struct {
794 const char *name;
795 encodefunc_t encodefunc;
796} encodefuncentry;
797
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200798static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799 {"ascii", (encodefunc_t) ascii_encode},
800 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000801 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802 {"utf-16-be", (encodefunc_t) utf16be_encode},
803 {"utf-16-le", (encodefunc_t) utf16le_encode},
804 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000805 {"utf-32-be", (encodefunc_t) utf32be_encode},
806 {"utf-32-le", (encodefunc_t) utf32le_encode},
807 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000808 {NULL, NULL}
809};
810
INADA Naoki507434f2017-12-21 09:59:53 +0900811static int
812validate_newline(const char *newline)
813{
814 if (newline && newline[0] != '\0'
815 && !(newline[0] == '\n' && newline[1] == '\0')
816 && !(newline[0] == '\r' && newline[1] == '\0')
817 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
818 PyErr_Format(PyExc_ValueError,
819 "illegal newline value: %s", newline);
820 return -1;
821 }
822 return 0;
823}
824
825static int
826set_newline(textio *self, const char *newline)
827{
828 PyObject *old = self->readnl;
829 if (newline == NULL) {
830 self->readnl = NULL;
831 }
832 else {
833 self->readnl = PyUnicode_FromString(newline);
834 if (self->readnl == NULL) {
835 self->readnl = old;
836 return -1;
837 }
838 }
839 self->readuniversal = (newline == NULL || newline[0] == '\0');
840 self->readtranslate = (newline == NULL);
841 self->writetranslate = (newline == NULL || newline[0] != '\0');
842 if (!self->readuniversal && self->readnl != NULL) {
843 // validate_newline() accepts only ASCII newlines.
844 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
845 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
846 if (strcmp(self->writenl, "\n") == 0) {
847 self->writenl = NULL;
848 }
849 }
850 else {
851#ifdef MS_WINDOWS
852 self->writenl = "\r\n";
853#else
854 self->writenl = NULL;
855#endif
856 }
857 Py_XDECREF(old);
858 return 0;
859}
860
861static int
862_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
863 const char *errors)
864{
865 PyObject *res;
866 int r;
867
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200868 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
INADA Naoki507434f2017-12-21 09:59:53 +0900869 if (res == NULL)
870 return -1;
871
872 r = PyObject_IsTrue(res);
873 Py_DECREF(res);
874 if (r == -1)
875 return -1;
876
877 if (r != 1)
878 return 0;
879
880 Py_CLEAR(self->decoder);
881 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
882 if (self->decoder == NULL)
883 return -1;
884
885 if (self->readuniversal) {
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300886 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
INADA Naoki507434f2017-12-21 09:59:53 +0900887 (PyObject *)&PyIncrementalNewlineDecoder_Type,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300888 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
INADA Naoki507434f2017-12-21 09:59:53 +0900889 if (incrementalDecoder == NULL)
890 return -1;
891 Py_CLEAR(self->decoder);
892 self->decoder = incrementalDecoder;
893 }
894
895 return 0;
896}
897
898static PyObject*
899_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
900{
901 PyObject *chars;
902
Andy Lesterdffe4c02020-03-04 07:15:20 -0600903 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
INADA Naoki507434f2017-12-21 09:59:53 +0900904 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
905 else
906 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
907 eof ? Py_True : Py_False, NULL);
908
909 if (check_decoded(chars) < 0)
910 // check_decoded already decreases refcount
911 return NULL;
912
913 return chars;
914}
915
916static int
917_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
918 const char *errors)
919{
920 PyObject *res;
921 int r;
922
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200923 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
INADA Naoki507434f2017-12-21 09:59:53 +0900924 if (res == NULL)
925 return -1;
926
927 r = PyObject_IsTrue(res);
928 Py_DECREF(res);
929 if (r == -1)
930 return -1;
931
932 if (r != 1)
933 return 0;
934
935 Py_CLEAR(self->encoder);
936 self->encodefunc = NULL;
937 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
938 if (self->encoder == NULL)
939 return -1;
940
941 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200942 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
943 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900944 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200945 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900946 const encodefuncentry *e = encodefuncs;
947 while (e->name != NULL) {
948 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
949 self->encodefunc = e->encodefunc;
950 break;
951 }
952 e++;
953 }
954 }
955 Py_XDECREF(res);
956
957 return 0;
958}
959
960static int
961_textiowrapper_fix_encoder_state(textio *self)
962{
963 if (!self->seekable || !self->encoder) {
964 return 0;
965 }
966
967 self->encoding_start_of_stream = 1;
968
Petr Viktorinffd97532020-02-11 17:46:57 +0100969 PyObject *cookieObj = PyObject_CallMethodNoArgs(
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200970 self->buffer, _PyIO_str_tell);
INADA Naoki507434f2017-12-21 09:59:53 +0900971 if (cookieObj == NULL) {
972 return -1;
973 }
974
Victor Stinner37834132020-10-27 17:12:53 +0100975 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
INADA Naoki507434f2017-12-21 09:59:53 +0900976 Py_DECREF(cookieObj);
977 if (cmp < 0) {
978 return -1;
979 }
980
981 if (cmp == 0) {
982 self->encoding_start_of_stream = 0;
Petr Viktorinffd97532020-02-11 17:46:57 +0100983 PyObject *res = PyObject_CallMethodOneArg(
Victor Stinner37834132020-10-27 17:12:53 +0100984 self->encoder, _PyIO_str_setstate, _PyLong_GetZero());
INADA Naoki507434f2017-12-21 09:59:53 +0900985 if (res == NULL) {
986 return -1;
987 }
988 Py_DECREF(res);
989 }
990
991 return 0;
992}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000993
Victor Stinner22eb6892019-06-26 00:51:05 +0200994static int
995io_check_errors(PyObject *errors)
996{
997 assert(errors != NULL && errors != Py_None);
998
Victor Stinner81a7be32020-04-14 15:14:01 +0200999 PyInterpreterState *interp = _PyInterpreterState_GET();
Victor Stinner22eb6892019-06-26 00:51:05 +02001000#ifndef Py_DEBUG
1001 /* In release mode, only check in development mode (-X dev) */
Victor Stinnerda7933e2020-04-13 03:04:28 +02001002 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001003 return 0;
1004 }
1005#else
1006 /* Always check in debug mode */
1007#endif
1008
1009 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1010 before_PyUnicode_InitEncodings() is called. */
Victor Stinner3d17c042020-05-14 01:48:38 +02001011 if (!interp->unicode.fs_codec.encoding) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001012 return 0;
1013 }
1014
1015 Py_ssize_t name_length;
1016 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1017 if (name == NULL) {
1018 return -1;
1019 }
1020 if (strlen(name) != (size_t)name_length) {
1021 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1022 return -1;
1023 }
1024 PyObject *handler = PyCodec_LookupError(name);
1025 if (handler != NULL) {
1026 Py_DECREF(handler);
1027 return 0;
1028 }
1029 return -1;
1030}
1031
1032
1033
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001034/*[clinic input]
1035_io.TextIOWrapper.__init__
1036 buffer: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001037 encoding: str(accept={str, NoneType}) = None
INADA Naoki507434f2017-12-21 09:59:53 +09001038 errors: object = None
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001039 newline: str(accept={str, NoneType}) = None
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001040 line_buffering: bool(accept={int}) = False
1041 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001043Character and line based layer over a BufferedIOBase object, buffer.
1044
1045encoding gives the name of the encoding that the stream will be
1046decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1047
1048errors determines the strictness of encoding and decoding (see
1049help(codecs.Codec) or the documentation for codecs.register) and
1050defaults to "strict".
1051
1052newline controls how line endings are handled. It can be None, '',
1053'\n', '\r', and '\r\n'. It works as follows:
1054
1055* On input, if newline is None, universal newlines mode is
1056 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1057 these are translated into '\n' before being returned to the
1058 caller. If it is '', universal newline mode is enabled, but line
1059 endings are returned to the caller untranslated. If it has any of
1060 the other legal values, input lines are only terminated by the given
1061 string, and the line ending is returned to the caller untranslated.
1062
1063* On output, if newline is None, any '\n' characters written are
1064 translated to the system default line separator, os.linesep. If
1065 newline is '' or '\n', no translation takes place. If newline is any
1066 of the other legal values, any '\n' characters written are translated
1067 to the given string.
1068
1069If line_buffering is True, a call to flush is implied when a call to
1070write contains a newline character.
1071[clinic start generated code]*/
1072
1073static int
1074_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001075 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001076 const char *newline, int line_buffering,
1077 int write_through)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001078/*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001079{
1080 PyObject *raw, *codec_info = NULL;
1081 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001082 PyObject *res;
1083 int r;
1084
1085 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001086 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087
INADA Naoki507434f2017-12-21 09:59:53 +09001088 if (errors == Py_None) {
1089 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001090 if (errors == NULL) {
1091 return -1;
1092 }
INADA Naoki507434f2017-12-21 09:59:53 +09001093 }
1094 else if (!PyUnicode_Check(errors)) {
1095 // Check 'errors' argument here because Argument Clinic doesn't support
1096 // 'str(accept={str, NoneType})' converter.
1097 PyErr_Format(
1098 PyExc_TypeError,
1099 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001100 Py_TYPE(errors)->tp_name);
INADA Naoki507434f2017-12-21 09:59:53 +09001101 return -1;
1102 }
Victor Stinner22eb6892019-06-26 00:51:05 +02001103 else if (io_check_errors(errors)) {
1104 return -1;
1105 }
INADA Naoki507434f2017-12-21 09:59:53 +09001106
1107 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001108 return -1;
1109 }
1110
1111 Py_CLEAR(self->buffer);
1112 Py_CLEAR(self->encoding);
1113 Py_CLEAR(self->encoder);
1114 Py_CLEAR(self->decoder);
1115 Py_CLEAR(self->readnl);
1116 Py_CLEAR(self->decoded_chars);
1117 Py_CLEAR(self->pending_bytes);
1118 Py_CLEAR(self->snapshot);
1119 Py_CLEAR(self->errors);
1120 Py_CLEAR(self->raw);
1121 self->decoded_chars_used = 0;
1122 self->pending_bytes_count = 0;
1123 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001124 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125
1126 if (encoding == NULL) {
1127 /* Try os.device_encoding(fileno) */
1128 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001129 state = IO_STATE();
1130 if (state == NULL)
1131 goto error;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001132 fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001133 /* Ignore only AttributeError and UnsupportedOperation */
1134 if (fileno == NULL) {
1135 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1136 PyErr_ExceptionMatches(state->unsupported_operation)) {
1137 PyErr_Clear();
1138 }
1139 else {
1140 goto error;
1141 }
1142 }
1143 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001144 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001145 Py_DECREF(fileno);
1146 if (fd == -1 && PyErr_Occurred()) {
1147 goto error;
1148 }
1149
1150 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001151 if (self->encoding == NULL)
1152 goto error;
1153 else if (!PyUnicode_Check(self->encoding))
1154 Py_CLEAR(self->encoding);
1155 }
1156 }
1157 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001158 PyObject *locale_module = _PyIO_get_locale_module(state);
1159 if (locale_module == NULL)
1160 goto catch_ImportError;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001161 self->encoding = _PyObject_CallMethodIdOneArg(
1162 locale_module, &PyId_getpreferredencoding, Py_False);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001163 Py_DECREF(locale_module);
1164 if (self->encoding == NULL) {
1165 catch_ImportError:
1166 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001167 Importing locale can raise an ImportError because of
1168 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001169 ImportError if _locale is not available. These will happen
1170 during module building.
1171 */
1172 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1173 PyErr_Clear();
1174 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001175 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001176 else
1177 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001178 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001179 else if (!PyUnicode_Check(self->encoding))
1180 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001181 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001182 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001183 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001184 if (encoding == NULL)
1185 goto error;
1186 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001187 else if (encoding != NULL) {
1188 self->encoding = PyUnicode_FromString(encoding);
1189 if (self->encoding == NULL)
1190 goto error;
1191 }
1192 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001193 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001194 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001195 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001196 }
1197
Nick Coghlana9b15242014-02-04 22:11:18 +10001198 /* Check we have been asked for a real text encoding */
1199 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1200 if (codec_info == NULL) {
1201 Py_CLEAR(self->encoding);
1202 goto error;
1203 }
1204
1205 /* XXX: Failures beyond this point have the potential to leak elements
1206 * of the partially constructed object (like self->encoding)
1207 */
1208
INADA Naoki507434f2017-12-21 09:59:53 +09001209 Py_INCREF(errors);
1210 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001211 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001212 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001213 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001214 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001215 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001216 }
1217
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001218 self->buffer = buffer;
1219 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001220
INADA Naoki507434f2017-12-21 09:59:53 +09001221 /* Build the decoder object */
1222 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1223 goto error;
1224
1225 /* Build the encoder object */
1226 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1227 goto error;
1228
1229 /* Finished sorting out the codec details */
1230 Py_CLEAR(codec_info);
1231
Andy Lesterdffe4c02020-03-04 07:15:20 -06001232 if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1233 Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1234 Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001235 {
1236 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1237 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001238 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001239 if (raw != NULL) {
Andy Lesterdffe4c02020-03-04 07:15:20 -06001240 if (Py_IS_TYPE(raw, &PyFileIO_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001241 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001242 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001243 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001244 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245 }
1246
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001247 res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248 if (res == NULL)
1249 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001250 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001251 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001252 if (r < 0)
1253 goto error;
1254 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001255
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001256 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1257 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001258 goto error;
1259 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001260 Py_XDECREF(res);
1261 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001262
Antoine Pitroue4501852009-05-14 18:55:55 +00001263 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001264 if (_textiowrapper_fix_encoder_state(self) < 0) {
1265 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001266 }
1267
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268 self->ok = 1;
1269 return 0;
1270
1271 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001272 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001273 return -1;
1274}
1275
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001276/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1277 * -1 on error.
1278 */
1279static int
1280convert_optional_bool(PyObject *obj, int default_value)
1281{
1282 long v;
1283 if (obj == Py_None) {
1284 v = default_value;
1285 }
1286 else {
1287 v = PyLong_AsLong(obj);
1288 if (v == -1 && PyErr_Occurred())
1289 return -1;
1290 }
1291 return v != 0;
1292}
1293
INADA Naoki507434f2017-12-21 09:59:53 +09001294static int
1295textiowrapper_change_encoding(textio *self, PyObject *encoding,
1296 PyObject *errors, int newline_changed)
1297{
1298 /* Use existing settings where new settings are not specified */
1299 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1300 return 0; // no change
1301 }
1302
1303 if (encoding == Py_None) {
1304 encoding = self->encoding;
1305 if (errors == Py_None) {
1306 errors = self->errors;
1307 }
1308 }
1309 else if (errors == Py_None) {
1310 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001311 if (errors == NULL) {
1312 return -1;
1313 }
INADA Naoki507434f2017-12-21 09:59:53 +09001314 }
1315
1316 const char *c_errors = PyUnicode_AsUTF8(errors);
1317 if (c_errors == NULL) {
1318 return -1;
1319 }
1320
1321 // Create new encoder & decoder
1322 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1323 PyUnicode_AsUTF8(encoding), "codecs.open()");
1324 if (codec_info == NULL) {
1325 return -1;
1326 }
1327 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1328 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1329 Py_DECREF(codec_info);
1330 return -1;
1331 }
1332 Py_DECREF(codec_info);
1333
1334 Py_INCREF(encoding);
1335 Py_INCREF(errors);
1336 Py_SETREF(self->encoding, encoding);
1337 Py_SETREF(self->errors, errors);
1338
1339 return _textiowrapper_fix_encoder_state(self);
1340}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001341
1342/*[clinic input]
1343_io.TextIOWrapper.reconfigure
1344 *
INADA Naoki507434f2017-12-21 09:59:53 +09001345 encoding: object = None
1346 errors: object = None
1347 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001348 line_buffering as line_buffering_obj: object = None
1349 write_through as write_through_obj: object = None
1350
1351Reconfigure the text stream with new parameters.
1352
1353This also does an implicit stream flush.
1354
1355[clinic start generated code]*/
1356
1357static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001358_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1359 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001360 PyObject *line_buffering_obj,
1361 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001362/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001363{
1364 int line_buffering;
1365 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001366 const char *newline = NULL;
1367
1368 /* Check if something is in the read buffer */
1369 if (self->decoded_chars != NULL) {
1370 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001371 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001372 "of stream after the first read");
1373 return NULL;
1374 }
1375 }
1376
1377 if (newline_obj != NULL && newline_obj != Py_None) {
1378 newline = PyUnicode_AsUTF8(newline_obj);
1379 if (newline == NULL || validate_newline(newline) < 0) {
1380 return NULL;
1381 }
1382 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001383
1384 line_buffering = convert_optional_bool(line_buffering_obj,
1385 self->line_buffering);
1386 write_through = convert_optional_bool(write_through_obj,
1387 self->write_through);
1388 if (line_buffering < 0 || write_through < 0) {
1389 return NULL;
1390 }
INADA Naoki507434f2017-12-21 09:59:53 +09001391
Petr Viktorinffd97532020-02-11 17:46:57 +01001392 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001393 if (res == NULL) {
1394 return NULL;
1395 }
INADA Naoki507434f2017-12-21 09:59:53 +09001396 Py_DECREF(res);
1397 self->b2cratio = 0;
1398
1399 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1400 return NULL;
1401 }
1402
1403 if (textiowrapper_change_encoding(
1404 self, encoding, errors, newline_obj != NULL) < 0) {
1405 return NULL;
1406 }
1407
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001408 self->line_buffering = line_buffering;
1409 self->write_through = write_through;
1410 Py_RETURN_NONE;
1411}
1412
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001413static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001414textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001415{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001416 self->ok = 0;
1417 Py_CLEAR(self->buffer);
1418 Py_CLEAR(self->encoding);
1419 Py_CLEAR(self->encoder);
1420 Py_CLEAR(self->decoder);
1421 Py_CLEAR(self->readnl);
1422 Py_CLEAR(self->decoded_chars);
1423 Py_CLEAR(self->pending_bytes);
1424 Py_CLEAR(self->snapshot);
1425 Py_CLEAR(self->errors);
1426 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001427
1428 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001429 return 0;
1430}
1431
1432static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001433textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001434{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001435 self->finalizing = 1;
1436 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001437 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001438 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001439 _PyObject_GC_UNTRACK(self);
1440 if (self->weakreflist != NULL)
1441 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001442 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001443 Py_TYPE(self)->tp_free((PyObject *)self);
1444}
1445
1446static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001447textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001448{
1449 Py_VISIT(self->buffer);
1450 Py_VISIT(self->encoding);
1451 Py_VISIT(self->encoder);
1452 Py_VISIT(self->decoder);
1453 Py_VISIT(self->readnl);
1454 Py_VISIT(self->decoded_chars);
1455 Py_VISIT(self->pending_bytes);
1456 Py_VISIT(self->snapshot);
1457 Py_VISIT(self->errors);
1458 Py_VISIT(self->raw);
1459
1460 Py_VISIT(self->dict);
1461 return 0;
1462}
1463
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001464static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001465textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001466
/* Make the enclosing function return NULL, with an exception set, if the
   stream is closed.

   Exact TextIOWrapper instances take a shortcut: when a raw FileIO object
   is cached its closed flag is read directly, otherwise the `closed` getter
   is called.  Subclasses go through the generic _PyIOBase_check_closed(). */
#define CHECK_CLOSED(self) \
    do { \
        if (!Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _is_closed; \
            if ((self)->raw != NULL) { \
                _is_closed = _PyFileIO_closed((self)->raw); \
            } \
            else { \
                PyObject *_closed_obj = textiowrapper_closed_get(self, NULL); \
                if (_closed_obj == NULL) \
                    return NULL; \
                _is_closed = PyObject_IsTrue(_closed_obj); \
                Py_DECREF(_closed_obj); \
                if (_is_closed < 0) \
                    return NULL; \
            } \
            if (_is_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1493
/* Make the enclosing function return NULL with ValueError if __init__ has
   not completed successfully (self->ok <= 0). */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }
1500
/* Make the enclosing function return NULL with ValueError if the object is
   uninitialized or its underlying buffer has been detached.  Expands to
   more than one statement, so it must be used as a full statement. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }
1508
/* Same checks as CHECK_ATTACHED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1519
1520
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001521/*[clinic input]
1522_io.TextIOWrapper.detach
1523[clinic start generated code]*/
1524
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001525static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001526_io_TextIOWrapper_detach_impl(textio *self)
1527/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001528{
1529 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001530 CHECK_ATTACHED(self);
Petr Viktorinffd97532020-02-11 17:46:57 +01001531 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001532 if (res == NULL)
1533 return NULL;
1534 Py_DECREF(res);
1535 buffer = self->buffer;
1536 self->buffer = NULL;
1537 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001538 return buffer;
1539}
1540
Antoine Pitrou24f36292009-03-28 22:16:42 +00001541/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001542 underlying buffered object, though. */
1543static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001544_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001545{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546 if (self->pending_bytes == NULL)
1547 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001548
Inada Naokibfba8c32019-05-16 15:03:20 +09001549 PyObject *pending = self->pending_bytes;
1550 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001551
Inada Naokibfba8c32019-05-16 15:03:20 +09001552 if (PyBytes_Check(pending)) {
1553 b = pending;
1554 Py_INCREF(b);
1555 }
1556 else if (PyUnicode_Check(pending)) {
1557 assert(PyUnicode_IS_ASCII(pending));
1558 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1559 b = PyBytes_FromStringAndSize(
1560 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1561 if (b == NULL) {
1562 return -1;
1563 }
1564 }
1565 else {
1566 assert(PyList_Check(pending));
1567 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1568 if (b == NULL) {
1569 return -1;
1570 }
1571
1572 char *buf = PyBytes_AsString(b);
1573 Py_ssize_t pos = 0;
1574
1575 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1576 PyObject *obj = PyList_GET_ITEM(pending, i);
1577 char *src;
1578 Py_ssize_t len;
1579 if (PyUnicode_Check(obj)) {
1580 assert(PyUnicode_IS_ASCII(obj));
1581 src = PyUnicode_DATA(obj);
1582 len = PyUnicode_GET_LENGTH(obj);
1583 }
1584 else {
1585 assert(PyBytes_Check(obj));
1586 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1587 Py_DECREF(b);
1588 return -1;
1589 }
1590 }
1591 memcpy(buf + pos, src, len);
1592 pos += len;
1593 }
1594 assert(pos == self->pending_bytes_count);
1595 }
1596
1597 self->pending_bytes_count = 0;
1598 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001599 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001600
1601 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001602 do {
Petr Viktorinffd97532020-02-11 17:46:57 +01001603 ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001604 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001605 Py_DECREF(b);
1606 if (ret == NULL)
1607 return -1;
1608 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001609 return 0;
1610}
1611
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001612/*[clinic input]
1613_io.TextIOWrapper.write
1614 text: unicode
1615 /
1616[clinic start generated code]*/
1617
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001618static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001619_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1620/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001621{
1622 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623 PyObject *b;
1624 Py_ssize_t textlen;
1625 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001626 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001627
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001628 if (PyUnicode_READY(text) == -1)
1629 return NULL;
1630
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001631 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 CHECK_CLOSED(self);
1633
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001634 if (self->encoder == NULL)
1635 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001636
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001637 Py_INCREF(text);
1638
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001639 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640
1641 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001642 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643 haslf = 1;
1644
1645 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001646 PyObject *newtext = _PyObject_CallMethodId(
1647 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648 Py_DECREF(text);
1649 if (newtext == NULL)
1650 return NULL;
1651 text = newtext;
1652 }
1653
Antoine Pitroue96ec682011-07-23 21:46:35 +02001654 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001655 text_needflush = 1;
1656 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001658 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001659 needflush = 1;
1660
1661 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001662 if (self->encodefunc != NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001663 if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
1664 b = text;
1665 Py_INCREF(b);
1666 }
1667 else {
1668 b = (*self->encodefunc)((PyObject *) self, text);
1669 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001670 self->encoding_start_of_stream = 0;
1671 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 else
Petr Viktorinffd97532020-02-11 17:46:57 +01001673 b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
Inada Naokibfba8c32019-05-16 15:03:20 +09001674
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675 Py_DECREF(text);
1676 if (b == NULL)
1677 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001678 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001679 PyErr_Format(PyExc_TypeError,
1680 "encoder should return a bytes object, not '%.200s'",
1681 Py_TYPE(b)->tp_name);
1682 Py_DECREF(b);
1683 return NULL;
1684 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001685
Inada Naokibfba8c32019-05-16 15:03:20 +09001686 Py_ssize_t bytes_len;
1687 if (b == text) {
1688 bytes_len = PyUnicode_GET_LENGTH(b);
1689 }
1690 else {
1691 bytes_len = PyBytes_GET_SIZE(b);
1692 }
1693
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001695 self->pending_bytes_count = 0;
1696 self->pending_bytes = b;
1697 }
1698 else if (!PyList_CheckExact(self->pending_bytes)) {
1699 PyObject *list = PyList_New(2);
1700 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001701 Py_DECREF(b);
1702 return NULL;
1703 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001704 PyList_SET_ITEM(list, 0, self->pending_bytes);
1705 PyList_SET_ITEM(list, 1, b);
1706 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001707 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001708 else {
1709 if (PyList_Append(self->pending_bytes, b) < 0) {
1710 Py_DECREF(b);
1711 return NULL;
1712 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001715
1716 self->pending_bytes_count += bytes_len;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001717 if (self->pending_bytes_count > self->chunk_size || needflush ||
1718 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001719 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 return NULL;
1721 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001722
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 if (needflush) {
Petr Viktorinffd97532020-02-11 17:46:57 +01001724 ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001725 if (ret == NULL)
1726 return NULL;
1727 Py_DECREF(ret);
1728 }
1729
Zackery Spytz23db9352018-06-29 04:14:58 -06001730 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001731 Py_CLEAR(self->snapshot);
1732
1733 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001734 ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 if (ret == NULL)
1736 return NULL;
1737 Py_DECREF(ret);
1738 }
1739
1740 return PyLong_FromSsize_t(textlen);
1741}
1742
1743/* Steal a reference to chars and store it in the decoded_char buffer;
1744 */
1745static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001746textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001748 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001749 self->decoded_chars_used = 0;
1750}
1751
1752static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001753textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001754{
1755 PyObject *chars;
1756 Py_ssize_t avail;
1757
1758 if (self->decoded_chars == NULL)
1759 return PyUnicode_FromStringAndSize(NULL, 0);
1760
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001761 /* decoded_chars is guaranteed to be "ready". */
1762 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001763 - self->decoded_chars_used);
1764
1765 assert(avail >= 0);
1766
1767 if (n < 0 || n > avail)
1768 n = avail;
1769
1770 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001771 chars = PyUnicode_Substring(self->decoded_chars,
1772 self->decoded_chars_used,
1773 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001774 if (chars == NULL)
1775 return NULL;
1776 }
1777 else {
1778 chars = self->decoded_chars;
1779 Py_INCREF(chars);
1780 }
1781
1782 self->decoded_chars_used += n;
1783 return chars;
1784}
1785
1786/* Read and decode the next chunk of data from the BufferedReader.
1787 */
1788static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001789textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001790{
1791 PyObject *dec_buffer = NULL;
1792 PyObject *dec_flags = NULL;
1793 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001794 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001795 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001796 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001797 int eof;
1798
1799 /* The return value is True unless EOF was reached. The decoded string is
1800 * placed in self._decoded_chars (replacing its previous value). The
1801 * entire input chunk is sent to the decoder, though some of it may remain
1802 * buffered in the decoder, yet to be converted.
1803 */
1804
1805 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001806 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001807 return -1;
1808 }
1809
1810 if (self->telling) {
1811 /* To prepare for tell(), we need to snapshot a point in the file
1812 * where the decoder's input buffer is empty.
1813 */
Petr Viktorinffd97532020-02-11 17:46:57 +01001814 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001815 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816 if (state == NULL)
1817 return -1;
1818 /* Given this, we know there was a valid snapshot point
1819 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1820 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001821 if (!PyTuple_Check(state)) {
1822 PyErr_SetString(PyExc_TypeError,
1823 "illegal decoder state");
1824 Py_DECREF(state);
1825 return -1;
1826 }
1827 if (!PyArg_ParseTuple(state,
1828 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1829 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001830 Py_DECREF(state);
1831 return -1;
1832 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001833
1834 if (!PyBytes_Check(dec_buffer)) {
1835 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001836 "illegal decoder state: the first item should be a "
1837 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001838 Py_TYPE(dec_buffer)->tp_name);
1839 Py_DECREF(state);
1840 return -1;
1841 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001842 Py_INCREF(dec_buffer);
1843 Py_INCREF(dec_flags);
1844 Py_DECREF(state);
1845 }
1846
1847 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001848 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001849 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001850 }
1851 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001852 if (chunk_size == NULL)
1853 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001854
Petr Viktorinffd97532020-02-11 17:46:57 +01001855 input_chunk = PyObject_CallMethodOneArg(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001856 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001857 chunk_size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001858 Py_DECREF(chunk_size);
1859 if (input_chunk == NULL)
1860 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001861
1862 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001863 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001864 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001865 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1866 Py_TYPE(input_chunk)->tp_name);
1867 goto fail;
1868 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001869
Antoine Pitroub8503892014-04-29 10:14:02 +02001870 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001871 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872
INADA Naoki507434f2017-12-21 09:59:53 +09001873 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1874 PyBuffer_Release(&input_chunk_buf);
1875 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001876 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001877
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001878 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001879 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001880 if (nchars > 0)
1881 self->b2cratio = (double) nbytes / nchars;
1882 else
1883 self->b2cratio = 0.0;
1884 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001885 eof = 0;
1886
1887 if (self->telling) {
1888 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1889 * next input to be decoded is dec_buffer + input_chunk.
1890 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001891 PyObject *next_input = dec_buffer;
1892 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001893 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001894 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001895 goto fail;
1896 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001897 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1898 if (snapshot == NULL) {
1899 dec_flags = NULL;
1900 goto fail;
1901 }
1902 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001903 }
1904 Py_DECREF(input_chunk);
1905
1906 return (eof == 0);
1907
1908 fail:
1909 Py_XDECREF(dec_buffer);
1910 Py_XDECREF(dec_flags);
1911 Py_XDECREF(input_chunk);
1912 return -1;
1913}
1914
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001915/*[clinic input]
1916_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001917 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001918 /
1919[clinic start generated code]*/
1920
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001922_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001923/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001924{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925 PyObject *result = NULL, *chunks = NULL;
1926
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001927 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001928 CHECK_CLOSED(self);
1929
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001930 if (self->decoder == NULL)
1931 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001932
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001933 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001934 return NULL;
1935
1936 if (n < 0) {
1937 /* Read everything */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001938 PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001939 PyObject *decoded;
1940 if (bytes == NULL)
1941 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001942
Andy Lesterdffe4c02020-03-04 07:15:20 -06001943 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
Victor Stinnerfd821132011-05-25 22:01:33 +02001944 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1945 bytes, 1);
1946 else
1947 decoded = PyObject_CallMethodObjArgs(
1948 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001949 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001950 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001951 goto fail;
1952
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001953 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001954
1955 if (result == NULL) {
1956 Py_DECREF(decoded);
1957 return NULL;
1958 }
1959
1960 PyUnicode_AppendAndDel(&result, decoded);
1961 if (result == NULL)
1962 goto fail;
1963
Zackery Spytz23db9352018-06-29 04:14:58 -06001964 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001965 Py_CLEAR(self->snapshot);
1966 return result;
1967 }
1968 else {
1969 int res = 1;
1970 Py_ssize_t remaining = n;
1971
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001972 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001973 if (result == NULL)
1974 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001975 if (PyUnicode_READY(result) == -1)
1976 goto fail;
1977 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001978
1979 /* Keep reading chunks until we have n characters to return */
1980 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001981 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001982 if (res < 0) {
1983 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1984 when EINTR occurs so we needn't do it ourselves. */
1985 if (_PyIO_trap_eintr()) {
1986 continue;
1987 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001988 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001989 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001990 if (res == 0) /* EOF */
1991 break;
1992 if (chunks == NULL) {
1993 chunks = PyList_New(0);
1994 if (chunks == NULL)
1995 goto fail;
1996 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001997 if (PyUnicode_GET_LENGTH(result) > 0 &&
1998 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 goto fail;
2000 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002001 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002 if (result == NULL)
2003 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002004 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005 }
2006 if (chunks != NULL) {
2007 if (result != NULL && PyList_Append(chunks, result) < 0)
2008 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03002009 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010 if (result == NULL)
2011 goto fail;
2012 Py_CLEAR(chunks);
2013 }
2014 return result;
2015 }
2016 fail:
2017 Py_XDECREF(result);
2018 Py_XDECREF(chunks);
2019 return NULL;
2020}
2021
2022
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002023/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024 that is to the NUL character. Otherwise the function will produce
2025 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002026static const char *
2027find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002028{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002029 if (kind == PyUnicode_1BYTE_KIND) {
2030 assert(ch < 256);
Andy Lestere6be9b52020-02-11 20:28:35 -06002031 return (char *) memchr((const void *) s, (char) ch, end - s);
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002032 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002033 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002034 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002035 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002036 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002037 return s;
2038 if (s == end)
2039 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002040 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002041 }
2042}
2043
2044Py_ssize_t
2045_PyIO_find_line_ending(
2046 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002047 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002048{
Andy Lestere6be9b52020-02-11 20:28:35 -06002049 Py_ssize_t len = (end - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002050
2051 if (translated) {
2052 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002053 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002055 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002056 else {
2057 *consumed = len;
2058 return -1;
2059 }
2060 }
2061 else if (universal) {
2062 /* Universal newline search. Find any of \r, \r\n, \n
2063 * The decoder ensures that \r\n are not split in two pieces
2064 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002065 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002066 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002067 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002069 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002070 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002071 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002072 if (s >= end) {
2073 *consumed = len;
2074 return -1;
2075 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002076 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002077 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002078 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002079 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002081 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002082 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002083 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002084 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002085 }
2086 }
2087 }
2088 else {
2089 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002090 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002091 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002092 /* Assume that readnl is an ASCII character. */
2093 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002094 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002095 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002096 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002097 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002098 *consumed = len;
2099 return -1;
2100 }
2101 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002102 const char *s = start;
2103 const char *e = end - (readnl_len - 1)*kind;
2104 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 if (e < s)
2106 e = s;
2107 while (s < e) {
2108 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002109 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 if (pos == NULL || pos >= e)
2111 break;
2112 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002113 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 break;
2115 }
2116 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002117 return (pos - start)/kind + readnl_len;
2118 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002120 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002121 if (pos == NULL)
2122 *consumed = len;
2123 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002124 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002125 return -1;
2126 }
2127 }
2128}
2129
2130static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002131_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002132{
2133 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2134 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2135 int res;
2136
2137 CHECK_CLOSED(self);
2138
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002139 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002140 return NULL;
2141
2142 chunked = 0;
2143
2144 while (1) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002145 const char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002146 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002147 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002148 Py_ssize_t consumed = 0;
2149
2150 /* First, get some data if necessary */
2151 res = 1;
2152 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002153 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002154 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002155 if (res < 0) {
2156 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2157 when EINTR occurs so we needn't do it ourselves. */
2158 if (_PyIO_trap_eintr()) {
2159 continue;
2160 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002161 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002162 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002163 if (res == 0)
2164 break;
2165 }
2166 if (res == 0) {
2167 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002168 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002169 Py_CLEAR(self->snapshot);
2170 start = endpos = offset_to_buffer = 0;
2171 break;
2172 }
2173
2174 if (remaining == NULL) {
2175 line = self->decoded_chars;
2176 start = self->decoded_chars_used;
2177 offset_to_buffer = 0;
2178 Py_INCREF(line);
2179 }
2180 else {
2181 assert(self->decoded_chars_used == 0);
2182 line = PyUnicode_Concat(remaining, self->decoded_chars);
2183 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002184 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002185 Py_CLEAR(remaining);
2186 if (line == NULL)
2187 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002188 if (PyUnicode_READY(line) == -1)
2189 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002190 }
2191
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002192 ptr = PyUnicode_DATA(line);
2193 line_len = PyUnicode_GET_LENGTH(line);
2194 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002195
2196 endpos = _PyIO_find_line_ending(
2197 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002198 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002199 ptr + kind * start,
2200 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002201 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002202 if (endpos >= 0) {
2203 endpos += start;
2204 if (limit >= 0 && (endpos - start) + chunked >= limit)
2205 endpos = start + limit - chunked;
2206 break;
2207 }
2208
2209 /* We can put aside up to `endpos` */
2210 endpos = consumed + start;
2211 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2212 /* Didn't find line ending, but reached length limit */
2213 endpos = start + limit - chunked;
2214 break;
2215 }
2216
2217 if (endpos > start) {
2218 /* No line ending seen yet - put aside current data */
2219 PyObject *s;
2220 if (chunks == NULL) {
2221 chunks = PyList_New(0);
2222 if (chunks == NULL)
2223 goto error;
2224 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002225 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002226 if (s == NULL)
2227 goto error;
2228 if (PyList_Append(chunks, s) < 0) {
2229 Py_DECREF(s);
2230 goto error;
2231 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002232 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002233 Py_DECREF(s);
2234 }
2235 /* There may be some remaining bytes we'll have to prepend to the
2236 next chunk of data */
2237 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002238 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002239 if (remaining == NULL)
2240 goto error;
2241 }
2242 Py_CLEAR(line);
2243 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002244 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002245 }
2246
2247 if (line != NULL) {
2248 /* Our line ends in the current buffer */
2249 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002250 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2251 PyObject *s = PyUnicode_Substring(line, start, endpos);
2252 Py_CLEAR(line);
2253 if (s == NULL)
2254 goto error;
2255 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002256 }
2257 }
2258 if (remaining != NULL) {
2259 if (chunks == NULL) {
2260 chunks = PyList_New(0);
2261 if (chunks == NULL)
2262 goto error;
2263 }
2264 if (PyList_Append(chunks, remaining) < 0)
2265 goto error;
2266 Py_CLEAR(remaining);
2267 }
2268 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002269 if (line != NULL) {
2270 if (PyList_Append(chunks, line) < 0)
2271 goto error;
2272 Py_DECREF(line);
2273 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002274 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2275 if (line == NULL)
2276 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002277 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002278 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002279 if (line == NULL) {
2280 Py_INCREF(_PyIO_empty_str);
2281 line = _PyIO_empty_str;
2282 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002283
2284 return line;
2285
2286 error:
2287 Py_XDECREF(chunks);
2288 Py_XDECREF(remaining);
2289 Py_XDECREF(line);
2290 return NULL;
2291}
2292
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002293/*[clinic input]
2294_io.TextIOWrapper.readline
2295 size: Py_ssize_t = -1
2296 /
2297[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002298
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002299static PyObject *
2300_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2301/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2302{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002303 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002304 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002305}
2306
2307/* Seek and Tell */
2308
2309typedef struct {
2310 Py_off_t start_pos;
2311 int dec_flags;
2312 int bytes_to_feed;
2313 int chars_to_skip;
2314 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002315} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002316
/*
   To speed up cookie packing/unpacking, the cookie_type fields are stored in
   a temporary byte string which is converted with _PyLong_FromByteArray()
   or _PyLong_AsByteArray() (resp.).
   The macros below give the offset of each cookie_type field inside that
   intermediary byte string. Each offset is expressed relative to the field
   stored just before it, so the layout reads as a chain.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* The least significant byte of start_pos must also be the least
   significant byte of the cookie, so in big-endian mode the fields are
   laid out in reverse order (need_eof first, start_pos last). */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: start_pos comes first, so its least significant byte
   is naturally the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2348
2349static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002350textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002351{
2352 unsigned char buffer[COOKIE_BUF_LEN];
2353 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2354 if (cookieLong == NULL)
2355 return -1;
2356
2357 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002358 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002359 Py_DECREF(cookieLong);
2360 return -1;
2361 }
2362 Py_DECREF(cookieLong);
2363
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002364 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2365 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2366 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2367 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2368 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002369
2370 return 0;
2371}
2372
2373static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002374textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002375{
2376 unsigned char buffer[COOKIE_BUF_LEN];
2377
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002378 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2379 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2380 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2381 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2382 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002383
Christian Heimes743e0cd2012-10-17 23:52:17 +02002384 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2385 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002386}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002387
2388static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002389_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002390{
2391 PyObject *res;
2392 /* When seeking to the start of the stream, we call decoder.reset()
2393 rather than decoder.getstate().
2394 This is for a few decoders such as utf-16 for which the state value
2395 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2396 utf-16, that we are expecting a BOM).
2397 */
2398 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
Petr Viktorinffd97532020-02-11 17:46:57 +01002399 res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002400 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002401 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2402 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002403 if (res == NULL)
2404 return -1;
2405 Py_DECREF(res);
2406 return 0;
2407}
2408
Antoine Pitroue4501852009-05-14 18:55:55 +00002409static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002410_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002411{
2412 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002413 if (start_of_stream) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002414 res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
Antoine Pitroue4501852009-05-14 18:55:55 +00002415 self->encoding_start_of_stream = 1;
2416 }
2417 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01002418 res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
Victor Stinner37834132020-10-27 17:12:53 +01002419 _PyLong_GetZero());
Antoine Pitroue4501852009-05-14 18:55:55 +00002420 self->encoding_start_of_stream = 0;
2421 }
2422 if (res == NULL)
2423 return -1;
2424 Py_DECREF(res);
2425 return 0;
2426}
2427
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002428static int
2429_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2430{
2431 /* Same as _textiowrapper_decoder_setstate() above. */
2432 return _textiowrapper_encoder_reset(
2433 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2434}
2435
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002436/*[clinic input]
2437_io.TextIOWrapper.seek
2438 cookie as cookieObj: object
2439 whence: int = 0
2440 /
2441[clinic start generated code]*/
2442
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002443static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002444_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2445/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002446{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002447 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002448 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002449 PyObject *res;
2450 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002451 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002452
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002453 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002454 CHECK_CLOSED(self);
2455
2456 Py_INCREF(cookieObj);
2457
2458 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002459 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002460 goto fail;
2461 }
2462
Victor Stinner37834132020-10-27 17:12:53 +01002463 PyObject *zero = _PyLong_GetZero(); // borrowed reference
2464
ngie-eign848037c2019-03-02 23:28:26 -08002465 switch (whence) {
2466 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002467 /* seek relative to current position */
Victor Stinner37834132020-10-27 17:12:53 +01002468 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002469 if (cmp < 0)
2470 goto fail;
2471
2472 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002473 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002474 goto fail;
2475 }
2476
2477 /* Seeking to the current position should attempt to
2478 * sync the underlying buffer with the current position.
2479 */
2480 Py_DECREF(cookieObj);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002481 cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002482 if (cookieObj == NULL)
2483 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002484 break;
2485
ngie-eign848037c2019-03-02 23:28:26 -08002486 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002487 /* seek relative to end of file */
Victor Stinner37834132020-10-27 17:12:53 +01002488 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489 if (cmp < 0)
2490 goto fail;
2491
2492 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002493 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494 goto fail;
2495 }
2496
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002497 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002498 if (res == NULL)
2499 goto fail;
2500 Py_DECREF(res);
2501
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002502 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503 Py_CLEAR(self->snapshot);
2504 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002505 res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002506 if (res == NULL)
2507 goto fail;
2508 Py_DECREF(res);
2509 }
2510
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002511 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002512 Py_CLEAR(cookieObj);
2513 if (res == NULL)
2514 goto fail;
2515 if (self->encoder) {
2516 /* If seek() == 0, we are at the start of stream, otherwise not */
Victor Stinner37834132020-10-27 17:12:53 +01002517 cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002518 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2519 Py_DECREF(res);
2520 goto fail;
2521 }
2522 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002523 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002524
ngie-eign848037c2019-03-02 23:28:26 -08002525 case SEEK_SET:
2526 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002527
ngie-eign848037c2019-03-02 23:28:26 -08002528 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002529 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002530 "invalid whence (%d, should be %d, %d or %d)", whence,
2531 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532 goto fail;
2533 }
2534
Victor Stinner37834132020-10-27 17:12:53 +01002535 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536 if (cmp < 0)
2537 goto fail;
2538
2539 if (cmp == 1) {
2540 PyErr_Format(PyExc_ValueError,
2541 "negative seek position %R", cookieObj);
2542 goto fail;
2543 }
2544
Petr Viktorinffd97532020-02-11 17:46:57 +01002545 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002546 if (res == NULL)
2547 goto fail;
2548 Py_DECREF(res);
2549
2550 /* The strategy of seek() is to go back to the safe start point
2551 * and replay the effect of read(chars_to_skip) from there.
2552 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002553 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554 goto fail;
2555
2556 /* Seek back to the safe start point. */
2557 posobj = PyLong_FromOff_t(cookie.start_pos);
2558 if (posobj == NULL)
2559 goto fail;
Petr Viktorinffd97532020-02-11 17:46:57 +01002560 res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002561 Py_DECREF(posobj);
2562 if (res == NULL)
2563 goto fail;
2564 Py_DECREF(res);
2565
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002566 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567 Py_CLEAR(self->snapshot);
2568
2569 /* Restore the decoder to its state from the safe start point. */
2570 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002571 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002572 goto fail;
2573 }
2574
2575 if (cookie.chars_to_skip) {
2576 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002577 PyObject *input_chunk = _PyObject_CallMethodId(
2578 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002579 PyObject *decoded;
2580
2581 if (input_chunk == NULL)
2582 goto fail;
2583
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002584 if (!PyBytes_Check(input_chunk)) {
2585 PyErr_Format(PyExc_TypeError,
2586 "underlying read() should have returned a bytes "
2587 "object, not '%.200s'",
2588 Py_TYPE(input_chunk)->tp_name);
2589 Py_DECREF(input_chunk);
2590 goto fail;
2591 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002592
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002593 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2594 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002595 goto fail;
2596 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002597 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002598
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002599 decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2600 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002601
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002602 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002603 goto fail;
2604
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002605 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606
2607 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002608 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002609 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002610 goto fail;
2611 }
2612 self->decoded_chars_used = cookie.chars_to_skip;
2613 }
2614 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002615 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2616 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002617 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002618 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619 }
2620
Antoine Pitroue4501852009-05-14 18:55:55 +00002621 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2622 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002623 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002624 goto fail;
2625 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002626 return cookieObj;
2627 fail:
2628 Py_XDECREF(cookieObj);
2629 return NULL;
2630
2631}
2632
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002633/*[clinic input]
2634_io.TextIOWrapper.tell
2635[clinic start generated code]*/
2636
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002637static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002638_io_TextIOWrapper_tell_impl(textio *self)
2639/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002640{
2641 PyObject *res;
2642 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002643 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002644 PyObject *next_input;
2645 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002646 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002647 PyObject *saved_state = NULL;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002648 const char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002649 Py_ssize_t dec_buffer_len;
2650 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002651
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002652 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002653 CHECK_CLOSED(self);
2654
2655 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002656 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002657 goto fail;
2658 }
2659 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002660 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661 "telling position disabled by next() call");
2662 goto fail;
2663 }
2664
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002665 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002666 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002667 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002668 if (res == NULL)
2669 goto fail;
2670 Py_DECREF(res);
2671
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002672 posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673 if (posobj == NULL)
2674 goto fail;
2675
2676 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002677 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002678 return posobj;
2679 }
2680
2681#if defined(HAVE_LARGEFILE_SUPPORT)
2682 cookie.start_pos = PyLong_AsLongLong(posobj);
2683#else
2684 cookie.start_pos = PyLong_AsLong(posobj);
2685#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002686 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002687 if (PyErr_Occurred())
2688 goto fail;
2689
2690 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002691 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002692 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693 goto fail;
2694
2695 assert (PyBytes_Check(next_input));
2696
2697 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2698
2699 /* How many decoded characters have been used up since the snapshot? */
2700 if (self->decoded_chars_used == 0) {
2701 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002702 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002703 }
2704
2705 chars_to_skip = self->decoded_chars_used;
2706
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002707 /* Decoder state will be restored at the end */
Petr Viktorinffd97532020-02-11 17:46:57 +01002708 saved_state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002709 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002710 if (saved_state == NULL)
2711 goto fail;
2712
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002713#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002714 PyObject *dec_buffer; \
Petr Viktorinffd97532020-02-11 17:46:57 +01002715 PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002716 _PyIO_str_getstate); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002717 if (_state == NULL) \
2718 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002719 if (!PyTuple_Check(_state)) { \
2720 PyErr_SetString(PyExc_TypeError, \
2721 "illegal decoder state"); \
2722 Py_DECREF(_state); \
2723 goto fail; \
2724 } \
2725 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2726 &dec_buffer, &dec_flags)) \
2727 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002728 Py_DECREF(_state); \
2729 goto fail; \
2730 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002731 if (!PyBytes_Check(dec_buffer)) { \
2732 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002733 "illegal decoder state: the first item should be a " \
2734 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002735 Py_TYPE(dec_buffer)->tp_name); \
2736 Py_DECREF(_state); \
2737 goto fail; \
2738 } \
2739 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002740 Py_DECREF(_state); \
2741 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002742
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002743#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002744 PyObject *_decoded = _PyObject_CallMethodId( \
2745 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002746 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002747 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002748 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002749 Py_DECREF(_decoded); \
2750 } while (0)
2751
2752 /* Fast search for an acceptable start point, close to our
2753 current pos */
2754 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2755 skip_back = 1;
2756 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2757 input = PyBytes_AS_STRING(next_input);
2758 while (skip_bytes > 0) {
2759 /* Decode up to temptative start point */
2760 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2761 goto fail;
2762 DECODER_DECODE(input, skip_bytes, chars_decoded);
2763 if (chars_decoded <= chars_to_skip) {
2764 DECODER_GETSTATE();
2765 if (dec_buffer_len == 0) {
2766 /* Before pos and no bytes buffered in decoder => OK */
2767 cookie.dec_flags = dec_flags;
2768 chars_to_skip -= chars_decoded;
2769 break;
2770 }
2771 /* Skip back by buffered amount and reset heuristic */
2772 skip_bytes -= dec_buffer_len;
2773 skip_back = 1;
2774 }
2775 else {
2776 /* We're too far ahead, skip back a bit */
2777 skip_bytes -= skip_back;
2778 skip_back *= 2;
2779 }
2780 }
2781 if (skip_bytes <= 0) {
2782 skip_bytes = 0;
2783 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2784 goto fail;
2785 }
2786
2787 /* Note our initial start point. */
2788 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002789 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002790 if (chars_to_skip == 0)
2791 goto finally;
2792
2793 /* We should be close to the desired position. Now feed the decoder one
2794 * byte at a time until we reach the `chars_to_skip` target.
2795 * As we go, note the nearest "safe start point" before the current
2796 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002797 * can safely start from there and advance to this location).
2798 */
2799 chars_decoded = 0;
2800 input = PyBytes_AS_STRING(next_input);
2801 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002802 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002803 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002804 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002805
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002806 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002807 /* We got n chars for 1 byte */
2808 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002809 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002810 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002811
2812 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2813 /* Decoder buffer is empty, so this is a safe start point. */
2814 cookie.start_pos += cookie.bytes_to_feed;
2815 chars_to_skip -= chars_decoded;
2816 cookie.dec_flags = dec_flags;
2817 cookie.bytes_to_feed = 0;
2818 chars_decoded = 0;
2819 }
2820 if (chars_decoded >= chars_to_skip)
2821 break;
2822 input++;
2823 }
2824 if (input == input_end) {
2825 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002826 PyObject *decoded = _PyObject_CallMethodId(
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002827 self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002828 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002829 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002830 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002831 Py_DECREF(decoded);
2832 cookie.need_eof = 1;
2833
2834 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002835 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002836 "can't reconstruct logical file position");
2837 goto fail;
2838 }
2839 }
2840
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002841finally:
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002842 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002843 Py_DECREF(saved_state);
2844 if (res == NULL)
2845 return NULL;
2846 Py_DECREF(res);
2847
2848 /* The returned cookie corresponds to the last safe start point. */
2849 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002850 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002851
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002852fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002853 if (saved_state) {
2854 PyObject *type, *value, *traceback;
2855 PyErr_Fetch(&type, &value, &traceback);
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002856 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002857 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002858 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002859 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002860 }
2861 return NULL;
2862}
2863
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002864/*[clinic input]
2865_io.TextIOWrapper.truncate
2866 pos: object = None
2867 /
2868[clinic start generated code]*/
2869
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002870static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002871_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2872/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002873{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002874 PyObject *res;
2875
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002876 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002877
Petr Viktorinffd97532020-02-11 17:46:57 +01002878 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002879 if (res == NULL)
2880 return NULL;
2881 Py_DECREF(res);
2882
Petr Viktorinffd97532020-02-11 17:46:57 +01002883 return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002884}
2885
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002886static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002887textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002888{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002889 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002890 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002891
2892 CHECK_INITIALIZED(self);
2893
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002894 res = PyUnicode_FromString("<_io.TextIOWrapper");
2895 if (res == NULL)
2896 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002897
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002898 status = Py_ReprEnter((PyObject *)self);
2899 if (status != 0) {
2900 if (status > 0) {
2901 PyErr_Format(PyExc_RuntimeError,
2902 "reentrant call inside %s.__repr__",
2903 Py_TYPE(self)->tp_name);
2904 }
2905 goto error;
2906 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002907 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2908 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002909 goto error;
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002910 }
2911 /* Ignore ValueError raised if the underlying stream was detached */
2912 PyErr_Clear();
Antoine Pitrou716c4442009-05-23 19:04:03 +00002913 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002914 if (nameobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002915 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002916 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002917 if (s == NULL)
2918 goto error;
2919 PyUnicode_AppendAndDel(&res, s);
2920 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002921 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002922 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002923 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2924 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002925 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002926 if (modeobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002927 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2928 Py_DECREF(modeobj);
2929 if (s == NULL)
2930 goto error;
2931 PyUnicode_AppendAndDel(&res, s);
2932 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002933 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002934 }
2935 s = PyUnicode_FromFormat("%U encoding=%R>",
2936 res, self->encoding);
2937 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002938 if (status == 0) {
2939 Py_ReprLeave((PyObject *)self);
2940 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002941 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002942
2943 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002944 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002945 if (status == 0) {
2946 Py_ReprLeave((PyObject *)self);
2947 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002948 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002949}
2950
2951
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002952/* Inquiries */
2953
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002954/*[clinic input]
2955_io.TextIOWrapper.fileno
2956[clinic start generated code]*/
2957
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002958static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002959_io_TextIOWrapper_fileno_impl(textio *self)
2960/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002961{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002962 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002963 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002964}
2965
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002966/*[clinic input]
2967_io.TextIOWrapper.seekable
2968[clinic start generated code]*/
2969
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002970static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002971_io_TextIOWrapper_seekable_impl(textio *self)
2972/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002973{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002974 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002975 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002976}
2977
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002978/*[clinic input]
2979_io.TextIOWrapper.readable
2980[clinic start generated code]*/
2981
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002982static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002983_io_TextIOWrapper_readable_impl(textio *self)
2984/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002985{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002986 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002987 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002988}
2989
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002990/*[clinic input]
2991_io.TextIOWrapper.writable
2992[clinic start generated code]*/
2993
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002994static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002995_io_TextIOWrapper_writable_impl(textio *self)
2996/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002997{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002998 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002999 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003000}
3001
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003002/*[clinic input]
3003_io.TextIOWrapper.isatty
3004[clinic start generated code]*/
3005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003006static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003007_io_TextIOWrapper_isatty_impl(textio *self)
3008/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003009{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003010 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003011 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003012}
3013
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003014/*[clinic input]
3015_io.TextIOWrapper.flush
3016[clinic start generated code]*/
3017
Antoine Pitrou243757e2010-11-05 21:15:39 +00003018static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003019_io_TextIOWrapper_flush_impl(textio *self)
3020/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003021{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003022 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003023 CHECK_CLOSED(self);
3024 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003025 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003026 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003027 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003028}
3029
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003030/*[clinic input]
3031_io.TextIOWrapper.close
3032[clinic start generated code]*/
3033
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003034static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003035_io_TextIOWrapper_close_impl(textio *self)
3036/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003037{
3038 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003039 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003040 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003041
Antoine Pitrou6be88762010-05-03 16:48:20 +00003042 res = textiowrapper_closed_get(self, NULL);
3043 if (res == NULL)
3044 return NULL;
3045 r = PyObject_IsTrue(res);
3046 Py_DECREF(res);
3047 if (r < 0)
3048 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003049
Antoine Pitrou6be88762010-05-03 16:48:20 +00003050 if (r > 0) {
3051 Py_RETURN_NONE; /* stream already closed */
3052 }
3053 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003054 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003055 if (self->finalizing) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02003056 res = _PyObject_CallMethodIdOneArg(self->buffer,
3057 &PyId__dealloc_warn,
3058 (PyObject *)self);
Antoine Pitroue033e062010-10-29 10:38:18 +00003059 if (res)
3060 Py_DECREF(res);
3061 else
3062 PyErr_Clear();
3063 }
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003064 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson68623612012-12-20 11:53:11 -06003065 if (res == NULL)
3066 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003067 else
3068 Py_DECREF(res);
3069
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003070 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
Benjamin Peterson68623612012-12-20 11:53:11 -06003071 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003072 _PyErr_ChainExceptions(exc, val, tb);
3073 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003074 }
3075 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003076 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003077}
3078
3079static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003080textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003081{
3082 PyObject *line;
3083
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003084 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003085
3086 self->telling = 0;
Andy Lesterdffe4c02020-03-04 07:15:20 -06003087 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003088 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003089 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003090 }
3091 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01003092 line = PyObject_CallMethodNoArgs((PyObject *)self,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003093 _PyIO_str_readline);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003094 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003095 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003096 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003097 "not '%.200s'", Py_TYPE(line)->tp_name);
3098 Py_DECREF(line);
3099 return NULL;
3100 }
3101 }
3102
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003103 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003104 return NULL;
3105
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003106 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003107 /* Reached EOF or would have blocked */
3108 Py_DECREF(line);
3109 Py_CLEAR(self->snapshot);
3110 self->telling = self->seekable;
3111 return NULL;
3112 }
3113
3114 return line;
3115}
3116
3117static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003118textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003119{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003120 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003121 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003122}
3123
3124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003125textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003126{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003127 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003128 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3129}
3130
3131static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003132textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003133{
3134 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003135 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003136 if (self->decoder == NULL ||
3137 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3138 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003139 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003140 }
3141 return res;
3142}
3143
3144static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003145textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003146{
3147 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003148 Py_INCREF(self->errors);
3149 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003150}
3151
3152static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003153textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003154{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003155 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003156 return PyLong_FromSsize_t(self->chunk_size);
3157}
3158
3159static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003160textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003161{
3162 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003163 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003164 if (arg == NULL) {
3165 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3166 return -1;
3167 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003168 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003169 if (n == -1 && PyErr_Occurred())
3170 return -1;
3171 if (n <= 0) {
3172 PyErr_SetString(PyExc_ValueError,
3173 "a strictly positive integer is required");
3174 return -1;
3175 }
3176 self->chunk_size = n;
3177 return 0;
3178}
3179
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003180#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003181
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003182static PyMethodDef incrementalnewlinedecoder_methods[] = {
3183 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3184 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3185 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3186 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3187 {NULL}
3188};
3189
3190static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3191 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3192 {NULL}
3193};
3194
3195PyTypeObject PyIncrementalNewlineDecoder_Type = {
3196 PyVarObject_HEAD_INIT(NULL, 0)
3197 "_io.IncrementalNewlineDecoder", /*tp_name*/
3198 sizeof(nldecoder_object), /*tp_basicsize*/
3199 0, /*tp_itemsize*/
3200 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003201 0, /*tp_vectorcall_offset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003202 0, /*tp_getattr*/
3203 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003204 0, /*tp_as_async*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003205 0, /*tp_repr*/
3206 0, /*tp_as_number*/
3207 0, /*tp_as_sequence*/
3208 0, /*tp_as_mapping*/
3209 0, /*tp_hash */
3210 0, /*tp_call*/
3211 0, /*tp_str*/
3212 0, /*tp_getattro*/
3213 0, /*tp_setattro*/
3214 0, /*tp_as_buffer*/
3215 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3216 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3217 0, /* tp_traverse */
3218 0, /* tp_clear */
3219 0, /* tp_richcompare */
3220 0, /*tp_weaklistoffset*/
3221 0, /* tp_iter */
3222 0, /* tp_iternext */
3223 incrementalnewlinedecoder_methods, /* tp_methods */
3224 0, /* tp_members */
3225 incrementalnewlinedecoder_getset, /* tp_getset */
3226 0, /* tp_base */
3227 0, /* tp_dict */
3228 0, /* tp_descr_get */
3229 0, /* tp_descr_set */
3230 0, /* tp_dictoffset */
3231 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3232 0, /* tp_alloc */
3233 PyType_GenericNew, /* tp_new */
3234};
3235
3236
3237static PyMethodDef textiowrapper_methods[] = {
3238 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003239 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003240 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3241 _IO_TEXTIOWRAPPER_READ_METHODDEF
3242 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3243 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3244 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3245
3246 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3247 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3248 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3249 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3250 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003251
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003252 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3253 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3254 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003255 {NULL, NULL}
3256};
3257
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003258static PyMemberDef textiowrapper_members[] = {
3259 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3260 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3261 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003262 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003263 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003264 {NULL}
3265};
3266
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003267static PyGetSetDef textiowrapper_getset[] = {
3268 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3269 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003270/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3271*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003272 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3273 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3274 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3275 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003276 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003277};
3278
3279PyTypeObject PyTextIOWrapper_Type = {
3280 PyVarObject_HEAD_INIT(NULL, 0)
3281 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003282 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003283 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003284 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003285 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003286 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003287 0, /*tps_etattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003288 0, /*tp_as_async*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003289 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003290 0, /*tp_as_number*/
3291 0, /*tp_as_sequence*/
3292 0, /*tp_as_mapping*/
3293 0, /*tp_hash */
3294 0, /*tp_call*/
3295 0, /*tp_str*/
3296 0, /*tp_getattro*/
3297 0, /*tp_setattro*/
3298 0, /*tp_as_buffer*/
3299 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrouada319b2019-05-29 22:12:38 +02003300 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003301 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003302 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3303 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003304 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003305 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003306 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003307 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3308 textiowrapper_methods, /* tp_methods */
3309 textiowrapper_members, /* tp_members */
3310 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003311 0, /* tp_base */
3312 0, /* tp_dict */
3313 0, /* tp_descr_get */
3314 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003315 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003316 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003317 0, /* tp_alloc */
3318 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003319 0, /* tp_free */
3320 0, /* tp_is_gc */
3321 0, /* tp_bases */
3322 0, /* tp_mro */
3323 0, /* tp_cache */
3324 0, /* tp_subclasses */
3325 0, /* tp_weaklist */
3326 0, /* tp_del */
3327 0, /* tp_version_tag */
3328 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003329};