blob: 03001ecb0a5b3bb9a6a5b5cb047fcd1af298bf56 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020011#include "pycore_interp.h" // PyInterpreterState.fs_codec
Victor Stinner37834132020-10-27 17:12:53 +010012#include "pycore_long.h" // _PyLong_GetZero()
Victor Stinner710e8262020-10-31 01:02:09 +010013#include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
Victor Stinnerbcda8f12018-11-21 22:27:47 +010014#include "pycore_object.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020015#include "pycore_pystate.h" // _PyInterpreterState_GET()
16#include "structmember.h" // PyMemberDef
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017#include "_iomodule.h"
18
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030019/*[clinic input]
20module _io
21class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
22class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
23[clinic start generated code]*/
24/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
25
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020026_Py_IDENTIFIER(close);
27_Py_IDENTIFIER(_dealloc_warn);
28_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020029_Py_IDENTIFIER(fileno);
30_Py_IDENTIFIER(flush);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020031_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020032_Py_IDENTIFIER(mode);
33_Py_IDENTIFIER(name);
34_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020035_Py_IDENTIFIER(read);
36_Py_IDENTIFIER(readable);
37_Py_IDENTIFIER(replace);
38_Py_IDENTIFIER(reset);
39_Py_IDENTIFIER(seek);
40_Py_IDENTIFIER(seekable);
41_Py_IDENTIFIER(setstate);
INADA Naoki507434f2017-12-21 09:59:53 +090042_Py_IDENTIFIER(strict);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020043_Py_IDENTIFIER(tell);
44_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020045
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046/* TextIOBase */
47
/* Docstring installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
55
56static PyObject *
57_unsupported(const char *message)
58{
Antoine Pitrou712cb732013-12-21 15:51:54 +010059 _PyIO_State *state = IO_STATE();
60 if (state != NULL)
61 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000062 return NULL;
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000066 "Separate the underlying buffer from the TextIOBase and return it.\n"
67 "\n"
68 "After the underlying buffer has been detached, the TextIO is in an\n"
69 "unusable state.\n"
70 );
71
72static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053073textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000074{
75 return _unsupported("detach");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read at most n characters from stream.\n"
80 "\n"
81 "Read from underlying buffer until we have n characters or we hit EOF.\n"
82 "If n is negative or omitted, read until EOF.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("read");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Read until newline or EOF.\n"
93 "\n"
94 "Returns an empty string if EOF is hit immediately.\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("readline");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Write string to stream.\n"
105 "Returns the number of characters written (which is always equal to\n"
106 "the length of the string).\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 return _unsupported("write");
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Encoding of the text stream.\n"
117 "\n"
118 "Subclasses should override.\n"
119 );
120
121static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000122textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000123{
124 Py_RETURN_NONE;
125}
126
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000127PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000128 "Line endings translated so far.\n"
129 "\n"
130 "Only line endings translated during reading are considered.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000142 "The error setting of the decoder or encoder.\n"
143 "\n"
144 "Subclasses should override.\n"
145 );
146
147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000148textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000149{
150 Py_RETURN_NONE;
151}
152
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000153
/* Method table of _io._TextIOBase.  Every entry is a stub that reports
   the operation as unsupported; the table ends with a NULL sentinel. */
static PyMethodDef textiobase_methods[] = {
    {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}
};
161
/* Read-only attributes of _io._TextIOBase (no setter is registered).
   Each getter returns None; the table ends with a NULL sentinel. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}
};
168
/* Type object for _io._TextIOBase.
   The initializer is positional: each value must stay on the slot named in
   its trailing comment.  Only the name, flags, docstring, method table,
   getset table and base type (PyIOBase_Type) are filled in; every other
   slot is left at 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
219
220
221/* IncrementalNewlineDecoder */
222
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped incremental decoder.  Py_None means the input is used as-is
       (no decode call); NULL is treated by decode() as "__init__ not
       called". */
    PyObject *decoder;
    /* Error-handling name stored by __init__ ("strict" when omitted). */
    PyObject *errors;
    /* Set when a trailing '\r' was held back from a non-final chunk; it is
       prepended to the next decoded output. */
    unsigned int pendingcr: 1;
    /* Non-zero when "\r\n" and "\r" are rewritten to "\n". */
    unsigned int translate: 1;
    /* Bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF accumulated by decode(). */
    unsigned int seennl: 3;
} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300232/*[clinic input]
233_io.IncrementalNewlineDecoder.__init__
234 decoder: object
235 translate: int
236 errors: object(c_default="NULL") = "strict"
237
238Codec used when reading a file in universal newlines mode.
239
240It wraps another incremental decoder, translating \r\n and \r into \n.
241It also records the types of newlines encountered. When used with
242translate=False, it ensures that the newline sequence is returned in
243one piece. When used with decoder=None, it expects unicode strings as
244decode input and translates newlines without first invoking an external
245decoder.
246[clinic start generated code]*/
247
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000248static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300249_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
250 PyObject *decoder, int translate,
251 PyObject *errors)
252/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254 self->decoder = decoder;
255 Py_INCREF(decoder);
256
257 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900258 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000259 if (self->errors == NULL)
260 return -1;
261 }
262 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000263 self->errors = errors;
264 }
INADA Naoki507434f2017-12-21 09:59:53 +0900265 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266
Xiang Zhangb08746b2018-10-31 19:49:16 +0800267 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 self->seennl = 0;
269 self->pendingcr = 0;
270
271 return 0;
272}
273
274static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000275incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000276{
277 Py_CLEAR(self->decoder);
278 Py_CLEAR(self->errors);
279 Py_TYPE(self)->tp_free((PyObject *)self);
280}
281
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200282static int
283check_decoded(PyObject *decoded)
284{
285 if (decoded == NULL)
286 return -1;
287 if (!PyUnicode_Check(decoded)) {
288 PyErr_Format(PyExc_TypeError,
289 "decoder should return a string result, not '%.200s'",
290 Py_TYPE(decoded)->tp_name);
291 Py_DECREF(decoded);
292 return -1;
293 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200294 if (PyUnicode_READY(decoded) < 0) {
295 Py_DECREF(decoded);
296 return -1;
297 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200298 return 0;
299}
300
/* Bit flags stored in nldecoder_object.seennl: one bit per kind of line
   ending encountered so far.  SEEN_ALL is the union of the three. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
305
/* Decode one chunk of input and apply newline bookkeeping/translation.

   myself: an nldecoder_object (cast without a type check).
   input:  passed to the wrapped decoder's decode method, or used directly
           as the decoded text when the wrapped decoder is None.
   final:  non-zero when this is the last chunk; a trailing '\r' is then
           not held back.

   Returns a new reference to a str, or NULL with an exception set.
   Raises ValueError when self->decoder is NULL (__init__ never ran) and
   TypeError (via check_decoded) when the decoder returns a non-str.

   Side effects: updates self->pendingcr and ORs newly seen line-ending
   kinds into self->seennl.

   NOTE(review): the scanning loops below read the element at index `len`
   (one past the last character) before testing the bound; this presumably
   relies on str data being followed by a zero terminator -- confirm. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (a '\r' possibly held back from a previous pass is
       re-attached further down) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, out, 0, '\r');
        /* `kind` is used as the per-character byte width here: skip one
           character in the destination and copy the old payload after it. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        const void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; note that self->seennl is left untouched. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* Wider kinds: the byte found by memchr may be part of
                       another character, so confirm character by
                       character. */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translating: build a copy in which "\r\n" and "\r" become
               "\n".  These locals shadow the outer kind/in_str with the
               same values. */
            void *translated;
            int kind = PyUnicode_KIND(output);
            const void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character was at index len. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old output was already released above, so return directly
               instead of going through the error label. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
506
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300507/*[clinic input]
508_io.IncrementalNewlineDecoder.decode
509 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200510 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000512
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300513static PyObject *
514_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
515 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200516/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300517{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000518 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
519}
520
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300521/*[clinic input]
522_io.IncrementalNewlineDecoder.getstate
523[clinic start generated code]*/
524
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300526_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
527/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528{
529 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700530 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000531
532 if (self->decoder != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100533 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200534 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000535 if (state == NULL)
536 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300537 if (!PyTuple_Check(state)) {
538 PyErr_SetString(PyExc_TypeError,
539 "illegal decoder state");
540 Py_DECREF(state);
541 return NULL;
542 }
543 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
544 &buffer, &flag))
545 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000546 Py_DECREF(state);
547 return NULL;
548 }
549 Py_INCREF(buffer);
550 Py_DECREF(state);
551 }
552 else {
553 buffer = PyBytes_FromString("");
554 flag = 0;
555 }
556 flag <<= 1;
557 if (self->pendingcr)
558 flag |= 1;
559 return Py_BuildValue("NK", buffer, flag);
560}
561
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300562/*[clinic input]
563_io.IncrementalNewlineDecoder.setstate
564 state: object
565 /
566[clinic start generated code]*/
567
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000568static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300569_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
570 PyObject *state)
571/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700574 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575
Oren Milman1d1d3e92017-08-20 18:35:36 +0300576 if (!PyTuple_Check(state)) {
577 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300579 }
580 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
581 &buffer, &flag))
582 {
583 return NULL;
584 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585
Victor Stinner7d7e7752014-06-17 23:31:25 +0200586 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000587 flag >>= 1;
588
589 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200590 return _PyObject_CallMethodId(self->decoder,
591 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 else
593 Py_RETURN_NONE;
594}
595
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300596/*[clinic input]
597_io.IncrementalNewlineDecoder.reset
598[clinic start generated code]*/
599
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000600static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300601_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
602/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603{
604 self->seennl = 0;
605 self->pendingcr = 0;
606 if (self->decoder != Py_None)
Petr Viktorinffd97532020-02-11 17:46:57 +0100607 return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000608 else
609 Py_RETURN_NONE;
610}
611
612static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614{
615 switch (self->seennl) {
616 case SEEN_CR:
617 return PyUnicode_FromString("\r");
618 case SEEN_LF:
619 return PyUnicode_FromString("\n");
620 case SEEN_CRLF:
621 return PyUnicode_FromString("\r\n");
622 case SEEN_CR | SEEN_LF:
623 return Py_BuildValue("ss", "\r", "\n");
624 case SEEN_CR | SEEN_CRLF:
625 return Py_BuildValue("ss", "\r", "\r\n");
626 case SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("ss", "\n", "\r\n");
628 case SEEN_CR | SEEN_LF | SEEN_CRLF:
629 return Py_BuildValue("sss", "\r", "\n", "\r\n");
630 default:
631 Py_RETURN_NONE;
632 }
633
634}
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636/* TextIOWrapper */
637
/* Function-pointer type of textio.encodefunc: takes two objects and
   returns an object.
   NOTE(review): the specialized encoders in this file are declared with a
   `textio *` first parameter, so they presumably get cast to this type
   where they are stored -- confirm at the assignment site. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
640
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;  /* passed through PyUnicode_AsUTF8() by the
                          specialized encoders */
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000699
/* Forward declaration; the definition is not part of this section of the
   file. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
702
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703/* A couple of specialized cases in order to bypass the slow incremental
704 encoding methods for the most popular encodings. */
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
INADA Naoki507434f2017-12-21 09:59:53 +0900709 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710}
711
712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100715 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900716 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717}
718
719static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000720utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100722 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900723 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724}
725
726static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000727utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728{
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 if (!self->encoding_start_of_stream) {
730 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200731#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000734 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000736 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100737 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900738 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739}
740
Antoine Pitroue4501852009-05-14 18:55:55 +0000741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000743{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100744 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900745 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000746}
747
748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000750{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100751 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900752 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000757{
758 if (!self->encoding_start_of_stream) {
759 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200760#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000761 return utf32be_encode(self, text);
762#else
763 return utf32le_encode(self, text);
764#endif
765 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100766 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900767 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000768}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
INADA Naoki507434f2017-12-21 09:59:53 +0900773 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000774}
775
776static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000777latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778{
INADA Naoki507434f2017-12-21 09:59:53 +0900779 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780}
781
Inada Naokibfba8c32019-05-16 15:03:20 +0900782// Return true when encoding can be skipped when text is ascii.
783static inline int
784is_asciicompat_encoding(encodefunc_t f)
785{
786 return f == (encodefunc_t) ascii_encode
787 || f == (encodefunc_t) latin1_encode
788 || f == (encodefunc_t) utf8_encode;
789}
790
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791/* Map normalized encoding names onto the specialized encoding funcs */
792
793typedef struct {
794 const char *name;
795 encodefunc_t encodefunc;
796} encodefuncentry;
797
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200798static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799 {"ascii", (encodefunc_t) ascii_encode},
800 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000801 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802 {"utf-16-be", (encodefunc_t) utf16be_encode},
803 {"utf-16-le", (encodefunc_t) utf16le_encode},
804 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000805 {"utf-32-be", (encodefunc_t) utf32be_encode},
806 {"utf-32-le", (encodefunc_t) utf32le_encode},
807 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000808 {NULL, NULL}
809};
810
INADA Naoki507434f2017-12-21 09:59:53 +0900811static int
812validate_newline(const char *newline)
813{
814 if (newline && newline[0] != '\0'
815 && !(newline[0] == '\n' && newline[1] == '\0')
816 && !(newline[0] == '\r' && newline[1] == '\0')
817 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
818 PyErr_Format(PyExc_ValueError,
819 "illegal newline value: %s", newline);
820 return -1;
821 }
822 return 0;
823}
824
825static int
826set_newline(textio *self, const char *newline)
827{
828 PyObject *old = self->readnl;
829 if (newline == NULL) {
830 self->readnl = NULL;
831 }
832 else {
833 self->readnl = PyUnicode_FromString(newline);
834 if (self->readnl == NULL) {
835 self->readnl = old;
836 return -1;
837 }
838 }
839 self->readuniversal = (newline == NULL || newline[0] == '\0');
840 self->readtranslate = (newline == NULL);
841 self->writetranslate = (newline == NULL || newline[0] != '\0');
842 if (!self->readuniversal && self->readnl != NULL) {
843 // validate_newline() accepts only ASCII newlines.
844 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
845 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
846 if (strcmp(self->writenl, "\n") == 0) {
847 self->writenl = NULL;
848 }
849 }
850 else {
851#ifdef MS_WINDOWS
852 self->writenl = "\r\n";
853#else
854 self->writenl = NULL;
855#endif
856 }
857 Py_XDECREF(old);
858 return 0;
859}
860
861static int
862_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
863 const char *errors)
864{
865 PyObject *res;
866 int r;
867
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200868 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
INADA Naoki507434f2017-12-21 09:59:53 +0900869 if (res == NULL)
870 return -1;
871
872 r = PyObject_IsTrue(res);
873 Py_DECREF(res);
874 if (r == -1)
875 return -1;
876
877 if (r != 1)
878 return 0;
879
880 Py_CLEAR(self->decoder);
881 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
882 if (self->decoder == NULL)
883 return -1;
884
885 if (self->readuniversal) {
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300886 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
INADA Naoki507434f2017-12-21 09:59:53 +0900887 (PyObject *)&PyIncrementalNewlineDecoder_Type,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300888 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
INADA Naoki507434f2017-12-21 09:59:53 +0900889 if (incrementalDecoder == NULL)
890 return -1;
891 Py_CLEAR(self->decoder);
892 self->decoder = incrementalDecoder;
893 }
894
895 return 0;
896}
897
898static PyObject*
899_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
900{
901 PyObject *chars;
902
Andy Lesterdffe4c02020-03-04 07:15:20 -0600903 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
INADA Naoki507434f2017-12-21 09:59:53 +0900904 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
905 else
906 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
907 eof ? Py_True : Py_False, NULL);
908
909 if (check_decoded(chars) < 0)
910 // check_decoded already decreases refcount
911 return NULL;
912
913 return chars;
914}
915
916static int
917_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
918 const char *errors)
919{
920 PyObject *res;
921 int r;
922
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200923 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
INADA Naoki507434f2017-12-21 09:59:53 +0900924 if (res == NULL)
925 return -1;
926
927 r = PyObject_IsTrue(res);
928 Py_DECREF(res);
929 if (r == -1)
930 return -1;
931
932 if (r != 1)
933 return 0;
934
935 Py_CLEAR(self->encoder);
936 self->encodefunc = NULL;
937 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
938 if (self->encoder == NULL)
939 return -1;
940
941 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200942 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
943 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900944 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200945 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900946 const encodefuncentry *e = encodefuncs;
947 while (e->name != NULL) {
948 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
949 self->encodefunc = e->encodefunc;
950 break;
951 }
952 e++;
953 }
954 }
955 Py_XDECREF(res);
956
957 return 0;
958}
959
960static int
961_textiowrapper_fix_encoder_state(textio *self)
962{
963 if (!self->seekable || !self->encoder) {
964 return 0;
965 }
966
967 self->encoding_start_of_stream = 1;
968
Petr Viktorinffd97532020-02-11 17:46:57 +0100969 PyObject *cookieObj = PyObject_CallMethodNoArgs(
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200970 self->buffer, _PyIO_str_tell);
INADA Naoki507434f2017-12-21 09:59:53 +0900971 if (cookieObj == NULL) {
972 return -1;
973 }
974
Victor Stinner37834132020-10-27 17:12:53 +0100975 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
INADA Naoki507434f2017-12-21 09:59:53 +0900976 Py_DECREF(cookieObj);
977 if (cmp < 0) {
978 return -1;
979 }
980
981 if (cmp == 0) {
982 self->encoding_start_of_stream = 0;
Petr Viktorinffd97532020-02-11 17:46:57 +0100983 PyObject *res = PyObject_CallMethodOneArg(
Victor Stinner37834132020-10-27 17:12:53 +0100984 self->encoder, _PyIO_str_setstate, _PyLong_GetZero());
INADA Naoki507434f2017-12-21 09:59:53 +0900985 if (res == NULL) {
986 return -1;
987 }
988 Py_DECREF(res);
989 }
990
991 return 0;
992}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000993
Victor Stinner22eb6892019-06-26 00:51:05 +0200994static int
995io_check_errors(PyObject *errors)
996{
997 assert(errors != NULL && errors != Py_None);
998
Victor Stinner81a7be32020-04-14 15:14:01 +0200999 PyInterpreterState *interp = _PyInterpreterState_GET();
Victor Stinner22eb6892019-06-26 00:51:05 +02001000#ifndef Py_DEBUG
1001 /* In release mode, only check in development mode (-X dev) */
Victor Stinnerda7933e2020-04-13 03:04:28 +02001002 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001003 return 0;
1004 }
1005#else
1006 /* Always check in debug mode */
1007#endif
1008
1009 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1010 before_PyUnicode_InitEncodings() is called. */
Victor Stinner3d17c042020-05-14 01:48:38 +02001011 if (!interp->unicode.fs_codec.encoding) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001012 return 0;
1013 }
1014
1015 Py_ssize_t name_length;
1016 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1017 if (name == NULL) {
1018 return -1;
1019 }
1020 if (strlen(name) != (size_t)name_length) {
1021 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1022 return -1;
1023 }
1024 PyObject *handler = PyCodec_LookupError(name);
1025 if (handler != NULL) {
1026 Py_DECREF(handler);
1027 return 0;
1028 }
1029 return -1;
1030}
1031
1032
1033
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001034/*[clinic input]
1035_io.TextIOWrapper.__init__
1036 buffer: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001037 encoding: str(accept={str, NoneType}) = None
INADA Naoki507434f2017-12-21 09:59:53 +09001038 errors: object = None
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001039 newline: str(accept={str, NoneType}) = None
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001040 line_buffering: bool(accept={int}) = False
1041 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001043Character and line based layer over a BufferedIOBase object, buffer.
1044
1045encoding gives the name of the encoding that the stream will be
1046decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1047
1048errors determines the strictness of encoding and decoding (see
1049help(codecs.Codec) or the documentation for codecs.register) and
1050defaults to "strict".
1051
1052newline controls how line endings are handled. It can be None, '',
1053'\n', '\r', and '\r\n'. It works as follows:
1054
1055* On input, if newline is None, universal newlines mode is
1056 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1057 these are translated into '\n' before being returned to the
1058 caller. If it is '', universal newline mode is enabled, but line
1059 endings are returned to the caller untranslated. If it has any of
1060 the other legal values, input lines are only terminated by the given
1061 string, and the line ending is returned to the caller untranslated.
1062
1063* On output, if newline is None, any '\n' characters written are
1064 translated to the system default line separator, os.linesep. If
1065 newline is '' or '\n', no translation takes place. If newline is any
1066 of the other legal values, any '\n' characters written are translated
1067 to the given string.
1068
1069If line_buffering is True, a call to flush is implied when a call to
1070write contains a newline character.
1071[clinic start generated code]*/
1072
1073static int
1074_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001075 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001076 const char *newline, int line_buffering,
1077 int write_through)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001078/*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001079{
1080 PyObject *raw, *codec_info = NULL;
1081 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001082 PyObject *res;
1083 int r;
1084
1085 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001086 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087
INADA Naoki507434f2017-12-21 09:59:53 +09001088 if (errors == Py_None) {
1089 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001090 if (errors == NULL) {
1091 return -1;
1092 }
INADA Naoki507434f2017-12-21 09:59:53 +09001093 }
1094 else if (!PyUnicode_Check(errors)) {
1095 // Check 'errors' argument here because Argument Clinic doesn't support
1096 // 'str(accept={str, NoneType})' converter.
1097 PyErr_Format(
1098 PyExc_TypeError,
1099 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001100 Py_TYPE(errors)->tp_name);
INADA Naoki507434f2017-12-21 09:59:53 +09001101 return -1;
1102 }
Victor Stinner22eb6892019-06-26 00:51:05 +02001103 else if (io_check_errors(errors)) {
1104 return -1;
1105 }
INADA Naoki507434f2017-12-21 09:59:53 +09001106
1107 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001108 return -1;
1109 }
1110
1111 Py_CLEAR(self->buffer);
1112 Py_CLEAR(self->encoding);
1113 Py_CLEAR(self->encoder);
1114 Py_CLEAR(self->decoder);
1115 Py_CLEAR(self->readnl);
1116 Py_CLEAR(self->decoded_chars);
1117 Py_CLEAR(self->pending_bytes);
1118 Py_CLEAR(self->snapshot);
1119 Py_CLEAR(self->errors);
1120 Py_CLEAR(self->raw);
1121 self->decoded_chars_used = 0;
1122 self->pending_bytes_count = 0;
1123 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001124 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125
1126 if (encoding == NULL) {
1127 /* Try os.device_encoding(fileno) */
1128 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001129 state = IO_STATE();
1130 if (state == NULL)
1131 goto error;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001132 fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001133 /* Ignore only AttributeError and UnsupportedOperation */
1134 if (fileno == NULL) {
1135 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1136 PyErr_ExceptionMatches(state->unsupported_operation)) {
1137 PyErr_Clear();
1138 }
1139 else {
1140 goto error;
1141 }
1142 }
1143 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001144 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001145 Py_DECREF(fileno);
1146 if (fd == -1 && PyErr_Occurred()) {
1147 goto error;
1148 }
1149
1150 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001151 if (self->encoding == NULL)
1152 goto error;
1153 else if (!PyUnicode_Check(self->encoding))
1154 Py_CLEAR(self->encoding);
1155 }
1156 }
1157 if (encoding == NULL && self->encoding == NULL) {
Victor Stinner82458b62020-11-01 20:59:35 +01001158 self->encoding = _Py_GetLocaleEncodingObject();
Antoine Pitrou932ff832013-08-01 21:04:50 +02001159 if (self->encoding == NULL) {
Victor Stinner710e8262020-10-31 01:02:09 +01001160 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001161 }
Victor Stinner710e8262020-10-31 01:02:09 +01001162 assert(PyUnicode_Check(self->encoding));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001163 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001164 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001165 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001166 if (encoding == NULL)
1167 goto error;
1168 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001169 else if (encoding != NULL) {
1170 self->encoding = PyUnicode_FromString(encoding);
1171 if (self->encoding == NULL)
1172 goto error;
1173 }
1174 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001175 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001176 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001177 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001178 }
1179
Nick Coghlana9b15242014-02-04 22:11:18 +10001180 /* Check we have been asked for a real text encoding */
1181 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1182 if (codec_info == NULL) {
1183 Py_CLEAR(self->encoding);
1184 goto error;
1185 }
1186
1187 /* XXX: Failures beyond this point have the potential to leak elements
1188 * of the partially constructed object (like self->encoding)
1189 */
1190
INADA Naoki507434f2017-12-21 09:59:53 +09001191 Py_INCREF(errors);
1192 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001193 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001194 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001195 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001196 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001197 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001198 }
1199
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001200 self->buffer = buffer;
1201 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001202
INADA Naoki507434f2017-12-21 09:59:53 +09001203 /* Build the decoder object */
1204 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1205 goto error;
1206
1207 /* Build the encoder object */
1208 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1209 goto error;
1210
1211 /* Finished sorting out the codec details */
1212 Py_CLEAR(codec_info);
1213
Andy Lesterdffe4c02020-03-04 07:15:20 -06001214 if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1215 Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1216 Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001217 {
1218 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1219 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001220 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001221 if (raw != NULL) {
Andy Lesterdffe4c02020-03-04 07:15:20 -06001222 if (Py_IS_TYPE(raw, &PyFileIO_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001223 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001224 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001225 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001226 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001227 }
1228
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001229 res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001230 if (res == NULL)
1231 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001232 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001233 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001234 if (r < 0)
1235 goto error;
1236 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001237
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001238 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1239 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001240 goto error;
1241 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001242 Py_XDECREF(res);
1243 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001244
Antoine Pitroue4501852009-05-14 18:55:55 +00001245 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001246 if (_textiowrapper_fix_encoder_state(self) < 0) {
1247 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001248 }
1249
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001250 self->ok = 1;
1251 return 0;
1252
1253 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001254 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001255 return -1;
1256}
1257
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001258/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1259 * -1 on error.
1260 */
1261static int
1262convert_optional_bool(PyObject *obj, int default_value)
1263{
1264 long v;
1265 if (obj == Py_None) {
1266 v = default_value;
1267 }
1268 else {
1269 v = PyLong_AsLong(obj);
1270 if (v == -1 && PyErr_Occurred())
1271 return -1;
1272 }
1273 return v != 0;
1274}
1275
INADA Naoki507434f2017-12-21 09:59:53 +09001276static int
1277textiowrapper_change_encoding(textio *self, PyObject *encoding,
1278 PyObject *errors, int newline_changed)
1279{
1280 /* Use existing settings where new settings are not specified */
1281 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1282 return 0; // no change
1283 }
1284
1285 if (encoding == Py_None) {
1286 encoding = self->encoding;
1287 if (errors == Py_None) {
1288 errors = self->errors;
1289 }
1290 }
1291 else if (errors == Py_None) {
1292 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001293 if (errors == NULL) {
1294 return -1;
1295 }
INADA Naoki507434f2017-12-21 09:59:53 +09001296 }
1297
1298 const char *c_errors = PyUnicode_AsUTF8(errors);
1299 if (c_errors == NULL) {
1300 return -1;
1301 }
1302
1303 // Create new encoder & decoder
1304 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1305 PyUnicode_AsUTF8(encoding), "codecs.open()");
1306 if (codec_info == NULL) {
1307 return -1;
1308 }
1309 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1310 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1311 Py_DECREF(codec_info);
1312 return -1;
1313 }
1314 Py_DECREF(codec_info);
1315
1316 Py_INCREF(encoding);
1317 Py_INCREF(errors);
1318 Py_SETREF(self->encoding, encoding);
1319 Py_SETREF(self->errors, errors);
1320
1321 return _textiowrapper_fix_encoder_state(self);
1322}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001323
1324/*[clinic input]
1325_io.TextIOWrapper.reconfigure
1326 *
INADA Naoki507434f2017-12-21 09:59:53 +09001327 encoding: object = None
1328 errors: object = None
1329 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001330 line_buffering as line_buffering_obj: object = None
1331 write_through as write_through_obj: object = None
1332
1333Reconfigure the text stream with new parameters.
1334
1335This also does an implicit stream flush.
1336
1337[clinic start generated code]*/
1338
1339static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001340_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1341 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001342 PyObject *line_buffering_obj,
1343 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001344/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001345{
1346 int line_buffering;
1347 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001348 const char *newline = NULL;
1349
1350 /* Check if something is in the read buffer */
1351 if (self->decoded_chars != NULL) {
1352 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001353 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001354 "of stream after the first read");
1355 return NULL;
1356 }
1357 }
1358
1359 if (newline_obj != NULL && newline_obj != Py_None) {
1360 newline = PyUnicode_AsUTF8(newline_obj);
1361 if (newline == NULL || validate_newline(newline) < 0) {
1362 return NULL;
1363 }
1364 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001365
1366 line_buffering = convert_optional_bool(line_buffering_obj,
1367 self->line_buffering);
1368 write_through = convert_optional_bool(write_through_obj,
1369 self->write_through);
1370 if (line_buffering < 0 || write_through < 0) {
1371 return NULL;
1372 }
INADA Naoki507434f2017-12-21 09:59:53 +09001373
Petr Viktorinffd97532020-02-11 17:46:57 +01001374 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001375 if (res == NULL) {
1376 return NULL;
1377 }
INADA Naoki507434f2017-12-21 09:59:53 +09001378 Py_DECREF(res);
1379 self->b2cratio = 0;
1380
1381 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1382 return NULL;
1383 }
1384
1385 if (textiowrapper_change_encoding(
1386 self, encoding, errors, newline_obj != NULL) < 0) {
1387 return NULL;
1388 }
1389
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001390 self->line_buffering = line_buffering;
1391 self->write_through = write_through;
1392 Py_RETURN_NONE;
1393}
1394
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001395static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001396textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001397{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001398 self->ok = 0;
1399 Py_CLEAR(self->buffer);
1400 Py_CLEAR(self->encoding);
1401 Py_CLEAR(self->encoder);
1402 Py_CLEAR(self->decoder);
1403 Py_CLEAR(self->readnl);
1404 Py_CLEAR(self->decoded_chars);
1405 Py_CLEAR(self->pending_bytes);
1406 Py_CLEAR(self->snapshot);
1407 Py_CLEAR(self->errors);
1408 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001409
1410 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001411 return 0;
1412}
1413
1414static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001415textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001416{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001417 self->finalizing = 1;
1418 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001419 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001420 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001421 _PyObject_GC_UNTRACK(self);
1422 if (self->weakreflist != NULL)
1423 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001424 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001425 Py_TYPE(self)->tp_free((PyObject *)self);
1426}
1427
1428static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001429textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001430{
1431 Py_VISIT(self->buffer);
1432 Py_VISIT(self->encoding);
1433 Py_VISIT(self->encoder);
1434 Py_VISIT(self->decoder);
1435 Py_VISIT(self->readnl);
1436 Py_VISIT(self->decoded_chars);
1437 Py_VISIT(self->pending_bytes);
1438 Py_VISIT(self->snapshot);
1439 Py_VISIT(self->errors);
1440 Py_VISIT(self->raw);
1441
1442 Py_VISIT(self->dict);
1443 return 0;
1444}
1445
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001446static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001447textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001448
/* Return NULL from the enclosing function, with an exception set, when the
   stream is closed. This macro takes some shortcuts to make the common case
   faster: an exact TextIOWrapper asks the cached raw FileIO object when
   there is one, and only otherwise goes through the `closed` getter. Other
   types use the generic IOBase check. */
#define CHECK_CLOSED(self) \
    do { \
        if (!Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _is_closed; \
            if (self->raw != NULL) { \
                _is_closed = _PyFileIO_closed(self->raw); \
            } \
            else { \
                PyObject *_closed_obj = textiowrapper_closed_get(self, NULL); \
                if (_closed_obj == NULL) \
                    return NULL; \
                _is_closed = PyObject_IsTrue(_closed_obj); \
                Py_DECREF(_closed_obj); \
                if (_is_closed < 0) \
                    return NULL; \
            } \
            if (_is_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1475
/* Return NULL from the enclosing function, with ValueError set, when the
   object has not been successfully initialized (ok <= 0).
   Wrapped in do/while(0) so the expansion is a single statement and is safe
   inside an unbraced if/else; use it with a trailing semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1482
/* Return NULL from the enclosing function, with ValueError set, when the
   object is uninitialized or its underlying buffer has been detached.
   Wrapped in do/while(0) so the two checks expand to a single statement;
   use it with a trailing semicolon. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)
1490
/* Same checks as CHECK_ATTACHED(), for functions that report errors by
   returning -1 instead of NULL.
   Wrapped in do/while(0) so the expansion is a single statement and is safe
   inside an unbraced if/else; use it with a trailing semicolon. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1501
1502
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001503/*[clinic input]
1504_io.TextIOWrapper.detach
1505[clinic start generated code]*/
1506
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001507static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001508_io_TextIOWrapper_detach_impl(textio *self)
1509/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001510{
1511 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001512 CHECK_ATTACHED(self);
Petr Viktorinffd97532020-02-11 17:46:57 +01001513 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001514 if (res == NULL)
1515 return NULL;
1516 Py_DECREF(res);
1517 buffer = self->buffer;
1518 self->buffer = NULL;
1519 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001520 return buffer;
1521}
1522
Antoine Pitrou24f36292009-03-28 22:16:42 +00001523/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001524 underlying buffered object, though. */
1525static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001526_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001527{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001528 if (self->pending_bytes == NULL)
1529 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001530
Inada Naokibfba8c32019-05-16 15:03:20 +09001531 PyObject *pending = self->pending_bytes;
1532 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001533
Inada Naokibfba8c32019-05-16 15:03:20 +09001534 if (PyBytes_Check(pending)) {
1535 b = pending;
1536 Py_INCREF(b);
1537 }
1538 else if (PyUnicode_Check(pending)) {
1539 assert(PyUnicode_IS_ASCII(pending));
1540 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1541 b = PyBytes_FromStringAndSize(
1542 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1543 if (b == NULL) {
1544 return -1;
1545 }
1546 }
1547 else {
1548 assert(PyList_Check(pending));
1549 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1550 if (b == NULL) {
1551 return -1;
1552 }
1553
1554 char *buf = PyBytes_AsString(b);
1555 Py_ssize_t pos = 0;
1556
1557 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1558 PyObject *obj = PyList_GET_ITEM(pending, i);
1559 char *src;
1560 Py_ssize_t len;
1561 if (PyUnicode_Check(obj)) {
1562 assert(PyUnicode_IS_ASCII(obj));
1563 src = PyUnicode_DATA(obj);
1564 len = PyUnicode_GET_LENGTH(obj);
1565 }
1566 else {
1567 assert(PyBytes_Check(obj));
1568 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1569 Py_DECREF(b);
1570 return -1;
1571 }
1572 }
1573 memcpy(buf + pos, src, len);
1574 pos += len;
1575 }
1576 assert(pos == self->pending_bytes_count);
1577 }
1578
1579 self->pending_bytes_count = 0;
1580 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001581 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001582
1583 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001584 do {
Petr Viktorinffd97532020-02-11 17:46:57 +01001585 ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001586 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001587 Py_DECREF(b);
Inada Naoki01806d52021-02-22 08:29:30 +09001588 // NOTE: We cleared buffer but we don't know how many bytes are actually written
1589 // when an error occurred.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001590 if (ret == NULL)
1591 return -1;
1592 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593 return 0;
1594}
1595
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001596/*[clinic input]
1597_io.TextIOWrapper.write
1598 text: unicode
1599 /
1600[clinic start generated code]*/
1601
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001603_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1604/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001605{
1606 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001607 PyObject *b;
1608 Py_ssize_t textlen;
1609 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001610 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001611
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001612 if (PyUnicode_READY(text) == -1)
1613 return NULL;
1614
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001615 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 CHECK_CLOSED(self);
1617
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001618 if (self->encoder == NULL)
1619 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001620
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001621 Py_INCREF(text);
1622
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001623 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001624
1625 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001626 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001627 haslf = 1;
1628
1629 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001630 PyObject *newtext = _PyObject_CallMethodId(
1631 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 Py_DECREF(text);
1633 if (newtext == NULL)
1634 return NULL;
1635 text = newtext;
1636 }
1637
Antoine Pitroue96ec682011-07-23 21:46:35 +02001638 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001639 text_needflush = 1;
1640 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001642 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643 needflush = 1;
1644
1645 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001646 if (self->encodefunc != NULL) {
Inada Naoki01806d52021-02-22 08:29:30 +09001647 if (PyUnicode_IS_ASCII(text) &&
1648 // See bpo-43260
1649 PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1650 is_asciicompat_encoding(self->encodefunc)) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001651 b = text;
1652 Py_INCREF(b);
1653 }
1654 else {
1655 b = (*self->encodefunc)((PyObject *) self, text);
1656 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001657 self->encoding_start_of_stream = 0;
1658 }
Inada Naoki01806d52021-02-22 08:29:30 +09001659 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01001660 b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
Inada Naoki01806d52021-02-22 08:29:30 +09001661 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001662
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663 Py_DECREF(text);
1664 if (b == NULL)
1665 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001666 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001667 PyErr_Format(PyExc_TypeError,
1668 "encoder should return a bytes object, not '%.200s'",
1669 Py_TYPE(b)->tp_name);
1670 Py_DECREF(b);
1671 return NULL;
1672 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673
Inada Naokibfba8c32019-05-16 15:03:20 +09001674 Py_ssize_t bytes_len;
1675 if (b == text) {
1676 bytes_len = PyUnicode_GET_LENGTH(b);
1677 }
1678 else {
1679 bytes_len = PyBytes_GET_SIZE(b);
1680 }
1681
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001682 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001683 self->pending_bytes_count = 0;
1684 self->pending_bytes = b;
1685 }
Inada Naoki01806d52021-02-22 08:29:30 +09001686 else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1687 // Prevent to concatenate more than chunk_size data.
1688 if (_textiowrapper_writeflush(self) < 0) {
1689 Py_DECREF(b);
1690 return NULL;
1691 }
1692 self->pending_bytes = b;
1693 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001694 else if (!PyList_CheckExact(self->pending_bytes)) {
1695 PyObject *list = PyList_New(2);
1696 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 Py_DECREF(b);
1698 return NULL;
1699 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001700 PyList_SET_ITEM(list, 0, self->pending_bytes);
1701 PyList_SET_ITEM(list, 1, b);
1702 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001704 else {
1705 if (PyList_Append(self->pending_bytes, b) < 0) {
1706 Py_DECREF(b);
1707 return NULL;
1708 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001711
1712 self->pending_bytes_count += bytes_len;
Inada Naoki01806d52021-02-22 08:29:30 +09001713 if (self->pending_bytes_count >= self->chunk_size || needflush ||
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001714 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001715 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001716 return NULL;
1717 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001718
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001719 if (needflush) {
Petr Viktorinffd97532020-02-11 17:46:57 +01001720 ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (ret == NULL)
1722 return NULL;
1723 Py_DECREF(ret);
1724 }
1725
Zackery Spytz23db9352018-06-29 04:14:58 -06001726 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001727 Py_CLEAR(self->snapshot);
1728
1729 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001730 ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001731 if (ret == NULL)
1732 return NULL;
1733 Py_DECREF(ret);
1734 }
1735
1736 return PyLong_FromSsize_t(textlen);
1737}
1738
1739/* Steal a reference to chars and store it in the decoded_char buffer;
1740 */
1741static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001742textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001743{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001744 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001745 self->decoded_chars_used = 0;
1746}
1747
1748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001749textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001750{
1751 PyObject *chars;
1752 Py_ssize_t avail;
1753
1754 if (self->decoded_chars == NULL)
1755 return PyUnicode_FromStringAndSize(NULL, 0);
1756
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001757 /* decoded_chars is guaranteed to be "ready". */
1758 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001759 - self->decoded_chars_used);
1760
1761 assert(avail >= 0);
1762
1763 if (n < 0 || n > avail)
1764 n = avail;
1765
1766 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001767 chars = PyUnicode_Substring(self->decoded_chars,
1768 self->decoded_chars_used,
1769 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001770 if (chars == NULL)
1771 return NULL;
1772 }
1773 else {
1774 chars = self->decoded_chars;
1775 Py_INCREF(chars);
1776 }
1777
1778 self->decoded_chars_used += n;
1779 return chars;
1780}
1781
1782/* Read and decode the next chunk of data from the BufferedReader.
1783 */
1784static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001785textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001786{
1787 PyObject *dec_buffer = NULL;
1788 PyObject *dec_flags = NULL;
1789 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001790 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001791 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001792 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001793 int eof;
1794
1795 /* The return value is True unless EOF was reached. The decoded string is
1796 * placed in self._decoded_chars (replacing its previous value). The
1797 * entire input chunk is sent to the decoder, though some of it may remain
1798 * buffered in the decoder, yet to be converted.
1799 */
1800
1801 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001802 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001803 return -1;
1804 }
1805
1806 if (self->telling) {
1807 /* To prepare for tell(), we need to snapshot a point in the file
1808 * where the decoder's input buffer is empty.
1809 */
Petr Viktorinffd97532020-02-11 17:46:57 +01001810 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001811 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 if (state == NULL)
1813 return -1;
1814 /* Given this, we know there was a valid snapshot point
1815 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1816 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001817 if (!PyTuple_Check(state)) {
1818 PyErr_SetString(PyExc_TypeError,
1819 "illegal decoder state");
1820 Py_DECREF(state);
1821 return -1;
1822 }
1823 if (!PyArg_ParseTuple(state,
1824 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1825 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001826 Py_DECREF(state);
1827 return -1;
1828 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001829
1830 if (!PyBytes_Check(dec_buffer)) {
1831 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001832 "illegal decoder state: the first item should be a "
1833 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001834 Py_TYPE(dec_buffer)->tp_name);
1835 Py_DECREF(state);
1836 return -1;
1837 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001838 Py_INCREF(dec_buffer);
1839 Py_INCREF(dec_flags);
1840 Py_DECREF(state);
1841 }
1842
1843 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001844 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001845 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001846 }
1847 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001848 if (chunk_size == NULL)
1849 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001850
Petr Viktorinffd97532020-02-11 17:46:57 +01001851 input_chunk = PyObject_CallMethodOneArg(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001852 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001853 chunk_size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001854 Py_DECREF(chunk_size);
1855 if (input_chunk == NULL)
1856 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001857
1858 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001859 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001860 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001861 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1862 Py_TYPE(input_chunk)->tp_name);
1863 goto fail;
1864 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001865
Antoine Pitroub8503892014-04-29 10:14:02 +02001866 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001867 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001868
INADA Naoki507434f2017-12-21 09:59:53 +09001869 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1870 PyBuffer_Release(&input_chunk_buf);
1871 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001872 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001873
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001874 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001875 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001876 if (nchars > 0)
1877 self->b2cratio = (double) nbytes / nchars;
1878 else
1879 self->b2cratio = 0.0;
1880 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001881 eof = 0;
1882
1883 if (self->telling) {
1884 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1885 * next input to be decoded is dec_buffer + input_chunk.
1886 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001887 PyObject *next_input = dec_buffer;
1888 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001889 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001890 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001891 goto fail;
1892 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001893 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1894 if (snapshot == NULL) {
1895 dec_flags = NULL;
1896 goto fail;
1897 }
1898 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001899 }
1900 Py_DECREF(input_chunk);
1901
1902 return (eof == 0);
1903
1904 fail:
1905 Py_XDECREF(dec_buffer);
1906 Py_XDECREF(dec_flags);
1907 Py_XDECREF(input_chunk);
1908 return -1;
1909}
1910
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001911/*[clinic input]
1912_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001913 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001914 /
1915[clinic start generated code]*/
1916
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001917static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001918_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001919/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001920{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921 PyObject *result = NULL, *chunks = NULL;
1922
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001923 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001924 CHECK_CLOSED(self);
1925
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001926 if (self->decoder == NULL)
1927 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001928
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001929 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930 return NULL;
1931
1932 if (n < 0) {
1933 /* Read everything */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001934 PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001935 PyObject *decoded;
1936 if (bytes == NULL)
1937 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001938
Andy Lesterdffe4c02020-03-04 07:15:20 -06001939 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
Victor Stinnerfd821132011-05-25 22:01:33 +02001940 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1941 bytes, 1);
1942 else
1943 decoded = PyObject_CallMethodObjArgs(
1944 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001945 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001946 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001947 goto fail;
1948
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001949 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001950
1951 if (result == NULL) {
1952 Py_DECREF(decoded);
1953 return NULL;
1954 }
1955
1956 PyUnicode_AppendAndDel(&result, decoded);
1957 if (result == NULL)
1958 goto fail;
1959
Zackery Spytz23db9352018-06-29 04:14:58 -06001960 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001961 Py_CLEAR(self->snapshot);
1962 return result;
1963 }
1964 else {
1965 int res = 1;
1966 Py_ssize_t remaining = n;
1967
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001968 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969 if (result == NULL)
1970 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001971 if (PyUnicode_READY(result) == -1)
1972 goto fail;
1973 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974
1975 /* Keep reading chunks until we have n characters to return */
1976 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001977 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001978 if (res < 0) {
1979 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1980 when EINTR occurs so we needn't do it ourselves. */
1981 if (_PyIO_trap_eintr()) {
1982 continue;
1983 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001984 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001985 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001986 if (res == 0) /* EOF */
1987 break;
1988 if (chunks == NULL) {
1989 chunks = PyList_New(0);
1990 if (chunks == NULL)
1991 goto fail;
1992 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001993 if (PyUnicode_GET_LENGTH(result) > 0 &&
1994 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001995 goto fail;
1996 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001997 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998 if (result == NULL)
1999 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002000 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002001 }
2002 if (chunks != NULL) {
2003 if (result != NULL && PyList_Append(chunks, result) < 0)
2004 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03002005 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002006 if (result == NULL)
2007 goto fail;
2008 Py_CLEAR(chunks);
2009 }
2010 return result;
2011 }
2012 fail:
2013 Py_XDECREF(result);
2014 Py_XDECREF(chunks);
2015 return NULL;
2016}
2017
2018
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002019/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002020 that is to the NUL character. Otherwise the function will produce
2021 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002022static const char *
2023find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002025 if (kind == PyUnicode_1BYTE_KIND) {
2026 assert(ch < 256);
Andy Lestere6be9b52020-02-11 20:28:35 -06002027 return (char *) memchr((const void *) s, (char) ch, end - s);
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002028 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002029 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002030 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002031 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002032 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002033 return s;
2034 if (s == end)
2035 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002036 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002037 }
2038}
2039
2040Py_ssize_t
2041_PyIO_find_line_ending(
2042 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002043 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044{
Andy Lestere6be9b52020-02-11 20:28:35 -06002045 Py_ssize_t len = (end - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002046
2047 if (translated) {
2048 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002049 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002050 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002051 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 else {
2053 *consumed = len;
2054 return -1;
2055 }
2056 }
2057 else if (universal) {
2058 /* Universal newline search. Find any of \r, \r\n, \n
2059 * The decoder ensures that \r\n are not split in two pieces
2060 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002061 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002062 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002063 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002064 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002065 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002066 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002067 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068 if (s >= end) {
2069 *consumed = len;
2070 return -1;
2071 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002072 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002073 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002075 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002077 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002078 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002080 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 }
2082 }
2083 }
2084 else {
2085 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002086 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002087 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002088 /* Assume that readnl is an ASCII character. */
2089 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002090 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002091 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002093 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002094 *consumed = len;
2095 return -1;
2096 }
2097 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002098 const char *s = start;
2099 const char *e = end - (readnl_len - 1)*kind;
2100 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002101 if (e < s)
2102 e = s;
2103 while (s < e) {
2104 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002105 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002106 if (pos == NULL || pos >= e)
2107 break;
2108 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002109 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 break;
2111 }
2112 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002113 return (pos - start)/kind + readnl_len;
2114 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002115 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002116 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117 if (pos == NULL)
2118 *consumed = len;
2119 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002120 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002121 return -1;
2122 }
2123 }
2124}
2125
2126static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002127_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002128{
2129 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2130 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2131 int res;
2132
2133 CHECK_CLOSED(self);
2134
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002135 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136 return NULL;
2137
2138 chunked = 0;
2139
2140 while (1) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002141 const char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002142 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002143 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002144 Py_ssize_t consumed = 0;
2145
2146 /* First, get some data if necessary */
2147 res = 1;
2148 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002149 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002150 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002151 if (res < 0) {
2152 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2153 when EINTR occurs so we needn't do it ourselves. */
2154 if (_PyIO_trap_eintr()) {
2155 continue;
2156 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002158 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002159 if (res == 0)
2160 break;
2161 }
2162 if (res == 0) {
2163 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002164 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002165 Py_CLEAR(self->snapshot);
2166 start = endpos = offset_to_buffer = 0;
2167 break;
2168 }
2169
2170 if (remaining == NULL) {
2171 line = self->decoded_chars;
2172 start = self->decoded_chars_used;
2173 offset_to_buffer = 0;
2174 Py_INCREF(line);
2175 }
2176 else {
2177 assert(self->decoded_chars_used == 0);
2178 line = PyUnicode_Concat(remaining, self->decoded_chars);
2179 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002180 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002181 Py_CLEAR(remaining);
2182 if (line == NULL)
2183 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002184 if (PyUnicode_READY(line) == -1)
2185 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002186 }
2187
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002188 ptr = PyUnicode_DATA(line);
2189 line_len = PyUnicode_GET_LENGTH(line);
2190 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002191
2192 endpos = _PyIO_find_line_ending(
2193 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002194 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002195 ptr + kind * start,
2196 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002197 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002198 if (endpos >= 0) {
2199 endpos += start;
2200 if (limit >= 0 && (endpos - start) + chunked >= limit)
2201 endpos = start + limit - chunked;
2202 break;
2203 }
2204
2205 /* We can put aside up to `endpos` */
2206 endpos = consumed + start;
2207 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2208 /* Didn't find line ending, but reached length limit */
2209 endpos = start + limit - chunked;
2210 break;
2211 }
2212
2213 if (endpos > start) {
2214 /* No line ending seen yet - put aside current data */
2215 PyObject *s;
2216 if (chunks == NULL) {
2217 chunks = PyList_New(0);
2218 if (chunks == NULL)
2219 goto error;
2220 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002221 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002222 if (s == NULL)
2223 goto error;
2224 if (PyList_Append(chunks, s) < 0) {
2225 Py_DECREF(s);
2226 goto error;
2227 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002228 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002229 Py_DECREF(s);
2230 }
2231 /* There may be some remaining bytes we'll have to prepend to the
2232 next chunk of data */
2233 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002234 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235 if (remaining == NULL)
2236 goto error;
2237 }
2238 Py_CLEAR(line);
2239 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002240 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002241 }
2242
2243 if (line != NULL) {
2244 /* Our line ends in the current buffer */
2245 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002246 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2247 PyObject *s = PyUnicode_Substring(line, start, endpos);
2248 Py_CLEAR(line);
2249 if (s == NULL)
2250 goto error;
2251 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252 }
2253 }
2254 if (remaining != NULL) {
2255 if (chunks == NULL) {
2256 chunks = PyList_New(0);
2257 if (chunks == NULL)
2258 goto error;
2259 }
2260 if (PyList_Append(chunks, remaining) < 0)
2261 goto error;
2262 Py_CLEAR(remaining);
2263 }
2264 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002265 if (line != NULL) {
2266 if (PyList_Append(chunks, line) < 0)
2267 goto error;
2268 Py_DECREF(line);
2269 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002270 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2271 if (line == NULL)
2272 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002273 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002274 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002275 if (line == NULL) {
2276 Py_INCREF(_PyIO_empty_str);
2277 line = _PyIO_empty_str;
2278 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002279
2280 return line;
2281
2282 error:
2283 Py_XDECREF(chunks);
2284 Py_XDECREF(remaining);
2285 Py_XDECREF(line);
2286 return NULL;
2287}
2288
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002289/*[clinic input]
2290_io.TextIOWrapper.readline
2291 size: Py_ssize_t = -1
2292 /
2293[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002294
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002295static PyObject *
2296_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2297/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2298{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002299 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002300 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002301}
2302
2303/* Seek and Tell */
2304
2305typedef struct {
2306 Py_off_t start_pos;
2307 int dec_flags;
2308 int bytes_to_feed;
2309 int chars_to_skip;
2310 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002311} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002312
/*
   Cookies are packed into and unpacked from a Python int through an
   intermediary byte string, using _PyLong_FromByteArray() and
   _PyLong_AsByteArray() respectively; this speeds up packing/unpacking.
   The OFF_* macros below give the offset, inside that byte string, at
   which each cookie_type field is stored.  Every offset is expressed
   relative to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* Big-endian mode: the fields are stored in reverse order so that the
   least significant byte of start_pos is also the least significant byte
   of the cookie. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: storing the fields in declaration order already
   puts the least significant byte of start_pos at the least significant
   end of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2344
2345static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002346textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347{
2348 unsigned char buffer[COOKIE_BUF_LEN];
2349 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2350 if (cookieLong == NULL)
2351 return -1;
2352
2353 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002354 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002355 Py_DECREF(cookieLong);
2356 return -1;
2357 }
2358 Py_DECREF(cookieLong);
2359
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002360 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2361 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2362 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2363 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2364 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002365
2366 return 0;
2367}
2368
2369static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002370textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371{
2372 unsigned char buffer[COOKIE_BUF_LEN];
2373
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002374 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2375 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2376 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2377 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2378 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002379
Christian Heimes743e0cd2012-10-17 23:52:17 +02002380 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2381 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002382}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002383
2384static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002385_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002386{
2387 PyObject *res;
2388 /* When seeking to the start of the stream, we call decoder.reset()
2389 rather than decoder.getstate().
2390 This is for a few decoders such as utf-16 for which the state value
2391 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2392 utf-16, that we are expecting a BOM).
2393 */
2394 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
Petr Viktorinffd97532020-02-11 17:46:57 +01002395 res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002397 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2398 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399 if (res == NULL)
2400 return -1;
2401 Py_DECREF(res);
2402 return 0;
2403}
2404
Antoine Pitroue4501852009-05-14 18:55:55 +00002405static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002406_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002407{
2408 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002409 if (start_of_stream) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002410 res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
Antoine Pitroue4501852009-05-14 18:55:55 +00002411 self->encoding_start_of_stream = 1;
2412 }
2413 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01002414 res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
Victor Stinner37834132020-10-27 17:12:53 +01002415 _PyLong_GetZero());
Antoine Pitroue4501852009-05-14 18:55:55 +00002416 self->encoding_start_of_stream = 0;
2417 }
2418 if (res == NULL)
2419 return -1;
2420 Py_DECREF(res);
2421 return 0;
2422}
2423
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002424static int
2425_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2426{
2427 /* Same as _textiowrapper_decoder_setstate() above. */
2428 return _textiowrapper_encoder_reset(
2429 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2430}
2431
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002432/*[clinic input]
2433_io.TextIOWrapper.seek
2434 cookie as cookieObj: object
2435 whence: int = 0
2436 /
2437[clinic start generated code]*/
2438
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002439static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002440_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2441/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002442{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002443 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002444 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002445 PyObject *res;
2446 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002447 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002448
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002449 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450 CHECK_CLOSED(self);
2451
2452 Py_INCREF(cookieObj);
2453
2454 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002455 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002456 goto fail;
2457 }
2458
Victor Stinner37834132020-10-27 17:12:53 +01002459 PyObject *zero = _PyLong_GetZero(); // borrowed reference
2460
ngie-eign848037c2019-03-02 23:28:26 -08002461 switch (whence) {
2462 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463 /* seek relative to current position */
Victor Stinner37834132020-10-27 17:12:53 +01002464 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002465 if (cmp < 0)
2466 goto fail;
2467
2468 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002469 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002470 goto fail;
2471 }
2472
2473 /* Seeking to the current position should attempt to
2474 * sync the underlying buffer with the current position.
2475 */
2476 Py_DECREF(cookieObj);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002477 cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478 if (cookieObj == NULL)
2479 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002480 break;
2481
ngie-eign848037c2019-03-02 23:28:26 -08002482 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002483 /* seek relative to end of file */
Victor Stinner37834132020-10-27 17:12:53 +01002484 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485 if (cmp < 0)
2486 goto fail;
2487
2488 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002489 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002490 goto fail;
2491 }
2492
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002493 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494 if (res == NULL)
2495 goto fail;
2496 Py_DECREF(res);
2497
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002498 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002499 Py_CLEAR(self->snapshot);
2500 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002501 res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002502 if (res == NULL)
2503 goto fail;
2504 Py_DECREF(res);
2505 }
2506
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002507 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002508 Py_CLEAR(cookieObj);
2509 if (res == NULL)
2510 goto fail;
2511 if (self->encoder) {
2512 /* If seek() == 0, we are at the start of stream, otherwise not */
Victor Stinner37834132020-10-27 17:12:53 +01002513 cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002514 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2515 Py_DECREF(res);
2516 goto fail;
2517 }
2518 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002519 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002520
ngie-eign848037c2019-03-02 23:28:26 -08002521 case SEEK_SET:
2522 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002523
ngie-eign848037c2019-03-02 23:28:26 -08002524 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002525 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002526 "invalid whence (%d, should be %d, %d or %d)", whence,
2527 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528 goto fail;
2529 }
2530
Victor Stinner37834132020-10-27 17:12:53 +01002531 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532 if (cmp < 0)
2533 goto fail;
2534
2535 if (cmp == 1) {
2536 PyErr_Format(PyExc_ValueError,
2537 "negative seek position %R", cookieObj);
2538 goto fail;
2539 }
2540
Petr Viktorinffd97532020-02-11 17:46:57 +01002541 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542 if (res == NULL)
2543 goto fail;
2544 Py_DECREF(res);
2545
2546 /* The strategy of seek() is to go back to the safe start point
2547 * and replay the effect of read(chars_to_skip) from there.
2548 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002549 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002550 goto fail;
2551
2552 /* Seek back to the safe start point. */
2553 posobj = PyLong_FromOff_t(cookie.start_pos);
2554 if (posobj == NULL)
2555 goto fail;
Petr Viktorinffd97532020-02-11 17:46:57 +01002556 res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002557 Py_DECREF(posobj);
2558 if (res == NULL)
2559 goto fail;
2560 Py_DECREF(res);
2561
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002562 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563 Py_CLEAR(self->snapshot);
2564
2565 /* Restore the decoder to its state from the safe start point. */
2566 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002567 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002568 goto fail;
2569 }
2570
2571 if (cookie.chars_to_skip) {
2572 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002573 PyObject *input_chunk = _PyObject_CallMethodId(
2574 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002575 PyObject *decoded;
2576
2577 if (input_chunk == NULL)
2578 goto fail;
2579
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002580 if (!PyBytes_Check(input_chunk)) {
2581 PyErr_Format(PyExc_TypeError,
2582 "underlying read() should have returned a bytes "
2583 "object, not '%.200s'",
2584 Py_TYPE(input_chunk)->tp_name);
2585 Py_DECREF(input_chunk);
2586 goto fail;
2587 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002588
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002589 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2590 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002591 goto fail;
2592 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002593 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002594
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002595 decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2596 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002597
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002598 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002599 goto fail;
2600
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002601 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002602
2603 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002604 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002605 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606 goto fail;
2607 }
2608 self->decoded_chars_used = cookie.chars_to_skip;
2609 }
2610 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002611 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2612 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002613 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002614 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002615 }
2616
Antoine Pitroue4501852009-05-14 18:55:55 +00002617 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2618 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002619 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002620 goto fail;
2621 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002622 return cookieObj;
2623 fail:
2624 Py_XDECREF(cookieObj);
2625 return NULL;
2626
2627}
2628
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002629/*[clinic input]
2630_io.TextIOWrapper.tell
2631[clinic start generated code]*/
2632
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002633static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002634_io_TextIOWrapper_tell_impl(textio *self)
2635/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002636{
2637 PyObject *res;
2638 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002639 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002640 PyObject *next_input;
2641 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002642 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643 PyObject *saved_state = NULL;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002644 const char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002645 Py_ssize_t dec_buffer_len;
2646 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002647
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002648 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002649 CHECK_CLOSED(self);
2650
2651 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002652 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002653 goto fail;
2654 }
2655 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002656 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002657 "telling position disabled by next() call");
2658 goto fail;
2659 }
2660
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002661 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002662 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002663 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002664 if (res == NULL)
2665 goto fail;
2666 Py_DECREF(res);
2667
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002668 posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002669 if (posobj == NULL)
2670 goto fail;
2671
2672 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002673 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002674 return posobj;
2675 }
2676
2677#if defined(HAVE_LARGEFILE_SUPPORT)
2678 cookie.start_pos = PyLong_AsLongLong(posobj);
2679#else
2680 cookie.start_pos = PyLong_AsLong(posobj);
2681#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002682 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002683 if (PyErr_Occurred())
2684 goto fail;
2685
2686 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002687 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002688 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002689 goto fail;
2690
2691 assert (PyBytes_Check(next_input));
2692
2693 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2694
2695 /* How many decoded characters have been used up since the snapshot? */
2696 if (self->decoded_chars_used == 0) {
2697 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002698 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002699 }
2700
2701 chars_to_skip = self->decoded_chars_used;
2702
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002703 /* Decoder state will be restored at the end */
Petr Viktorinffd97532020-02-11 17:46:57 +01002704 saved_state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002705 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706 if (saved_state == NULL)
2707 goto fail;
2708
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002709#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002710 PyObject *dec_buffer; \
Petr Viktorinffd97532020-02-11 17:46:57 +01002711 PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002712 _PyIO_str_getstate); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002713 if (_state == NULL) \
2714 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002715 if (!PyTuple_Check(_state)) { \
2716 PyErr_SetString(PyExc_TypeError, \
2717 "illegal decoder state"); \
2718 Py_DECREF(_state); \
2719 goto fail; \
2720 } \
2721 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2722 &dec_buffer, &dec_flags)) \
2723 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002724 Py_DECREF(_state); \
2725 goto fail; \
2726 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002727 if (!PyBytes_Check(dec_buffer)) { \
2728 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002729 "illegal decoder state: the first item should be a " \
2730 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002731 Py_TYPE(dec_buffer)->tp_name); \
2732 Py_DECREF(_state); \
2733 goto fail; \
2734 } \
2735 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002736 Py_DECREF(_state); \
2737 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002738
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002739#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002740 PyObject *_decoded = _PyObject_CallMethodId( \
2741 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002742 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002743 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002744 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002745 Py_DECREF(_decoded); \
2746 } while (0)
2747
2748 /* Fast search for an acceptable start point, close to our
2749 current pos */
2750 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2751 skip_back = 1;
2752 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2753 input = PyBytes_AS_STRING(next_input);
2754 while (skip_bytes > 0) {
2755 /* Decode up to temptative start point */
2756 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2757 goto fail;
2758 DECODER_DECODE(input, skip_bytes, chars_decoded);
2759 if (chars_decoded <= chars_to_skip) {
2760 DECODER_GETSTATE();
2761 if (dec_buffer_len == 0) {
2762 /* Before pos and no bytes buffered in decoder => OK */
2763 cookie.dec_flags = dec_flags;
2764 chars_to_skip -= chars_decoded;
2765 break;
2766 }
2767 /* Skip back by buffered amount and reset heuristic */
2768 skip_bytes -= dec_buffer_len;
2769 skip_back = 1;
2770 }
2771 else {
2772 /* We're too far ahead, skip back a bit */
2773 skip_bytes -= skip_back;
2774 skip_back *= 2;
2775 }
2776 }
2777 if (skip_bytes <= 0) {
2778 skip_bytes = 0;
2779 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2780 goto fail;
2781 }
2782
2783 /* Note our initial start point. */
2784 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002785 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002786 if (chars_to_skip == 0)
2787 goto finally;
2788
2789 /* We should be close to the desired position. Now feed the decoder one
2790 * byte at a time until we reach the `chars_to_skip` target.
2791 * As we go, note the nearest "safe start point" before the current
2792 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002793 * can safely start from there and advance to this location).
2794 */
2795 chars_decoded = 0;
2796 input = PyBytes_AS_STRING(next_input);
2797 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002798 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002799 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002800 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002801
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002802 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002803 /* We got n chars for 1 byte */
2804 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002805 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002806 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002807
2808 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2809 /* Decoder buffer is empty, so this is a safe start point. */
2810 cookie.start_pos += cookie.bytes_to_feed;
2811 chars_to_skip -= chars_decoded;
2812 cookie.dec_flags = dec_flags;
2813 cookie.bytes_to_feed = 0;
2814 chars_decoded = 0;
2815 }
2816 if (chars_decoded >= chars_to_skip)
2817 break;
2818 input++;
2819 }
2820 if (input == input_end) {
2821 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002822 PyObject *decoded = _PyObject_CallMethodId(
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002823 self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002824 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002825 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002826 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002827 Py_DECREF(decoded);
2828 cookie.need_eof = 1;
2829
2830 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002831 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002832 "can't reconstruct logical file position");
2833 goto fail;
2834 }
2835 }
2836
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002837finally:
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002838 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002839 Py_DECREF(saved_state);
2840 if (res == NULL)
2841 return NULL;
2842 Py_DECREF(res);
2843
2844 /* The returned cookie corresponds to the last safe start point. */
2845 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002846 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002847
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002848fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002849 if (saved_state) {
2850 PyObject *type, *value, *traceback;
2851 PyErr_Fetch(&type, &value, &traceback);
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002852 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002853 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002854 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002855 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002856 }
2857 return NULL;
2858}
2859
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002860/*[clinic input]
2861_io.TextIOWrapper.truncate
2862 pos: object = None
2863 /
2864[clinic start generated code]*/
2865
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002866static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002867_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2868/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002869{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002870 PyObject *res;
2871
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002872 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002873
Petr Viktorinffd97532020-02-11 17:46:57 +01002874 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002875 if (res == NULL)
2876 return NULL;
2877 Py_DECREF(res);
2878
Petr Viktorinffd97532020-02-11 17:46:57 +01002879 return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002880}
2881
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002882static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002883textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002884{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002885 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002886 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002887
2888 CHECK_INITIALIZED(self);
2889
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002890 res = PyUnicode_FromString("<_io.TextIOWrapper");
2891 if (res == NULL)
2892 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002893
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002894 status = Py_ReprEnter((PyObject *)self);
2895 if (status != 0) {
2896 if (status > 0) {
2897 PyErr_Format(PyExc_RuntimeError,
2898 "reentrant call inside %s.__repr__",
2899 Py_TYPE(self)->tp_name);
2900 }
2901 goto error;
2902 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002903 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2904 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002905 goto error;
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002906 }
2907 /* Ignore ValueError raised if the underlying stream was detached */
2908 PyErr_Clear();
Antoine Pitrou716c4442009-05-23 19:04:03 +00002909 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002910 if (nameobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002911 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002912 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002913 if (s == NULL)
2914 goto error;
2915 PyUnicode_AppendAndDel(&res, s);
2916 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002917 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002918 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002919 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2920 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002921 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002922 if (modeobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002923 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2924 Py_DECREF(modeobj);
2925 if (s == NULL)
2926 goto error;
2927 PyUnicode_AppendAndDel(&res, s);
2928 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002929 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002930 }
2931 s = PyUnicode_FromFormat("%U encoding=%R>",
2932 res, self->encoding);
2933 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002934 if (status == 0) {
2935 Py_ReprLeave((PyObject *)self);
2936 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002937 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002938
2939 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002940 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002941 if (status == 0) {
2942 Py_ReprLeave((PyObject *)self);
2943 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002944 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002945}
2946
2947
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002948/* Inquiries */
2949
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002950/*[clinic input]
2951_io.TextIOWrapper.fileno
2952[clinic start generated code]*/
2953
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002954static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002955_io_TextIOWrapper_fileno_impl(textio *self)
2956/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002957{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002958 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002959 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002960}
2961
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002962/*[clinic input]
2963_io.TextIOWrapper.seekable
2964[clinic start generated code]*/
2965
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002966static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002967_io_TextIOWrapper_seekable_impl(textio *self)
2968/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002969{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002970 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002971 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002972}
2973
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002974/*[clinic input]
2975_io.TextIOWrapper.readable
2976[clinic start generated code]*/
2977
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002978static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002979_io_TextIOWrapper_readable_impl(textio *self)
2980/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002981{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002982 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002983 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002984}
2985
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002986/*[clinic input]
2987_io.TextIOWrapper.writable
2988[clinic start generated code]*/
2989
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002990static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002991_io_TextIOWrapper_writable_impl(textio *self)
2992/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002993{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002994 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002995 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002996}
2997
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002998/*[clinic input]
2999_io.TextIOWrapper.isatty
3000[clinic start generated code]*/
3001
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003002static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003003_io_TextIOWrapper_isatty_impl(textio *self)
3004/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003005{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003006 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003007 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003008}
3009
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003010/*[clinic input]
3011_io.TextIOWrapper.flush
3012[clinic start generated code]*/
3013
Antoine Pitrou243757e2010-11-05 21:15:39 +00003014static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003015_io_TextIOWrapper_flush_impl(textio *self)
3016/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003017{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003018 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003019 CHECK_CLOSED(self);
3020 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003021 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003022 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003023 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003024}
3025
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003026/*[clinic input]
3027_io.TextIOWrapper.close
3028[clinic start generated code]*/
3029
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003030static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003031_io_TextIOWrapper_close_impl(textio *self)
3032/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003033{
3034 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003035 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003036 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003037
Antoine Pitrou6be88762010-05-03 16:48:20 +00003038 res = textiowrapper_closed_get(self, NULL);
3039 if (res == NULL)
3040 return NULL;
3041 r = PyObject_IsTrue(res);
3042 Py_DECREF(res);
3043 if (r < 0)
3044 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003045
Antoine Pitrou6be88762010-05-03 16:48:20 +00003046 if (r > 0) {
3047 Py_RETURN_NONE; /* stream already closed */
3048 }
3049 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003050 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003051 if (self->finalizing) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02003052 res = _PyObject_CallMethodIdOneArg(self->buffer,
3053 &PyId__dealloc_warn,
3054 (PyObject *)self);
Antoine Pitroue033e062010-10-29 10:38:18 +00003055 if (res)
3056 Py_DECREF(res);
3057 else
3058 PyErr_Clear();
3059 }
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003060 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson68623612012-12-20 11:53:11 -06003061 if (res == NULL)
3062 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003063 else
3064 Py_DECREF(res);
3065
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003066 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
Benjamin Peterson68623612012-12-20 11:53:11 -06003067 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003068 _PyErr_ChainExceptions(exc, val, tb);
3069 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003070 }
3071 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003072 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003073}
3074
3075static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003076textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003077{
3078 PyObject *line;
3079
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003080 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003081
3082 self->telling = 0;
Andy Lesterdffe4c02020-03-04 07:15:20 -06003083 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003084 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003085 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003086 }
3087 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01003088 line = PyObject_CallMethodNoArgs((PyObject *)self,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003089 _PyIO_str_readline);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003090 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003091 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003092 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003093 "not '%.200s'", Py_TYPE(line)->tp_name);
3094 Py_DECREF(line);
3095 return NULL;
3096 }
3097 }
3098
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003099 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003100 return NULL;
3101
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003102 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003103 /* Reached EOF or would have blocked */
3104 Py_DECREF(line);
3105 Py_CLEAR(self->snapshot);
3106 self->telling = self->seekable;
3107 return NULL;
3108 }
3109
3110 return line;
3111}
3112
3113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003114textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003115{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003116 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003117 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003118}
3119
3120static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003121textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003122{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003123 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003124 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3125}
3126
3127static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003128textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003129{
3130 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003131 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003132 if (self->decoder == NULL ||
3133 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3134 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003135 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003136 }
3137 return res;
3138}
3139
3140static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003141textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003142{
3143 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003144 Py_INCREF(self->errors);
3145 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003146}
3147
3148static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003149textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003150{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003151 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003152 return PyLong_FromSsize_t(self->chunk_size);
3153}
3154
3155static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003156textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003157{
3158 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003159 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003160 if (arg == NULL) {
3161 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3162 return -1;
3163 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003164 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003165 if (n == -1 && PyErr_Occurred())
3166 return -1;
3167 if (n <= 0) {
3168 PyErr_SetString(PyExc_ValueError,
3169 "a strictly positive integer is required");
3170 return -1;
3171 }
3172 self->chunk_size = n;
3173 return 0;
3174}
3175
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003176#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003177
/* Method table for _io.IncrementalNewlineDecoder (installed as tp_methods
   of PyIncrementalNewlineDecoder_Type).  Each entry is a *_METHODDEF
   macro, presumably provided by the "clinic/textio.c.h" include above;
   the list ends with a {NULL} sentinel. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
3185
/* Computed attributes of _io.IncrementalNewlineDecoder: "newlines" has a
   getter and no setter. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};
3190
/* Static type object for _io.IncrementalNewlineDecoder.

   Slots are filled positionally, so the order of the entries below must
   not change; each trailing comment names the slot being initialized.
   The type can be subclassed (Py_TPFLAGS_BASETYPE) and does not set
   Py_TPFLAGS_HAVE_GC. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
3231
3232
/* Method table for _io.TextIOWrapper (installed as tp_methods of
   PyTextIOWrapper_Type).  Each entry is a *_METHODDEF macro, presumably
   provided by the "clinic/textio.c.h" include above; the list ends with a
   {NULL, NULL} sentinel. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
3253
/* Attributes of _io.TextIOWrapper that map directly onto fields of the
   textio struct.  All are READONLY except "_finalizing", whose flags
   are 0. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}
};
3262
/* Computed attributes of _io.TextIOWrapper.  Only "_CHUNK_SIZE" has a
   setter; the others are getter-only.  The "mode" entry is commented out,
   so no "mode" descriptor is registered by this table. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
3274
/* Static type object for _io.TextIOWrapper.

   Slots are filled positionally, so the order of the entries below must
   not change; each trailing comment names the slot being initialized.
   The type can be subclassed (Py_TPFLAGS_BASETYPE) and takes part in
   garbage collection (Py_TPFLAGS_HAVE_GC with tp_traverse/tp_clear).
   Instances carry a weak reference list and a __dict__, located through
   the weakreflist and dict fields of the textio struct. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
    _io_TextIOWrapper___init____doc__, /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    _io_TextIOWrapper___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003325};