/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +010011#include "pycore_object.h"
Victor Stinnerda7933e2020-04-13 03:04:28 +020012#include "pycore_pystate.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000013#include "structmember.h"
14#include "_iomodule.h"
15
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030016/*[clinic input]
17module _io
18class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
19class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
20[clinic start generated code]*/
21/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
22
/* Interned-name handles used by this file.  Each _Py_IDENTIFIER(x) line makes
   PyId_x available, e.g. for _PyUnicode_FromId(&PyId_strict) or
   _PyObject_CallMethodId(obj, &PyId_setstate, ...). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020043
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000044/* TextIOBase */
45
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000046PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000047 "Base class for text I/O.\n"
48 "\n"
49 "This class provides a character and line based interface to stream\n"
50 "I/O. There is no readinto method because Python's character strings\n"
51 "are immutable. There is no public constructor.\n"
52 );
53
54static PyObject *
55_unsupported(const char *message)
56{
Antoine Pitrou712cb732013-12-21 15:51:54 +010057 _PyIO_State *state = IO_STATE();
58 if (state != NULL)
59 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000060 return NULL;
61}
62
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000063PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000064 "Separate the underlying buffer from the TextIOBase and return it.\n"
65 "\n"
66 "After the underlying buffer has been detached, the TextIO is in an\n"
67 "unusable state.\n"
68 );
69
70static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053071textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000072{
73 return _unsupported("detach");
74}
75
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077 "Read at most n characters from stream.\n"
78 "\n"
79 "Read from underlying buffer until we have n characters or we hit EOF.\n"
80 "If n is negative or omitted, read until EOF.\n"
81 );
82
83static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000084textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000085{
86 return _unsupported("read");
87}
88
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000089PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000090 "Read until newline or EOF.\n"
91 "\n"
92 "Returns an empty string if EOF is hit immediately.\n"
93 );
94
95static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000096textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000097{
98 return _unsupported("readline");
99}
100
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000101PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000102 "Write string to stream.\n"
103 "Returns the number of characters written (which is always equal to\n"
104 "the length of the string).\n"
105 );
106
107static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000108textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000109{
110 return _unsupported("write");
111}
112
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000113PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000114 "Encoding of the text stream.\n"
115 "\n"
116 "Subclasses should override.\n"
117 );
118
119static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000121{
122 Py_RETURN_NONE;
123}
124
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126 "Line endings translated so far.\n"
127 "\n"
128 "Only line endings translated during reading are considered.\n"
129 "\n"
130 "Subclasses should override.\n"
131 );
132
133static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000134textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000135{
136 Py_RETURN_NONE;
137}
138
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000139PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000140 "The error setting of the decoder or encoder.\n"
141 "\n"
142 "Subclasses should override.\n"
143 );
144
145static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000146textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000147{
148 Py_RETURN_NONE;
149}
150
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000151
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000152static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530153 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000154 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
155 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
156 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000157 {NULL, NULL}
158};
159
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000160static PyGetSetDef textiobase_getset[] = {
161 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
162 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
163 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000164 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000165};
166
167PyTypeObject PyTextIOBase_Type = {
168 PyVarObject_HEAD_INIT(NULL, 0)
169 "_io._TextIOBase", /*tp_name*/
170 0, /*tp_basicsize*/
171 0, /*tp_itemsize*/
172 0, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200173 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000174 0, /*tp_getattr*/
175 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200176 0, /*tp_as_async*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000177 0, /*tp_repr*/
178 0, /*tp_as_number*/
179 0, /*tp_as_sequence*/
180 0, /*tp_as_mapping*/
181 0, /*tp_hash */
182 0, /*tp_call*/
183 0, /*tp_str*/
184 0, /*tp_getattro*/
185 0, /*tp_setattro*/
186 0, /*tp_as_buffer*/
Antoine Pitrouada319b2019-05-29 22:12:38 +0200187 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000188 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000189 0, /* tp_traverse */
190 0, /* tp_clear */
191 0, /* tp_richcompare */
192 0, /* tp_weaklistoffset */
193 0, /* tp_iter */
194 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000195 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000196 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 &PyIOBase_Type, /* tp_base */
199 0, /* tp_dict */
200 0, /* tp_descr_get */
201 0, /* tp_descr_set */
202 0, /* tp_dictoffset */
203 0, /* tp_init */
204 0, /* tp_alloc */
205 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200206 0, /* tp_free */
207 0, /* tp_is_gc */
208 0, /* tp_bases */
209 0, /* tp_mro */
210 0, /* tp_cache */
211 0, /* tp_subclasses */
212 0, /* tp_weaklist */
213 0, /* tp_del */
214 0, /* tp_version_tag */
215 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000216};
217
218
219/* IncrementalNewlineDecoder */
220
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221typedef struct {
222 PyObject_HEAD
223 PyObject *decoder;
224 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200225 unsigned int pendingcr: 1;
226 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000227 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000228} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000229
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300230/*[clinic input]
231_io.IncrementalNewlineDecoder.__init__
232 decoder: object
233 translate: int
234 errors: object(c_default="NULL") = "strict"
235
236Codec used when reading a file in universal newlines mode.
237
238It wraps another incremental decoder, translating \r\n and \r into \n.
239It also records the types of newlines encountered. When used with
240translate=False, it ensures that the newline sequence is returned in
241one piece. When used with decoder=None, it expects unicode strings as
242decode input and translates newlines without first invoking an external
243decoder.
244[clinic start generated code]*/
245
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300247_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
248 PyObject *decoder, int translate,
249 PyObject *errors)
250/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252 self->decoder = decoder;
253 Py_INCREF(decoder);
254
255 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900256 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000257 if (self->errors == NULL)
258 return -1;
259 }
260 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000261 self->errors = errors;
262 }
INADA Naoki507434f2017-12-21 09:59:53 +0900263 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000264
Xiang Zhangb08746b2018-10-31 19:49:16 +0800265 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266 self->seennl = 0;
267 self->pendingcr = 0;
268
269 return 0;
270}
271
272static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000273incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000274{
275 Py_CLEAR(self->decoder);
276 Py_CLEAR(self->errors);
277 Py_TYPE(self)->tp_free((PyObject *)self);
278}
279
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200280static int
281check_decoded(PyObject *decoded)
282{
283 if (decoded == NULL)
284 return -1;
285 if (!PyUnicode_Check(decoded)) {
286 PyErr_Format(PyExc_TypeError,
287 "decoder should return a string result, not '%.200s'",
288 Py_TYPE(decoded)->tp_name);
289 Py_DECREF(decoded);
290 return -1;
291 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200292 if (PyUnicode_READY(decoded) < 0) {
293 Py_DECREF(decoded);
294 return -1;
295 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200296 return 0;
297}
298
/* Bit flags recording which newline conventions have been encountered. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
303
304PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200305_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000306 PyObject *input, int final)
307{
308 PyObject *output;
309 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200310 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311
312 if (self->decoder == NULL) {
313 PyErr_SetString(PyExc_ValueError,
314 "IncrementalNewlineDecoder.__init__ not called");
315 return NULL;
316 }
317
318 /* decode input (with the eventual \r from a previous pass) */
319 if (self->decoder != Py_None) {
320 output = PyObject_CallMethodObjArgs(self->decoder,
321 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
322 }
323 else {
324 output = input;
325 Py_INCREF(output);
326 }
327
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200328 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000329 return NULL;
330
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000332 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 /* Prefix output with CR */
334 int kind;
335 PyObject *modified;
336 char *out;
337
338 modified = PyUnicode_New(output_len + 1,
339 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000340 if (modified == NULL)
341 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 kind = PyUnicode_KIND(modified);
343 out = PyUnicode_DATA(modified);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300344 PyUnicode_WRITE(kind, out, 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200345 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000348 self->pendingcr = 0;
349 output_len++;
350 }
351
352 /* retain last \r even when not translating data:
353 * then readline() is sure to get \r\n in one pass
354 */
355 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000356 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
358 {
359 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
360 if (modified == NULL)
361 goto error;
362 Py_DECREF(output);
363 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000364 self->pendingcr = 1;
365 }
366 }
367
368 /* Record which newlines are read and do newline translation if desired,
369 all in one pass. */
370 {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300371 const void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000372 Py_ssize_t len;
373 int seennl = self->seennl;
374 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000376
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 in_str = PyUnicode_DATA(output);
378 len = PyUnicode_GET_LENGTH(output);
379 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000380
381 if (len == 0)
382 return output;
383
384 /* If, up to now, newlines are consistently \n, do a quick check
385 for the \r *byte* with the libc's optimized memchr.
386 */
387 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200388 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000389 }
390
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 if (only_lf) {
392 /* If not already seen, quick scan for a possible "\n" character.
393 (there's nothing else to be done, even when in translation mode)
394 */
395 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200396 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100397 if (kind == PyUnicode_1BYTE_KIND)
398 seennl |= SEEN_LF;
399 else {
400 Py_ssize_t i = 0;
401 for (;;) {
402 Py_UCS4 c;
403 /* Fast loop for non-control characters */
404 while (PyUnicode_READ(kind, in_str, i) > '\n')
405 i++;
406 c = PyUnicode_READ(kind, in_str, i++);
407 if (c == '\n') {
408 seennl |= SEEN_LF;
409 break;
410 }
411 if (i >= len)
412 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000413 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 }
415 }
416 /* Finished: we have scanned for newlines, and none of them
417 need translating */
418 }
419 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000421 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000422 if (seennl == SEEN_ALL)
423 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000424 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200425 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000426 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 while (PyUnicode_READ(kind, in_str, i) > '\r')
428 i++;
429 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 if (c == '\n')
431 seennl |= SEEN_LF;
432 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 }
437 else
438 seennl |= SEEN_CR;
439 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000441 break;
442 if (seennl == SEEN_ALL)
443 break;
444 }
445 endscan:
446 ;
447 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000448 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 void *translated;
450 int kind = PyUnicode_KIND(output);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300451 const void *in_str = PyUnicode_DATA(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 Py_ssize_t in, out;
453 /* XXX: Previous in-place translation here is disabled as
454 resizing is not possible anymore */
455 /* We could try to optimize this so that we only do a copy
456 when there is something to translate. On the other hand,
457 we already know there is a \r byte, so chances are high
458 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200459 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200460 if (translated == NULL) {
461 PyErr_NoMemory();
462 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000467 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
469 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200471 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 seennl |= SEEN_LF;
473 continue;
474 }
475 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200476 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 in++;
478 seennl |= SEEN_CRLF;
479 }
480 else
481 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000483 continue;
484 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000486 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200487 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000488 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200489 Py_DECREF(output);
490 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100491 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200492 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200493 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000494 }
495 self->seennl |= seennl;
496 }
497
498 return output;
499
500 error:
501 Py_DECREF(output);
502 return NULL;
503}
504
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300505/*[clinic input]
506_io.IncrementalNewlineDecoder.decode
507 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200508 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300509[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000510
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511static PyObject *
512_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
513 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200514/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300515{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
517}
518
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300519/*[clinic input]
520_io.IncrementalNewlineDecoder.getstate
521[clinic start generated code]*/
522
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000523static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300524_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
525/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000526{
527 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700528 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529
530 if (self->decoder != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100531 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200532 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000533 if (state == NULL)
534 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300535 if (!PyTuple_Check(state)) {
536 PyErr_SetString(PyExc_TypeError,
537 "illegal decoder state");
538 Py_DECREF(state);
539 return NULL;
540 }
541 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
542 &buffer, &flag))
543 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000544 Py_DECREF(state);
545 return NULL;
546 }
547 Py_INCREF(buffer);
548 Py_DECREF(state);
549 }
550 else {
551 buffer = PyBytes_FromString("");
552 flag = 0;
553 }
554 flag <<= 1;
555 if (self->pendingcr)
556 flag |= 1;
557 return Py_BuildValue("NK", buffer, flag);
558}
559
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300560/*[clinic input]
561_io.IncrementalNewlineDecoder.setstate
562 state: object
563 /
564[clinic start generated code]*/
565
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000566static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300567_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
568 PyObject *state)
569/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570{
571 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700572 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000573
Oren Milman1d1d3e92017-08-20 18:35:36 +0300574 if (!PyTuple_Check(state)) {
575 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300577 }
578 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
579 &buffer, &flag))
580 {
581 return NULL;
582 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583
Victor Stinner7d7e7752014-06-17 23:31:25 +0200584 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585 flag >>= 1;
586
587 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200588 return _PyObject_CallMethodId(self->decoder,
589 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 else
591 Py_RETURN_NONE;
592}
593
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300594/*[clinic input]
595_io.IncrementalNewlineDecoder.reset
596[clinic start generated code]*/
597
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000598static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300599_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
600/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601{
602 self->seennl = 0;
603 self->pendingcr = 0;
604 if (self->decoder != Py_None)
Petr Viktorinffd97532020-02-11 17:46:57 +0100605 return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606 else
607 Py_RETURN_NONE;
608}
609
610static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612{
613 switch (self->seennl) {
614 case SEEN_CR:
615 return PyUnicode_FromString("\r");
616 case SEEN_LF:
617 return PyUnicode_FromString("\n");
618 case SEEN_CRLF:
619 return PyUnicode_FromString("\r\n");
620 case SEEN_CR | SEEN_LF:
621 return Py_BuildValue("ss", "\r", "\n");
622 case SEEN_CR | SEEN_CRLF:
623 return Py_BuildValue("ss", "\r", "\r\n");
624 case SEEN_LF | SEEN_CRLF:
625 return Py_BuildValue("ss", "\n", "\r\n");
626 case SEEN_CR | SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("sss", "\r", "\n", "\r\n");
628 default:
629 Py_RETURN_NONE;
630 }
631
632}
633
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634/* TextIOWrapper */
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636typedef PyObject *
637 (*encodefunc_t)(PyObject *, PyObject *);
638
639typedef struct
640{
641 PyObject_HEAD
642 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000643 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 Py_ssize_t chunk_size;
645 PyObject *buffer;
646 PyObject *encoding;
647 PyObject *encoder;
648 PyObject *decoder;
649 PyObject *readnl;
650 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900651 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200653 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 char readuniversal;
655 char readtranslate;
656 char writetranslate;
657 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200658 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200660 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000665
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000668
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
674 */
675 PyObject *decoded_chars; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
Inada Naokibfba8c32019-05-16 15:03:20 +0900677 PyObject *pending_bytes; // data waiting to be written.
678 // ascii unicode, bytes, or list of them.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000679 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000680
Oren Milman13614e32017-08-24 19:51:24 +0300681 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
685 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000686 PyObject *snapshot;
687 /* Bytes-to-characters ratio for the current chunk. Serves as input for
688 the heuristic in tell(). */
689 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000690
691 /* Cache raw object if it's a FileIO object */
692 PyObject *raw;
693
694 PyObject *weakreflist;
695 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000696} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000697
Zackery Spytz23db9352018-06-29 04:14:58 -0600698static void
699textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
700
/* A few specialized encoders that bypass the slow incremental encoding
   methods for the most popular encodings. */
703
704static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000705ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706{
INADA Naoki507434f2017-12-21 09:59:53 +0900707 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708}
709
710static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000711utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100713 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900714 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715}
716
717static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000718utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100720 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900721 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722}
723
724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000725utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726{
Antoine Pitroue4501852009-05-14 18:55:55 +0000727 if (!self->encoding_start_of_stream) {
728 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200729#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000734 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100735 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900736 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737}
738
Antoine Pitroue4501852009-05-14 18:55:55 +0000739static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000740utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000741{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100742 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900743 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100749 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900750 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000751}
752
753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000755{
756 if (!self->encoding_start_of_stream) {
757 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200758#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000759 return utf32be_encode(self, text);
760#else
761 return utf32le_encode(self, text);
762#endif
763 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100764 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900765 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000766}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767
768static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000769utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000770{
INADA Naoki507434f2017-12-21 09:59:53 +0900771 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772}
773
774static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000775latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776{
INADA Naoki507434f2017-12-21 09:59:53 +0900777 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778}
779
Inada Naokibfba8c32019-05-16 15:03:20 +0900780// Return true when encoding can be skipped when text is ascii.
781static inline int
782is_asciicompat_encoding(encodefunc_t f)
783{
784 return f == (encodefunc_t) ascii_encode
785 || f == (encodefunc_t) latin1_encode
786 || f == (encodefunc_t) utf8_encode;
787}
788
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000789/* Map normalized encoding names onto the specialized encoding funcs */
790
/* One row of the specialized-encoder lookup table: pairs a codec name with
   the fast encoding function to use for it. */
typedef struct {
    const char *name;           /* codec name compared against codec_info.name */
    encodefunc_t encodefunc;    /* specialized encoder for that codec */
} encodefuncentry;
795
/* Table of codecs that have a specialized encoder.  It is scanned linearly
   by _textiowrapper_set_encoder(), which stops at the first entry whose
   name equals the codec's `name` attribute.  The {NULL, NULL} entry is the
   end-of-table sentinel. */
static const encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
808
INADA Naoki507434f2017-12-21 09:59:53 +0900809static int
810validate_newline(const char *newline)
811{
812 if (newline && newline[0] != '\0'
813 && !(newline[0] == '\n' && newline[1] == '\0')
814 && !(newline[0] == '\r' && newline[1] == '\0')
815 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
816 PyErr_Format(PyExc_ValueError,
817 "illegal newline value: %s", newline);
818 return -1;
819 }
820 return 0;
821}
822
823static int
824set_newline(textio *self, const char *newline)
825{
826 PyObject *old = self->readnl;
827 if (newline == NULL) {
828 self->readnl = NULL;
829 }
830 else {
831 self->readnl = PyUnicode_FromString(newline);
832 if (self->readnl == NULL) {
833 self->readnl = old;
834 return -1;
835 }
836 }
837 self->readuniversal = (newline == NULL || newline[0] == '\0');
838 self->readtranslate = (newline == NULL);
839 self->writetranslate = (newline == NULL || newline[0] != '\0');
840 if (!self->readuniversal && self->readnl != NULL) {
841 // validate_newline() accepts only ASCII newlines.
842 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
843 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
844 if (strcmp(self->writenl, "\n") == 0) {
845 self->writenl = NULL;
846 }
847 }
848 else {
849#ifdef MS_WINDOWS
850 self->writenl = "\r\n";
851#else
852 self->writenl = NULL;
853#endif
854 }
855 Py_XDECREF(old);
856 return 0;
857}
858
859static int
860_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
861 const char *errors)
862{
863 PyObject *res;
864 int r;
865
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200866 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
INADA Naoki507434f2017-12-21 09:59:53 +0900867 if (res == NULL)
868 return -1;
869
870 r = PyObject_IsTrue(res);
871 Py_DECREF(res);
872 if (r == -1)
873 return -1;
874
875 if (r != 1)
876 return 0;
877
878 Py_CLEAR(self->decoder);
879 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
880 if (self->decoder == NULL)
881 return -1;
882
883 if (self->readuniversal) {
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300884 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
INADA Naoki507434f2017-12-21 09:59:53 +0900885 (PyObject *)&PyIncrementalNewlineDecoder_Type,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300886 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
INADA Naoki507434f2017-12-21 09:59:53 +0900887 if (incrementalDecoder == NULL)
888 return -1;
889 Py_CLEAR(self->decoder);
890 self->decoder = incrementalDecoder;
891 }
892
893 return 0;
894}
895
896static PyObject*
897_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
898{
899 PyObject *chars;
900
Andy Lesterdffe4c02020-03-04 07:15:20 -0600901 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
INADA Naoki507434f2017-12-21 09:59:53 +0900902 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
903 else
904 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
905 eof ? Py_True : Py_False, NULL);
906
907 if (check_decoded(chars) < 0)
908 // check_decoded already decreases refcount
909 return NULL;
910
911 return chars;
912}
913
914static int
915_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
916 const char *errors)
917{
918 PyObject *res;
919 int r;
920
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200921 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
INADA Naoki507434f2017-12-21 09:59:53 +0900922 if (res == NULL)
923 return -1;
924
925 r = PyObject_IsTrue(res);
926 Py_DECREF(res);
927 if (r == -1)
928 return -1;
929
930 if (r != 1)
931 return 0;
932
933 Py_CLEAR(self->encoder);
934 self->encodefunc = NULL;
935 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
936 if (self->encoder == NULL)
937 return -1;
938
939 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200940 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
941 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900942 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200943 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900944 const encodefuncentry *e = encodefuncs;
945 while (e->name != NULL) {
946 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
947 self->encodefunc = e->encodefunc;
948 break;
949 }
950 e++;
951 }
952 }
953 Py_XDECREF(res);
954
955 return 0;
956}
957
958static int
959_textiowrapper_fix_encoder_state(textio *self)
960{
961 if (!self->seekable || !self->encoder) {
962 return 0;
963 }
964
965 self->encoding_start_of_stream = 1;
966
Petr Viktorinffd97532020-02-11 17:46:57 +0100967 PyObject *cookieObj = PyObject_CallMethodNoArgs(
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200968 self->buffer, _PyIO_str_tell);
INADA Naoki507434f2017-12-21 09:59:53 +0900969 if (cookieObj == NULL) {
970 return -1;
971 }
972
973 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
974 Py_DECREF(cookieObj);
975 if (cmp < 0) {
976 return -1;
977 }
978
979 if (cmp == 0) {
980 self->encoding_start_of_stream = 0;
Petr Viktorinffd97532020-02-11 17:46:57 +0100981 PyObject *res = PyObject_CallMethodOneArg(
Jeroen Demeyer59ad1102019-07-11 10:59:05 +0200982 self->encoder, _PyIO_str_setstate, _PyLong_Zero);
INADA Naoki507434f2017-12-21 09:59:53 +0900983 if (res == NULL) {
984 return -1;
985 }
986 Py_DECREF(res);
987 }
988
989 return 0;
990}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000991
Victor Stinner22eb6892019-06-26 00:51:05 +0200992static int
993io_check_errors(PyObject *errors)
994{
995 assert(errors != NULL && errors != Py_None);
996
997 PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
998#ifndef Py_DEBUG
999 /* In release mode, only check in development mode (-X dev) */
Victor Stinnerda7933e2020-04-13 03:04:28 +02001000 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001001 return 0;
1002 }
1003#else
1004 /* Always check in debug mode */
1005#endif
1006
1007 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1008 before_PyUnicode_InitEncodings() is called. */
1009 if (!interp->fs_codec.encoding) {
1010 return 0;
1011 }
1012
1013 Py_ssize_t name_length;
1014 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1015 if (name == NULL) {
1016 return -1;
1017 }
1018 if (strlen(name) != (size_t)name_length) {
1019 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1020 return -1;
1021 }
1022 PyObject *handler = PyCodec_LookupError(name);
1023 if (handler != NULL) {
1024 Py_DECREF(handler);
1025 return 0;
1026 }
1027 return -1;
1028}
1029
1030
1031
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001032/*[clinic input]
1033_io.TextIOWrapper.__init__
1034 buffer: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001035 encoding: str(accept={str, NoneType}) = None
INADA Naoki507434f2017-12-21 09:59:53 +09001036 errors: object = None
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001037 newline: str(accept={str, NoneType}) = None
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001038 line_buffering: bool(accept={int}) = False
1039 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001041Character and line based layer over a BufferedIOBase object, buffer.
1042
1043encoding gives the name of the encoding that the stream will be
1044decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1045
1046errors determines the strictness of encoding and decoding (see
1047help(codecs.Codec) or the documentation for codecs.register) and
1048defaults to "strict".
1049
1050newline controls how line endings are handled. It can be None, '',
1051'\n', '\r', and '\r\n'. It works as follows:
1052
1053* On input, if newline is None, universal newlines mode is
1054 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1055 these are translated into '\n' before being returned to the
1056 caller. If it is '', universal newline mode is enabled, but line
1057 endings are returned to the caller untranslated. If it has any of
1058 the other legal values, input lines are only terminated by the given
1059 string, and the line ending is returned to the caller untranslated.
1060
1061* On output, if newline is None, any '\n' characters written are
1062 translated to the system default line separator, os.linesep. If
1063 newline is '' or '\n', no translation takes place. If newline is any
1064 of the other legal values, any '\n' characters written are translated
1065 to the given string.
1066
1067If line_buffering is True, a call to flush is implied when a call to
1068write contains a newline character.
1069[clinic start generated code]*/
1070
1071static int
1072_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001073 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001074 const char *newline, int line_buffering,
1075 int write_through)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001076/*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001077{
1078 PyObject *raw, *codec_info = NULL;
1079 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001080 PyObject *res;
1081 int r;
1082
1083 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001084 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001085
INADA Naoki507434f2017-12-21 09:59:53 +09001086 if (errors == Py_None) {
1087 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001088 if (errors == NULL) {
1089 return -1;
1090 }
INADA Naoki507434f2017-12-21 09:59:53 +09001091 }
1092 else if (!PyUnicode_Check(errors)) {
1093 // Check 'errors' argument here because Argument Clinic doesn't support
1094 // 'str(accept={str, NoneType})' converter.
1095 PyErr_Format(
1096 PyExc_TypeError,
1097 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001098 Py_TYPE(errors)->tp_name);
INADA Naoki507434f2017-12-21 09:59:53 +09001099 return -1;
1100 }
Victor Stinner22eb6892019-06-26 00:51:05 +02001101 else if (io_check_errors(errors)) {
1102 return -1;
1103 }
INADA Naoki507434f2017-12-21 09:59:53 +09001104
1105 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001106 return -1;
1107 }
1108
1109 Py_CLEAR(self->buffer);
1110 Py_CLEAR(self->encoding);
1111 Py_CLEAR(self->encoder);
1112 Py_CLEAR(self->decoder);
1113 Py_CLEAR(self->readnl);
1114 Py_CLEAR(self->decoded_chars);
1115 Py_CLEAR(self->pending_bytes);
1116 Py_CLEAR(self->snapshot);
1117 Py_CLEAR(self->errors);
1118 Py_CLEAR(self->raw);
1119 self->decoded_chars_used = 0;
1120 self->pending_bytes_count = 0;
1121 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001122 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001123
1124 if (encoding == NULL) {
1125 /* Try os.device_encoding(fileno) */
1126 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001127 state = IO_STATE();
1128 if (state == NULL)
1129 goto error;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001130 fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001131 /* Ignore only AttributeError and UnsupportedOperation */
1132 if (fileno == NULL) {
1133 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1134 PyErr_ExceptionMatches(state->unsupported_operation)) {
1135 PyErr_Clear();
1136 }
1137 else {
1138 goto error;
1139 }
1140 }
1141 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001142 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001143 Py_DECREF(fileno);
1144 if (fd == -1 && PyErr_Occurred()) {
1145 goto error;
1146 }
1147
1148 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001149 if (self->encoding == NULL)
1150 goto error;
1151 else if (!PyUnicode_Check(self->encoding))
1152 Py_CLEAR(self->encoding);
1153 }
1154 }
1155 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001156 PyObject *locale_module = _PyIO_get_locale_module(state);
1157 if (locale_module == NULL)
1158 goto catch_ImportError;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001159 self->encoding = _PyObject_CallMethodIdOneArg(
1160 locale_module, &PyId_getpreferredencoding, Py_False);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001161 Py_DECREF(locale_module);
1162 if (self->encoding == NULL) {
1163 catch_ImportError:
1164 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001165 Importing locale can raise an ImportError because of
1166 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001167 ImportError if _locale is not available. These will happen
1168 during module building.
1169 */
1170 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1171 PyErr_Clear();
1172 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001173 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001174 else
1175 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001176 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001177 else if (!PyUnicode_Check(self->encoding))
1178 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001179 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001180 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001181 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001182 if (encoding == NULL)
1183 goto error;
1184 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001185 else if (encoding != NULL) {
1186 self->encoding = PyUnicode_FromString(encoding);
1187 if (self->encoding == NULL)
1188 goto error;
1189 }
1190 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001191 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001192 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001193 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001194 }
1195
Nick Coghlana9b15242014-02-04 22:11:18 +10001196 /* Check we have been asked for a real text encoding */
1197 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1198 if (codec_info == NULL) {
1199 Py_CLEAR(self->encoding);
1200 goto error;
1201 }
1202
1203 /* XXX: Failures beyond this point have the potential to leak elements
1204 * of the partially constructed object (like self->encoding)
1205 */
1206
INADA Naoki507434f2017-12-21 09:59:53 +09001207 Py_INCREF(errors);
1208 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001209 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001210 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001211 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001212 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001213 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001214 }
1215
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001216 self->buffer = buffer;
1217 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001218
INADA Naoki507434f2017-12-21 09:59:53 +09001219 /* Build the decoder object */
1220 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1221 goto error;
1222
1223 /* Build the encoder object */
1224 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1225 goto error;
1226
1227 /* Finished sorting out the codec details */
1228 Py_CLEAR(codec_info);
1229
Andy Lesterdffe4c02020-03-04 07:15:20 -06001230 if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1231 Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1232 Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001233 {
1234 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1235 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001236 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001237 if (raw != NULL) {
Andy Lesterdffe4c02020-03-04 07:15:20 -06001238 if (Py_IS_TYPE(raw, &PyFileIO_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001239 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001240 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001241 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001242 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001243 }
1244
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001245 res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246 if (res == NULL)
1247 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001248 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001249 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001250 if (r < 0)
1251 goto error;
1252 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001253
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001254 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1255 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001256 goto error;
1257 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001258 Py_XDECREF(res);
1259 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001260
Antoine Pitroue4501852009-05-14 18:55:55 +00001261 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001262 if (_textiowrapper_fix_encoder_state(self) < 0) {
1263 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001264 }
1265
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266 self->ok = 1;
1267 return 0;
1268
1269 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001270 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001271 return -1;
1272}
1273
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001274/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1275 * -1 on error.
1276 */
1277static int
1278convert_optional_bool(PyObject *obj, int default_value)
1279{
1280 long v;
1281 if (obj == Py_None) {
1282 v = default_value;
1283 }
1284 else {
1285 v = PyLong_AsLong(obj);
1286 if (v == -1 && PyErr_Occurred())
1287 return -1;
1288 }
1289 return v != 0;
1290}
1291
INADA Naoki507434f2017-12-21 09:59:53 +09001292static int
1293textiowrapper_change_encoding(textio *self, PyObject *encoding,
1294 PyObject *errors, int newline_changed)
1295{
1296 /* Use existing settings where new settings are not specified */
1297 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1298 return 0; // no change
1299 }
1300
1301 if (encoding == Py_None) {
1302 encoding = self->encoding;
1303 if (errors == Py_None) {
1304 errors = self->errors;
1305 }
1306 }
1307 else if (errors == Py_None) {
1308 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001309 if (errors == NULL) {
1310 return -1;
1311 }
INADA Naoki507434f2017-12-21 09:59:53 +09001312 }
1313
1314 const char *c_errors = PyUnicode_AsUTF8(errors);
1315 if (c_errors == NULL) {
1316 return -1;
1317 }
1318
1319 // Create new encoder & decoder
1320 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1321 PyUnicode_AsUTF8(encoding), "codecs.open()");
1322 if (codec_info == NULL) {
1323 return -1;
1324 }
1325 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1326 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1327 Py_DECREF(codec_info);
1328 return -1;
1329 }
1330 Py_DECREF(codec_info);
1331
1332 Py_INCREF(encoding);
1333 Py_INCREF(errors);
1334 Py_SETREF(self->encoding, encoding);
1335 Py_SETREF(self->errors, errors);
1336
1337 return _textiowrapper_fix_encoder_state(self);
1338}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001339
1340/*[clinic input]
1341_io.TextIOWrapper.reconfigure
1342 *
INADA Naoki507434f2017-12-21 09:59:53 +09001343 encoding: object = None
1344 errors: object = None
1345 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001346 line_buffering as line_buffering_obj: object = None
1347 write_through as write_through_obj: object = None
1348
1349Reconfigure the text stream with new parameters.
1350
1351This also does an implicit stream flush.
1352
1353[clinic start generated code]*/
1354
1355static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001356_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1357 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001358 PyObject *line_buffering_obj,
1359 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001360/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001361{
1362 int line_buffering;
1363 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001364 const char *newline = NULL;
1365
1366 /* Check if something is in the read buffer */
1367 if (self->decoded_chars != NULL) {
1368 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001369 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001370 "of stream after the first read");
1371 return NULL;
1372 }
1373 }
1374
1375 if (newline_obj != NULL && newline_obj != Py_None) {
1376 newline = PyUnicode_AsUTF8(newline_obj);
1377 if (newline == NULL || validate_newline(newline) < 0) {
1378 return NULL;
1379 }
1380 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001381
1382 line_buffering = convert_optional_bool(line_buffering_obj,
1383 self->line_buffering);
1384 write_through = convert_optional_bool(write_through_obj,
1385 self->write_through);
1386 if (line_buffering < 0 || write_through < 0) {
1387 return NULL;
1388 }
INADA Naoki507434f2017-12-21 09:59:53 +09001389
Petr Viktorinffd97532020-02-11 17:46:57 +01001390 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001391 if (res == NULL) {
1392 return NULL;
1393 }
INADA Naoki507434f2017-12-21 09:59:53 +09001394 Py_DECREF(res);
1395 self->b2cratio = 0;
1396
1397 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1398 return NULL;
1399 }
1400
1401 if (textiowrapper_change_encoding(
1402 self, encoding, errors, newline_obj != NULL) < 0) {
1403 return NULL;
1404 }
1405
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001406 self->line_buffering = line_buffering;
1407 self->write_through = write_through;
1408 Py_RETURN_NONE;
1409}
1410
/* Release every object reference held by the wrapper and mark it as not
   initialized (self->ok = 0).  Also called from textiowrapper_dealloc().
   Always returns 0. */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1429
/* Deallocator: runs the IOBase finalizer, then untracks the object from the
   GC, clears weak references, releases all held references and frees the
   memory. */
static void
textiowrapper_dealloc(textio *self)
{
    self->finalizing = 1;
    /* A negative result aborts the deallocation.
       NOTE(review): presumably this means the finalizer resurrected the
       object -- confirm against _PyIOBase_finalize(). */
    if (_PyIOBase_finalize((PyObject *) self) < 0)
        return;
    self->ok = 0;
    _PyObject_GC_UNTRACK(self);
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *)self);
    textiowrapper_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
1443
/* Visit every object reference held by the wrapper with `visit`/`arg`.
   The visited members are the same ones textiowrapper_clear() releases.
   Returns 0 unless a Py_VISIT() returns early with a non-zero result. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1461
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001463textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001464
/* This macro takes some shortcuts to make the common case faster.

   It makes the enclosing function `return NULL` (with an exception set) when
   the stream is closed or when the closed state cannot be determined, so it
   may only be used in functions returning a pointer.

   For an exact TextIOWrapper: if a raw FileIO object is cached in self->raw
   its closed flag is read directly via _PyFileIO_closed(); otherwise the
   `closed` getter is evaluated.  A closed stream raises ValueError.
   For any other type the generic _PyIOBase_check_closed() is used.

   The macro declares its own locals `r` and `_res` inside the do/while
   block. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1491
/* Make the enclosing function raise ValueError and `return NULL` when the
   wrapper has not been successfully initialized (self->ok <= 0).
   Expands to a bare `if` statement, not a do/while block. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return NULL; \
    }
1498
/* Like CHECK_INITIALIZED, and additionally raise ValueError and
   `return NULL` when the underlying buffer has been detached.
   Expands to two consecutive `if` statements. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }
1506
/* Variant of CHECK_ATTACHED for functions returning int: performs the same
   two checks with the same messages but makes the enclosing function
   `return -1` instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1517
1518
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001519/*[clinic input]
1520_io.TextIOWrapper.detach
1521[clinic start generated code]*/
1522
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001523static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001524_io_TextIOWrapper_detach_impl(textio *self)
1525/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001526{
1527 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001528 CHECK_ATTACHED(self);
Petr Viktorinffd97532020-02-11 17:46:57 +01001529 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001530 if (res == NULL)
1531 return NULL;
1532 Py_DECREF(res);
1533 buffer = self->buffer;
1534 self->buffer = NULL;
1535 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001536 return buffer;
1537}
1538
Antoine Pitrou24f36292009-03-28 22:16:42 +00001539/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001540 underlying buffered object, though. */
1541static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001542_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001543{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001544 if (self->pending_bytes == NULL)
1545 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001546
Inada Naokibfba8c32019-05-16 15:03:20 +09001547 PyObject *pending = self->pending_bytes;
1548 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001549
Inada Naokibfba8c32019-05-16 15:03:20 +09001550 if (PyBytes_Check(pending)) {
1551 b = pending;
1552 Py_INCREF(b);
1553 }
1554 else if (PyUnicode_Check(pending)) {
1555 assert(PyUnicode_IS_ASCII(pending));
1556 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1557 b = PyBytes_FromStringAndSize(
1558 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1559 if (b == NULL) {
1560 return -1;
1561 }
1562 }
1563 else {
1564 assert(PyList_Check(pending));
1565 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1566 if (b == NULL) {
1567 return -1;
1568 }
1569
1570 char *buf = PyBytes_AsString(b);
1571 Py_ssize_t pos = 0;
1572
1573 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1574 PyObject *obj = PyList_GET_ITEM(pending, i);
1575 char *src;
1576 Py_ssize_t len;
1577 if (PyUnicode_Check(obj)) {
1578 assert(PyUnicode_IS_ASCII(obj));
1579 src = PyUnicode_DATA(obj);
1580 len = PyUnicode_GET_LENGTH(obj);
1581 }
1582 else {
1583 assert(PyBytes_Check(obj));
1584 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1585 Py_DECREF(b);
1586 return -1;
1587 }
1588 }
1589 memcpy(buf + pos, src, len);
1590 pos += len;
1591 }
1592 assert(pos == self->pending_bytes_count);
1593 }
1594
1595 self->pending_bytes_count = 0;
1596 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001597 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001598
1599 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001600 do {
Petr Viktorinffd97532020-02-11 17:46:57 +01001601 ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001602 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 Py_DECREF(b);
1604 if (ret == NULL)
1605 return -1;
1606 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001607 return 0;
1608}
1609
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001610/*[clinic input]
1611_io.TextIOWrapper.write
1612 text: unicode
1613 /
1614[clinic start generated code]*/
1615
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001617_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1618/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001619{
1620 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001621 PyObject *b;
1622 Py_ssize_t textlen;
1623 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001624 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001625
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001626 if (PyUnicode_READY(text) == -1)
1627 return NULL;
1628
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001629 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630 CHECK_CLOSED(self);
1631
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001632 if (self->encoder == NULL)
1633 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001634
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001635 Py_INCREF(text);
1636
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001637 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001638
1639 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001640 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 haslf = 1;
1642
1643 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001644 PyObject *newtext = _PyObject_CallMethodId(
1645 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001646 Py_DECREF(text);
1647 if (newtext == NULL)
1648 return NULL;
1649 text = newtext;
1650 }
1651
Antoine Pitroue96ec682011-07-23 21:46:35 +02001652 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001653 text_needflush = 1;
1654 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001655 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001656 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 needflush = 1;
1658
1659 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001660 if (self->encodefunc != NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001661 if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
1662 b = text;
1663 Py_INCREF(b);
1664 }
1665 else {
1666 b = (*self->encodefunc)((PyObject *) self, text);
1667 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001668 self->encoding_start_of_stream = 0;
1669 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 else
Petr Viktorinffd97532020-02-11 17:46:57 +01001671 b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
Inada Naokibfba8c32019-05-16 15:03:20 +09001672
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673 Py_DECREF(text);
1674 if (b == NULL)
1675 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001676 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001677 PyErr_Format(PyExc_TypeError,
1678 "encoder should return a bytes object, not '%.200s'",
1679 Py_TYPE(b)->tp_name);
1680 Py_DECREF(b);
1681 return NULL;
1682 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001683
Inada Naokibfba8c32019-05-16 15:03:20 +09001684 Py_ssize_t bytes_len;
1685 if (b == text) {
1686 bytes_len = PyUnicode_GET_LENGTH(b);
1687 }
1688 else {
1689 bytes_len = PyBytes_GET_SIZE(b);
1690 }
1691
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001693 self->pending_bytes_count = 0;
1694 self->pending_bytes = b;
1695 }
1696 else if (!PyList_CheckExact(self->pending_bytes)) {
1697 PyObject *list = PyList_New(2);
1698 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 Py_DECREF(b);
1700 return NULL;
1701 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001702 PyList_SET_ITEM(list, 0, self->pending_bytes);
1703 PyList_SET_ITEM(list, 1, b);
1704 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001706 else {
1707 if (PyList_Append(self->pending_bytes, b) < 0) {
1708 Py_DECREF(b);
1709 return NULL;
1710 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001712 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001713
1714 self->pending_bytes_count += bytes_len;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001715 if (self->pending_bytes_count > self->chunk_size || needflush ||
1716 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001717 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001718 return NULL;
1719 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001720
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (needflush) {
Petr Viktorinffd97532020-02-11 17:46:57 +01001722 ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 if (ret == NULL)
1724 return NULL;
1725 Py_DECREF(ret);
1726 }
1727
Zackery Spytz23db9352018-06-29 04:14:58 -06001728 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001729 Py_CLEAR(self->snapshot);
1730
1731 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001732 ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001733 if (ret == NULL)
1734 return NULL;
1735 Py_DECREF(ret);
1736 }
1737
1738 return PyLong_FromSsize_t(textlen);
1739}
1740
1741/* Steal a reference to chars and store it in the decoded_char buffer;
1742 */
1743static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001744textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001745{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001746 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747 self->decoded_chars_used = 0;
1748}
1749
1750static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001751textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752{
1753 PyObject *chars;
1754 Py_ssize_t avail;
1755
1756 if (self->decoded_chars == NULL)
1757 return PyUnicode_FromStringAndSize(NULL, 0);
1758
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001759 /* decoded_chars is guaranteed to be "ready". */
1760 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761 - self->decoded_chars_used);
1762
1763 assert(avail >= 0);
1764
1765 if (n < 0 || n > avail)
1766 n = avail;
1767
1768 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001769 chars = PyUnicode_Substring(self->decoded_chars,
1770 self->decoded_chars_used,
1771 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001772 if (chars == NULL)
1773 return NULL;
1774 }
1775 else {
1776 chars = self->decoded_chars;
1777 Py_INCREF(chars);
1778 }
1779
1780 self->decoded_chars_used += n;
1781 return chars;
1782}
1783
1784/* Read and decode the next chunk of data from the BufferedReader.
1785 */
1786static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001787textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001788{
1789 PyObject *dec_buffer = NULL;
1790 PyObject *dec_flags = NULL;
1791 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001792 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001793 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001794 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001795 int eof;
1796
1797 /* The return value is True unless EOF was reached. The decoded string is
1798 * placed in self._decoded_chars (replacing its previous value). The
1799 * entire input chunk is sent to the decoder, though some of it may remain
1800 * buffered in the decoder, yet to be converted.
1801 */
1802
1803 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001804 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805 return -1;
1806 }
1807
1808 if (self->telling) {
1809 /* To prepare for tell(), we need to snapshot a point in the file
1810 * where the decoder's input buffer is empty.
1811 */
Petr Viktorinffd97532020-02-11 17:46:57 +01001812 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001813 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001814 if (state == NULL)
1815 return -1;
1816 /* Given this, we know there was a valid snapshot point
1817 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1818 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001819 if (!PyTuple_Check(state)) {
1820 PyErr_SetString(PyExc_TypeError,
1821 "illegal decoder state");
1822 Py_DECREF(state);
1823 return -1;
1824 }
1825 if (!PyArg_ParseTuple(state,
1826 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1827 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001828 Py_DECREF(state);
1829 return -1;
1830 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001831
1832 if (!PyBytes_Check(dec_buffer)) {
1833 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001834 "illegal decoder state: the first item should be a "
1835 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001836 Py_TYPE(dec_buffer)->tp_name);
1837 Py_DECREF(state);
1838 return -1;
1839 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001840 Py_INCREF(dec_buffer);
1841 Py_INCREF(dec_flags);
1842 Py_DECREF(state);
1843 }
1844
1845 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001846 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001847 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001848 }
1849 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001850 if (chunk_size == NULL)
1851 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001852
Petr Viktorinffd97532020-02-11 17:46:57 +01001853 input_chunk = PyObject_CallMethodOneArg(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001854 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001855 chunk_size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001856 Py_DECREF(chunk_size);
1857 if (input_chunk == NULL)
1858 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001859
1860 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001861 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001862 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001863 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1864 Py_TYPE(input_chunk)->tp_name);
1865 goto fail;
1866 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001867
Antoine Pitroub8503892014-04-29 10:14:02 +02001868 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001869 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870
INADA Naoki507434f2017-12-21 09:59:53 +09001871 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1872 PyBuffer_Release(&input_chunk_buf);
1873 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001874 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001875
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001876 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001877 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001878 if (nchars > 0)
1879 self->b2cratio = (double) nbytes / nchars;
1880 else
1881 self->b2cratio = 0.0;
1882 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001883 eof = 0;
1884
1885 if (self->telling) {
1886 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1887 * next input to be decoded is dec_buffer + input_chunk.
1888 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001889 PyObject *next_input = dec_buffer;
1890 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001891 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001892 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001893 goto fail;
1894 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001895 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1896 if (snapshot == NULL) {
1897 dec_flags = NULL;
1898 goto fail;
1899 }
1900 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001901 }
1902 Py_DECREF(input_chunk);
1903
1904 return (eof == 0);
1905
1906 fail:
1907 Py_XDECREF(dec_buffer);
1908 Py_XDECREF(dec_flags);
1909 Py_XDECREF(input_chunk);
1910 return -1;
1911}
1912
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001913/*[clinic input]
1914_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001915 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001916 /
1917[clinic start generated code]*/
1918
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001919static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001920_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001921/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001922{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001923 PyObject *result = NULL, *chunks = NULL;
1924
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001925 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001926 CHECK_CLOSED(self);
1927
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001928 if (self->decoder == NULL)
1929 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001930
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001931 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001932 return NULL;
1933
1934 if (n < 0) {
1935 /* Read everything */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001936 PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001937 PyObject *decoded;
1938 if (bytes == NULL)
1939 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001940
Andy Lesterdffe4c02020-03-04 07:15:20 -06001941 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
Victor Stinnerfd821132011-05-25 22:01:33 +02001942 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1943 bytes, 1);
1944 else
1945 decoded = PyObject_CallMethodObjArgs(
1946 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001947 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001948 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001949 goto fail;
1950
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001951 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001952
1953 if (result == NULL) {
1954 Py_DECREF(decoded);
1955 return NULL;
1956 }
1957
1958 PyUnicode_AppendAndDel(&result, decoded);
1959 if (result == NULL)
1960 goto fail;
1961
Zackery Spytz23db9352018-06-29 04:14:58 -06001962 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963 Py_CLEAR(self->snapshot);
1964 return result;
1965 }
1966 else {
1967 int res = 1;
1968 Py_ssize_t remaining = n;
1969
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001970 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001971 if (result == NULL)
1972 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001973 if (PyUnicode_READY(result) == -1)
1974 goto fail;
1975 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001976
1977 /* Keep reading chunks until we have n characters to return */
1978 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001979 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001980 if (res < 0) {
1981 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1982 when EINTR occurs so we needn't do it ourselves. */
1983 if (_PyIO_trap_eintr()) {
1984 continue;
1985 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001986 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001987 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001988 if (res == 0) /* EOF */
1989 break;
1990 if (chunks == NULL) {
1991 chunks = PyList_New(0);
1992 if (chunks == NULL)
1993 goto fail;
1994 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001995 if (PyUnicode_GET_LENGTH(result) > 0 &&
1996 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001997 goto fail;
1998 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001999 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000 if (result == NULL)
2001 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002002 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002003 }
2004 if (chunks != NULL) {
2005 if (result != NULL && PyList_Append(chunks, result) < 0)
2006 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03002007 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008 if (result == NULL)
2009 goto fail;
2010 Py_CLEAR(chunks);
2011 }
2012 return result;
2013 }
2014 fail:
2015 Py_XDECREF(result);
2016 Py_XDECREF(chunks);
2017 return NULL;
2018}
2019
2020
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002021/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002022 that is to the NUL character. Otherwise the function will produce
2023 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002024static const char *
2025find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002026{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002027 if (kind == PyUnicode_1BYTE_KIND) {
2028 assert(ch < 256);
Andy Lestere6be9b52020-02-11 20:28:35 -06002029 return (char *) memchr((const void *) s, (char) ch, end - s);
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002030 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002032 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002033 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002034 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002035 return s;
2036 if (s == end)
2037 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002038 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002039 }
2040}
2041
2042Py_ssize_t
2043_PyIO_find_line_ending(
2044 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002045 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002046{
Andy Lestere6be9b52020-02-11 20:28:35 -06002047 Py_ssize_t len = (end - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002048
2049 if (translated) {
2050 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002051 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002053 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054 else {
2055 *consumed = len;
2056 return -1;
2057 }
2058 }
2059 else if (universal) {
2060 /* Universal newline search. Find any of \r, \r\n, \n
2061 * The decoder ensures that \r\n are not split in two pieces
2062 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002063 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002064 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002065 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002066 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002067 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002068 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002069 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002070 if (s >= end) {
2071 *consumed = len;
2072 return -1;
2073 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002074 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002075 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002077 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002078 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002079 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002080 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002082 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002083 }
2084 }
2085 }
2086 else {
2087 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002088 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002089 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002090 /* Assume that readnl is an ASCII character. */
2091 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002093 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002094 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002095 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002096 *consumed = len;
2097 return -1;
2098 }
2099 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002100 const char *s = start;
2101 const char *e = end - (readnl_len - 1)*kind;
2102 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 if (e < s)
2104 e = s;
2105 while (s < e) {
2106 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002107 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002108 if (pos == NULL || pos >= e)
2109 break;
2110 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002111 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002112 break;
2113 }
2114 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002115 return (pos - start)/kind + readnl_len;
2116 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002118 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119 if (pos == NULL)
2120 *consumed = len;
2121 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002122 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123 return -1;
2124 }
2125 }
2126}
2127
2128static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002129_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002130{
2131 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2132 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2133 int res;
2134
2135 CHECK_CLOSED(self);
2136
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002137 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002138 return NULL;
2139
2140 chunked = 0;
2141
2142 while (1) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002143 const char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002144 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002145 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002146 Py_ssize_t consumed = 0;
2147
2148 /* First, get some data if necessary */
2149 res = 1;
2150 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002151 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002152 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002153 if (res < 0) {
2154 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2155 when EINTR occurs so we needn't do it ourselves. */
2156 if (_PyIO_trap_eintr()) {
2157 continue;
2158 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002159 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002160 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002161 if (res == 0)
2162 break;
2163 }
2164 if (res == 0) {
2165 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002166 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167 Py_CLEAR(self->snapshot);
2168 start = endpos = offset_to_buffer = 0;
2169 break;
2170 }
2171
2172 if (remaining == NULL) {
2173 line = self->decoded_chars;
2174 start = self->decoded_chars_used;
2175 offset_to_buffer = 0;
2176 Py_INCREF(line);
2177 }
2178 else {
2179 assert(self->decoded_chars_used == 0);
2180 line = PyUnicode_Concat(remaining, self->decoded_chars);
2181 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002182 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002183 Py_CLEAR(remaining);
2184 if (line == NULL)
2185 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002186 if (PyUnicode_READY(line) == -1)
2187 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002188 }
2189
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002190 ptr = PyUnicode_DATA(line);
2191 line_len = PyUnicode_GET_LENGTH(line);
2192 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002193
2194 endpos = _PyIO_find_line_ending(
2195 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002196 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002197 ptr + kind * start,
2198 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002199 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002200 if (endpos >= 0) {
2201 endpos += start;
2202 if (limit >= 0 && (endpos - start) + chunked >= limit)
2203 endpos = start + limit - chunked;
2204 break;
2205 }
2206
2207 /* We can put aside up to `endpos` */
2208 endpos = consumed + start;
2209 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2210 /* Didn't find line ending, but reached length limit */
2211 endpos = start + limit - chunked;
2212 break;
2213 }
2214
2215 if (endpos > start) {
2216 /* No line ending seen yet - put aside current data */
2217 PyObject *s;
2218 if (chunks == NULL) {
2219 chunks = PyList_New(0);
2220 if (chunks == NULL)
2221 goto error;
2222 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002223 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002224 if (s == NULL)
2225 goto error;
2226 if (PyList_Append(chunks, s) < 0) {
2227 Py_DECREF(s);
2228 goto error;
2229 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002230 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002231 Py_DECREF(s);
2232 }
2233 /* There may be some remaining bytes we'll have to prepend to the
2234 next chunk of data */
2235 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002236 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002237 if (remaining == NULL)
2238 goto error;
2239 }
2240 Py_CLEAR(line);
2241 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002242 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002243 }
2244
2245 if (line != NULL) {
2246 /* Our line ends in the current buffer */
2247 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002248 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2249 PyObject *s = PyUnicode_Substring(line, start, endpos);
2250 Py_CLEAR(line);
2251 if (s == NULL)
2252 goto error;
2253 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002254 }
2255 }
2256 if (remaining != NULL) {
2257 if (chunks == NULL) {
2258 chunks = PyList_New(0);
2259 if (chunks == NULL)
2260 goto error;
2261 }
2262 if (PyList_Append(chunks, remaining) < 0)
2263 goto error;
2264 Py_CLEAR(remaining);
2265 }
2266 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002267 if (line != NULL) {
2268 if (PyList_Append(chunks, line) < 0)
2269 goto error;
2270 Py_DECREF(line);
2271 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002272 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2273 if (line == NULL)
2274 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002275 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002276 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002277 if (line == NULL) {
2278 Py_INCREF(_PyIO_empty_str);
2279 line = _PyIO_empty_str;
2280 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002281
2282 return line;
2283
2284 error:
2285 Py_XDECREF(chunks);
2286 Py_XDECREF(remaining);
2287 Py_XDECREF(line);
2288 return NULL;
2289}
2290
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002291/*[clinic input]
2292_io.TextIOWrapper.readline
2293 size: Py_ssize_t = -1
2294 /
2295[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002296
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002297static PyObject *
2298_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2299/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2300{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002301 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002302 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002303}
2304
2305/* Seek and Tell */
2306
2307typedef struct {
2308 Py_off_t start_pos;
2309 int dec_flags;
2310 int bytes_to_feed;
2311 int chars_to_skip;
2312 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002313} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002314
/*
   To speed up cookie packing/unpacking, the cookie_type fields are stored
   in a temporary byte string which is then converted with
   _PyLong_FromByteArray() (packing) or filled by _PyLong_AsByteArray()
   (unpacking).  The macros below give the size of that byte string and the
   offset at which each cookie_type field lives inside it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* The least significant byte of start_pos must also be the least
   significant byte of the cookie; on a big-endian machine that means the
   fields are laid out in reverse order, each one right after the next. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian: with the fields in declaration order, the least
   significant byte of start_pos is naturally the least significant byte
   of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2346
2347static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002348textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002349{
2350 unsigned char buffer[COOKIE_BUF_LEN];
2351 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2352 if (cookieLong == NULL)
2353 return -1;
2354
2355 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002356 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002357 Py_DECREF(cookieLong);
2358 return -1;
2359 }
2360 Py_DECREF(cookieLong);
2361
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002362 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2363 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2364 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2365 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2366 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002367
2368 return 0;
2369}
2370
2371static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002372textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002373{
2374 unsigned char buffer[COOKIE_BUF_LEN];
2375
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002376 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2377 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2378 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2379 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2380 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002381
Christian Heimes743e0cd2012-10-17 23:52:17 +02002382 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2383 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002384}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002385
2386static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002387_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002388{
2389 PyObject *res;
2390 /* When seeking to the start of the stream, we call decoder.reset()
2391 rather than decoder.getstate().
2392 This is for a few decoders such as utf-16 for which the state value
2393 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2394 utf-16, that we are expecting a BOM).
2395 */
2396 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
Petr Viktorinffd97532020-02-11 17:46:57 +01002397 res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002398 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002399 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2400 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002401 if (res == NULL)
2402 return -1;
2403 Py_DECREF(res);
2404 return 0;
2405}
2406
Antoine Pitroue4501852009-05-14 18:55:55 +00002407static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002408_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002409{
2410 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002411 if (start_of_stream) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002412 res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
Antoine Pitroue4501852009-05-14 18:55:55 +00002413 self->encoding_start_of_stream = 1;
2414 }
2415 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01002416 res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002417 _PyLong_Zero);
Antoine Pitroue4501852009-05-14 18:55:55 +00002418 self->encoding_start_of_stream = 0;
2419 }
2420 if (res == NULL)
2421 return -1;
2422 Py_DECREF(res);
2423 return 0;
2424}
2425
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002426static int
2427_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2428{
2429 /* Same as _textiowrapper_decoder_setstate() above. */
2430 return _textiowrapper_encoder_reset(
2431 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2432}
2433
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002434/*[clinic input]
2435_io.TextIOWrapper.seek
2436 cookie as cookieObj: object
2437 whence: int = 0
2438 /
2439[clinic start generated code]*/
2440
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002441static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002442_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2443/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002444{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002445 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002446 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002447 PyObject *res;
2448 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002449 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002451 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002452 CHECK_CLOSED(self);
2453
2454 Py_INCREF(cookieObj);
2455
2456 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002457 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002458 goto fail;
2459 }
2460
ngie-eign848037c2019-03-02 23:28:26 -08002461 switch (whence) {
2462 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002464 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002465 if (cmp < 0)
2466 goto fail;
2467
2468 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002469 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002470 goto fail;
2471 }
2472
2473 /* Seeking to the current position should attempt to
2474 * sync the underlying buffer with the current position.
2475 */
2476 Py_DECREF(cookieObj);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002477 cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478 if (cookieObj == NULL)
2479 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002480 break;
2481
ngie-eign848037c2019-03-02 23:28:26 -08002482 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002483 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002484 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485 if (cmp < 0)
2486 goto fail;
2487
2488 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002489 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002490 goto fail;
2491 }
2492
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002493 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494 if (res == NULL)
2495 goto fail;
2496 Py_DECREF(res);
2497
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002498 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002499 Py_CLEAR(self->snapshot);
2500 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002501 res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002502 if (res == NULL)
2503 goto fail;
2504 Py_DECREF(res);
2505 }
2506
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002507 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002508 Py_CLEAR(cookieObj);
2509 if (res == NULL)
2510 goto fail;
2511 if (self->encoder) {
2512 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002513 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002514 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2515 Py_DECREF(res);
2516 goto fail;
2517 }
2518 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002519 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002520
ngie-eign848037c2019-03-02 23:28:26 -08002521 case SEEK_SET:
2522 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002523
ngie-eign848037c2019-03-02 23:28:26 -08002524 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002525 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002526 "invalid whence (%d, should be %d, %d or %d)", whence,
2527 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528 goto fail;
2529 }
2530
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002531 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532 if (cmp < 0)
2533 goto fail;
2534
2535 if (cmp == 1) {
2536 PyErr_Format(PyExc_ValueError,
2537 "negative seek position %R", cookieObj);
2538 goto fail;
2539 }
2540
Petr Viktorinffd97532020-02-11 17:46:57 +01002541 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542 if (res == NULL)
2543 goto fail;
2544 Py_DECREF(res);
2545
2546 /* The strategy of seek() is to go back to the safe start point
2547 * and replay the effect of read(chars_to_skip) from there.
2548 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002549 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002550 goto fail;
2551
2552 /* Seek back to the safe start point. */
2553 posobj = PyLong_FromOff_t(cookie.start_pos);
2554 if (posobj == NULL)
2555 goto fail;
Petr Viktorinffd97532020-02-11 17:46:57 +01002556 res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002557 Py_DECREF(posobj);
2558 if (res == NULL)
2559 goto fail;
2560 Py_DECREF(res);
2561
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002562 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563 Py_CLEAR(self->snapshot);
2564
2565 /* Restore the decoder to its state from the safe start point. */
2566 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002567 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002568 goto fail;
2569 }
2570
2571 if (cookie.chars_to_skip) {
2572 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002573 PyObject *input_chunk = _PyObject_CallMethodId(
2574 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002575 PyObject *decoded;
2576
2577 if (input_chunk == NULL)
2578 goto fail;
2579
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002580 if (!PyBytes_Check(input_chunk)) {
2581 PyErr_Format(PyExc_TypeError,
2582 "underlying read() should have returned a bytes "
2583 "object, not '%.200s'",
2584 Py_TYPE(input_chunk)->tp_name);
2585 Py_DECREF(input_chunk);
2586 goto fail;
2587 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002588
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002589 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2590 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002591 goto fail;
2592 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002593 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002594
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002595 decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2596 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002597
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002598 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002599 goto fail;
2600
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002601 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002602
2603 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002604 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002605 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606 goto fail;
2607 }
2608 self->decoded_chars_used = cookie.chars_to_skip;
2609 }
2610 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002611 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2612 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002613 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002614 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002615 }
2616
Antoine Pitroue4501852009-05-14 18:55:55 +00002617 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2618 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002619 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002620 goto fail;
2621 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002622 return cookieObj;
2623 fail:
2624 Py_XDECREF(cookieObj);
2625 return NULL;
2626
2627}
2628
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002629/*[clinic input]
2630_io.TextIOWrapper.tell
2631[clinic start generated code]*/
2632
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002633static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002634_io_TextIOWrapper_tell_impl(textio *self)
2635/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002636{
2637 PyObject *res;
2638 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002639 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002640 PyObject *next_input;
2641 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002642 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643 PyObject *saved_state = NULL;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002644 const char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002645 Py_ssize_t dec_buffer_len;
2646 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002647
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002648 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002649 CHECK_CLOSED(self);
2650
2651 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002652 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002653 goto fail;
2654 }
2655 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002656 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002657 "telling position disabled by next() call");
2658 goto fail;
2659 }
2660
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002661 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002662 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002663 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002664 if (res == NULL)
2665 goto fail;
2666 Py_DECREF(res);
2667
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002668 posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002669 if (posobj == NULL)
2670 goto fail;
2671
2672 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002673 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002674 return posobj;
2675 }
2676
2677#if defined(HAVE_LARGEFILE_SUPPORT)
2678 cookie.start_pos = PyLong_AsLongLong(posobj);
2679#else
2680 cookie.start_pos = PyLong_AsLong(posobj);
2681#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002682 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002683 if (PyErr_Occurred())
2684 goto fail;
2685
2686 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002687 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002688 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002689 goto fail;
2690
2691 assert (PyBytes_Check(next_input));
2692
2693 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2694
2695 /* How many decoded characters have been used up since the snapshot? */
2696 if (self->decoded_chars_used == 0) {
2697 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002698 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002699 }
2700
2701 chars_to_skip = self->decoded_chars_used;
2702
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002703 /* Decoder state will be restored at the end */
Petr Viktorinffd97532020-02-11 17:46:57 +01002704 saved_state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002705 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706 if (saved_state == NULL)
2707 goto fail;
2708
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002709#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002710 PyObject *dec_buffer; \
Petr Viktorinffd97532020-02-11 17:46:57 +01002711 PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002712 _PyIO_str_getstate); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002713 if (_state == NULL) \
2714 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002715 if (!PyTuple_Check(_state)) { \
2716 PyErr_SetString(PyExc_TypeError, \
2717 "illegal decoder state"); \
2718 Py_DECREF(_state); \
2719 goto fail; \
2720 } \
2721 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2722 &dec_buffer, &dec_flags)) \
2723 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002724 Py_DECREF(_state); \
2725 goto fail; \
2726 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002727 if (!PyBytes_Check(dec_buffer)) { \
2728 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002729 "illegal decoder state: the first item should be a " \
2730 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002731 Py_TYPE(dec_buffer)->tp_name); \
2732 Py_DECREF(_state); \
2733 goto fail; \
2734 } \
2735 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002736 Py_DECREF(_state); \
2737 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002738
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002739#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002740 PyObject *_decoded = _PyObject_CallMethodId( \
2741 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002742 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002743 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002744 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002745 Py_DECREF(_decoded); \
2746 } while (0)
2747
2748 /* Fast search for an acceptable start point, close to our
2749 current pos */
2750 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2751 skip_back = 1;
2752 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2753 input = PyBytes_AS_STRING(next_input);
2754 while (skip_bytes > 0) {
2755 /* Decode up to temptative start point */
2756 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2757 goto fail;
2758 DECODER_DECODE(input, skip_bytes, chars_decoded);
2759 if (chars_decoded <= chars_to_skip) {
2760 DECODER_GETSTATE();
2761 if (dec_buffer_len == 0) {
2762 /* Before pos and no bytes buffered in decoder => OK */
2763 cookie.dec_flags = dec_flags;
2764 chars_to_skip -= chars_decoded;
2765 break;
2766 }
2767 /* Skip back by buffered amount and reset heuristic */
2768 skip_bytes -= dec_buffer_len;
2769 skip_back = 1;
2770 }
2771 else {
2772 /* We're too far ahead, skip back a bit */
2773 skip_bytes -= skip_back;
2774 skip_back *= 2;
2775 }
2776 }
2777 if (skip_bytes <= 0) {
2778 skip_bytes = 0;
2779 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2780 goto fail;
2781 }
2782
2783 /* Note our initial start point. */
2784 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002785 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002786 if (chars_to_skip == 0)
2787 goto finally;
2788
2789 /* We should be close to the desired position. Now feed the decoder one
2790 * byte at a time until we reach the `chars_to_skip` target.
2791 * As we go, note the nearest "safe start point" before the current
2792 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002793 * can safely start from there and advance to this location).
2794 */
2795 chars_decoded = 0;
2796 input = PyBytes_AS_STRING(next_input);
2797 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002798 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002799 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002800 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002801
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002802 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002803 /* We got n chars for 1 byte */
2804 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002805 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002806 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002807
2808 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2809 /* Decoder buffer is empty, so this is a safe start point. */
2810 cookie.start_pos += cookie.bytes_to_feed;
2811 chars_to_skip -= chars_decoded;
2812 cookie.dec_flags = dec_flags;
2813 cookie.bytes_to_feed = 0;
2814 chars_decoded = 0;
2815 }
2816 if (chars_decoded >= chars_to_skip)
2817 break;
2818 input++;
2819 }
2820 if (input == input_end) {
2821 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002822 PyObject *decoded = _PyObject_CallMethodId(
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002823 self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002824 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002825 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002826 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002827 Py_DECREF(decoded);
2828 cookie.need_eof = 1;
2829
2830 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002831 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002832 "can't reconstruct logical file position");
2833 goto fail;
2834 }
2835 }
2836
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002837finally:
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002838 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002839 Py_DECREF(saved_state);
2840 if (res == NULL)
2841 return NULL;
2842 Py_DECREF(res);
2843
2844 /* The returned cookie corresponds to the last safe start point. */
2845 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002846 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002847
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002848fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002849 if (saved_state) {
2850 PyObject *type, *value, *traceback;
2851 PyErr_Fetch(&type, &value, &traceback);
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002852 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002853 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002854 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002855 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002856 }
2857 return NULL;
2858}
2859
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002860/*[clinic input]
2861_io.TextIOWrapper.truncate
2862 pos: object = None
2863 /
2864[clinic start generated code]*/
2865
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002866static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002867_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2868/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002869{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002870 PyObject *res;
2871
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002872 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002873
Petr Viktorinffd97532020-02-11 17:46:57 +01002874 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002875 if (res == NULL)
2876 return NULL;
2877 Py_DECREF(res);
2878
Petr Viktorinffd97532020-02-11 17:46:57 +01002879 return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002880}
2881
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002882static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002883textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002884{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002885 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002886 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002887
2888 CHECK_INITIALIZED(self);
2889
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002890 res = PyUnicode_FromString("<_io.TextIOWrapper");
2891 if (res == NULL)
2892 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002893
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002894 status = Py_ReprEnter((PyObject *)self);
2895 if (status != 0) {
2896 if (status > 0) {
2897 PyErr_Format(PyExc_RuntimeError,
2898 "reentrant call inside %s.__repr__",
2899 Py_TYPE(self)->tp_name);
2900 }
2901 goto error;
2902 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002903 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2904 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002905 goto error;
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002906 }
2907 /* Ignore ValueError raised if the underlying stream was detached */
2908 PyErr_Clear();
Antoine Pitrou716c4442009-05-23 19:04:03 +00002909 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002910 if (nameobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002911 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002912 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002913 if (s == NULL)
2914 goto error;
2915 PyUnicode_AppendAndDel(&res, s);
2916 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002917 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002918 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002919 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2920 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002921 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002922 if (modeobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002923 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2924 Py_DECREF(modeobj);
2925 if (s == NULL)
2926 goto error;
2927 PyUnicode_AppendAndDel(&res, s);
2928 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002929 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002930 }
2931 s = PyUnicode_FromFormat("%U encoding=%R>",
2932 res, self->encoding);
2933 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002934 if (status == 0) {
2935 Py_ReprLeave((PyObject *)self);
2936 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002937 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002938
2939 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002940 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002941 if (status == 0) {
2942 Py_ReprLeave((PyObject *)self);
2943 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002944 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002945}
2946
2947
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002948/* Inquiries */
2949
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002950/*[clinic input]
2951_io.TextIOWrapper.fileno
2952[clinic start generated code]*/
2953
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002954static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002955_io_TextIOWrapper_fileno_impl(textio *self)
2956/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002957{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002958 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002959 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002960}
2961
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002962/*[clinic input]
2963_io.TextIOWrapper.seekable
2964[clinic start generated code]*/
2965
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002966static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002967_io_TextIOWrapper_seekable_impl(textio *self)
2968/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002969{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002970 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002971 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002972}
2973
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002974/*[clinic input]
2975_io.TextIOWrapper.readable
2976[clinic start generated code]*/
2977
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002978static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002979_io_TextIOWrapper_readable_impl(textio *self)
2980/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002981{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002982 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002983 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002984}
2985
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002986/*[clinic input]
2987_io.TextIOWrapper.writable
2988[clinic start generated code]*/
2989
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002990static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002991_io_TextIOWrapper_writable_impl(textio *self)
2992/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002993{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002994 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002995 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002996}
2997
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002998/*[clinic input]
2999_io.TextIOWrapper.isatty
3000[clinic start generated code]*/
3001
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003002static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003003_io_TextIOWrapper_isatty_impl(textio *self)
3004/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003005{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003006 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003007 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003008}
3009
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003010/*[clinic input]
3011_io.TextIOWrapper.flush
3012[clinic start generated code]*/
3013
Antoine Pitrou243757e2010-11-05 21:15:39 +00003014static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003015_io_TextIOWrapper_flush_impl(textio *self)
3016/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003017{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003018 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003019 CHECK_CLOSED(self);
3020 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003021 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003022 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003023 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003024}
3025
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003026/*[clinic input]
3027_io.TextIOWrapper.close
3028[clinic start generated code]*/
3029
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003030static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003031_io_TextIOWrapper_close_impl(textio *self)
3032/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003033{
3034 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003035 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003036 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003037
Antoine Pitrou6be88762010-05-03 16:48:20 +00003038 res = textiowrapper_closed_get(self, NULL);
3039 if (res == NULL)
3040 return NULL;
3041 r = PyObject_IsTrue(res);
3042 Py_DECREF(res);
3043 if (r < 0)
3044 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003045
Antoine Pitrou6be88762010-05-03 16:48:20 +00003046 if (r > 0) {
3047 Py_RETURN_NONE; /* stream already closed */
3048 }
3049 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003050 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003051 if (self->finalizing) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02003052 res = _PyObject_CallMethodIdOneArg(self->buffer,
3053 &PyId__dealloc_warn,
3054 (PyObject *)self);
Antoine Pitroue033e062010-10-29 10:38:18 +00003055 if (res)
3056 Py_DECREF(res);
3057 else
3058 PyErr_Clear();
3059 }
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003060 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson68623612012-12-20 11:53:11 -06003061 if (res == NULL)
3062 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003063 else
3064 Py_DECREF(res);
3065
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003066 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
Benjamin Peterson68623612012-12-20 11:53:11 -06003067 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003068 _PyErr_ChainExceptions(exc, val, tb);
3069 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003070 }
3071 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003072 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003073}
3074
3075static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003076textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003077{
3078 PyObject *line;
3079
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003080 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003081
3082 self->telling = 0;
Andy Lesterdffe4c02020-03-04 07:15:20 -06003083 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003084 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003085 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003086 }
3087 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01003088 line = PyObject_CallMethodNoArgs((PyObject *)self,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003089 _PyIO_str_readline);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003090 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003091 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003092 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003093 "not '%.200s'", Py_TYPE(line)->tp_name);
3094 Py_DECREF(line);
3095 return NULL;
3096 }
3097 }
3098
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003099 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003100 return NULL;
3101
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003102 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003103 /* Reached EOF or would have blocked */
3104 Py_DECREF(line);
3105 Py_CLEAR(self->snapshot);
3106 self->telling = self->seekable;
3107 return NULL;
3108 }
3109
3110 return line;
3111}
3112
3113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003114textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003115{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003116 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003117 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003118}
3119
3120static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003121textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003122{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003123 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003124 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3125}
3126
3127static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003128textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003129{
3130 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003131 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003132 if (self->decoder == NULL ||
3133 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3134 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003135 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003136 }
3137 return res;
3138}
3139
3140static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003141textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003142{
3143 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003144 Py_INCREF(self->errors);
3145 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003146}
3147
3148static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003149textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003150{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003151 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003152 return PyLong_FromSsize_t(self->chunk_size);
3153}
3154
3155static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003156textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003157{
3158 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003159 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003160 if (arg == NULL) {
3161 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3162 return -1;
3163 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003164 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003165 if (n == -1 && PyErr_Occurred())
3166 return -1;
3167 if (n <= 0) {
3168 PyErr_SetString(PyExc_ValueError,
3169 "a strictly positive integer is required");
3170 return -1;
3171 }
3172 self->chunk_size = n;
3173 return 0;
3174}
3175
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003176#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003177
/* Method table for _io.IncrementalNewlineDecoder (decode, getstate,
   setstate, reset).  Each entry is a *_METHODDEF macro; these presumably
   come from the "clinic/textio.c.h" header included in this file --
   confirm there. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3185
/* Computed attributes of _io.IncrementalNewlineDecoder.  "newlines" has a
   getter only (the setter slot is NULL). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel */
};
3190
3191PyTypeObject PyIncrementalNewlineDecoder_Type = {
3192 PyVarObject_HEAD_INIT(NULL, 0)
3193 "_io.IncrementalNewlineDecoder", /*tp_name*/
3194 sizeof(nldecoder_object), /*tp_basicsize*/
3195 0, /*tp_itemsize*/
3196 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003197 0, /*tp_vectorcall_offset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003198 0, /*tp_getattr*/
3199 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003200 0, /*tp_as_async*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003201 0, /*tp_repr*/
3202 0, /*tp_as_number*/
3203 0, /*tp_as_sequence*/
3204 0, /*tp_as_mapping*/
3205 0, /*tp_hash */
3206 0, /*tp_call*/
3207 0, /*tp_str*/
3208 0, /*tp_getattro*/
3209 0, /*tp_setattro*/
3210 0, /*tp_as_buffer*/
3211 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3212 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3213 0, /* tp_traverse */
3214 0, /* tp_clear */
3215 0, /* tp_richcompare */
3216 0, /*tp_weaklistoffset*/
3217 0, /* tp_iter */
3218 0, /* tp_iternext */
3219 incrementalnewlinedecoder_methods, /* tp_methods */
3220 0, /* tp_members */
3221 incrementalnewlinedecoder_getset, /* tp_getset */
3222 0, /* tp_base */
3223 0, /* tp_dict */
3224 0, /* tp_descr_get */
3225 0, /* tp_descr_set */
3226 0, /* tp_dictoffset */
3227 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3228 0, /* tp_alloc */
3229 PyType_GenericNew, /* tp_new */
3230};
3231
3232
/* Method table for _io.TextIOWrapper.  Each entry is a *_METHODDEF macro;
   these presumably come from the "clinic/textio.c.h" header included in
   this file -- confirm there. */
static PyMethodDef textiowrapper_methods[] = {
    /* Stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3253
/* Struct fields of `textio` exposed directly as attributes.  All are
   READONLY except "_finalizing", whose flags field is 0. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}  /* sentinel */
};
3262
/* Computed attributes of _io.TextIOWrapper.  Only "_CHUNK_SIZE" has a
   setter; the others are getter-only. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/* Disabled entry: no "mode" getter is registered here.
    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}  /* sentinel */
};
3274
3275PyTypeObject PyTextIOWrapper_Type = {
3276 PyVarObject_HEAD_INIT(NULL, 0)
3277 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003278 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003279 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003280 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003281 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003282 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003283 0, /*tps_etattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003284 0, /*tp_as_async*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003285 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003286 0, /*tp_as_number*/
3287 0, /*tp_as_sequence*/
3288 0, /*tp_as_mapping*/
3289 0, /*tp_hash */
3290 0, /*tp_call*/
3291 0, /*tp_str*/
3292 0, /*tp_getattro*/
3293 0, /*tp_setattro*/
3294 0, /*tp_as_buffer*/
3295 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrouada319b2019-05-29 22:12:38 +02003296 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003297 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003298 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3299 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003300 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003301 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003302 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003303 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3304 textiowrapper_methods, /* tp_methods */
3305 textiowrapper_members, /* tp_members */
3306 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003307 0, /* tp_base */
3308 0, /* tp_dict */
3309 0, /* tp_descr_get */
3310 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003311 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003312 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003313 0, /* tp_alloc */
3314 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003315 0, /* tp_free */
3316 0, /* tp_is_gc */
3317 0, /* tp_bases */
3318 0, /* tp_mro */
3319 0, /* tp_cache */
3320 0, /* tp_subclasses */
3321 0, /* tp_weaklist */
3322 0, /* tp_del */
3323 0, /* tp_version_tag */
3324 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003325};