blob: f2c72ebd516589a0b838d925a156d11c8c0e9c67 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020011#include "pycore_interp.h" // PyInterpreterState.fs_codec
Victor Stinnerbcda8f12018-11-21 22:27:47 +010012#include "pycore_object.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020013#include "pycore_pystate.h" // _PyInterpreterState_GET()
14#include "structmember.h" // PyMemberDef
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000015#include "_iomodule.h"
16
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030017/*[clinic input]
18module _io
19class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
20class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
21[clinic start generated code]*/
22/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
23
/* Static identifiers (PyId_<name>) for the attribute and method names
   that this file looks up by name. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020044
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045/* TextIOBase */
46
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000047PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000048 "Base class for text I/O.\n"
49 "\n"
50 "This class provides a character and line based interface to stream\n"
51 "I/O. There is no readinto method because Python's character strings\n"
52 "are immutable. There is no public constructor.\n"
53 );
54
55static PyObject *
56_unsupported(const char *message)
57{
Antoine Pitrou712cb732013-12-21 15:51:54 +010058 _PyIO_State *state = IO_STATE();
59 if (state != NULL)
60 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000061 return NULL;
62}
63
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000065 "Separate the underlying buffer from the TextIOBase and return it.\n"
66 "\n"
67 "After the underlying buffer has been detached, the TextIO is in an\n"
68 "unusable state.\n"
69 );
70
71static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053072textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000073{
74 return _unsupported("detach");
75}
76
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000077PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000078 "Read at most n characters from stream.\n"
79 "\n"
80 "Read from underlying buffer until we have n characters or we hit EOF.\n"
81 "If n is negative or omitted, read until EOF.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("read");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Read until newline or EOF.\n"
92 "\n"
93 "Returns an empty string if EOF is hit immediately.\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("readline");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Write string to stream.\n"
104 "Returns the number of characters written (which is always equal to\n"
105 "the length of the string).\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 return _unsupported("write");
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Encoding of the text stream.\n"
116 "\n"
117 "Subclasses should override.\n"
118 );
119
120static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000121textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000122{
123 Py_RETURN_NONE;
124}
125
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000126PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000127 "Line endings translated so far.\n"
128 "\n"
129 "Only line endings translated during reading are considered.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000140PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000141 "The error setting of the decoder or encoder.\n"
142 "\n"
143 "Subclasses should override.\n"
144 );
145
146static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000147textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000148{
149 Py_RETURN_NONE;
150}
151
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000152
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000153static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530154 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000155 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
156 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
157 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000158 {NULL, NULL}
159};
160
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000161static PyGetSetDef textiobase_getset[] = {
162 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
163 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
164 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000165 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000166};
167
168PyTypeObject PyTextIOBase_Type = {
169 PyVarObject_HEAD_INIT(NULL, 0)
170 "_io._TextIOBase", /*tp_name*/
171 0, /*tp_basicsize*/
172 0, /*tp_itemsize*/
173 0, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200174 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000175 0, /*tp_getattr*/
176 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200177 0, /*tp_as_async*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /*tp_repr*/
179 0, /*tp_as_number*/
180 0, /*tp_as_sequence*/
181 0, /*tp_as_mapping*/
182 0, /*tp_hash */
183 0, /*tp_call*/
184 0, /*tp_str*/
185 0, /*tp_getattro*/
186 0, /*tp_setattro*/
187 0, /*tp_as_buffer*/
Antoine Pitrouada319b2019-05-29 22:12:38 +0200188 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000189 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000190 0, /* tp_traverse */
191 0, /* tp_clear */
192 0, /* tp_richcompare */
193 0, /* tp_weaklistoffset */
194 0, /* tp_iter */
195 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000198 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000199 &PyIOBase_Type, /* tp_base */
200 0, /* tp_dict */
201 0, /* tp_descr_get */
202 0, /* tp_descr_set */
203 0, /* tp_dictoffset */
204 0, /* tp_init */
205 0, /* tp_alloc */
206 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200207 0, /* tp_free */
208 0, /* tp_is_gc */
209 0, /* tp_bases */
210 0, /* tp_mro */
211 0, /* tp_cache */
212 0, /* tp_subclasses */
213 0, /* tp_weaklist */
214 0, /* tp_del */
215 0, /* tp_version_tag */
216 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000217};
218
219
220/* IncrementalNewlineDecoder */
221
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222typedef struct {
223 PyObject_HEAD
224 PyObject *decoder;
225 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200226 unsigned int pendingcr: 1;
227 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000228 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000229} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000230
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300231/*[clinic input]
232_io.IncrementalNewlineDecoder.__init__
233 decoder: object
234 translate: int
235 errors: object(c_default="NULL") = "strict"
236
237Codec used when reading a file in universal newlines mode.
238
239It wraps another incremental decoder, translating \r\n and \r into \n.
240It also records the types of newlines encountered. When used with
241translate=False, it ensures that the newline sequence is returned in
242one piece. When used with decoder=None, it expects unicode strings as
243decode input and translates newlines without first invoking an external
244decoder.
245[clinic start generated code]*/
246
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000247static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300248_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
249 PyObject *decoder, int translate,
250 PyObject *errors)
251/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000253 self->decoder = decoder;
254 Py_INCREF(decoder);
255
256 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900257 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000258 if (self->errors == NULL)
259 return -1;
260 }
261 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000262 self->errors = errors;
263 }
INADA Naoki507434f2017-12-21 09:59:53 +0900264 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000265
Xiang Zhangb08746b2018-10-31 19:49:16 +0800266 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267 self->seennl = 0;
268 self->pendingcr = 0;
269
270 return 0;
271}
272
273static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000274incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000275{
276 Py_CLEAR(self->decoder);
277 Py_CLEAR(self->errors);
278 Py_TYPE(self)->tp_free((PyObject *)self);
279}
280
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200281static int
282check_decoded(PyObject *decoded)
283{
284 if (decoded == NULL)
285 return -1;
286 if (!PyUnicode_Check(decoded)) {
287 PyErr_Format(PyExc_TypeError,
288 "decoder should return a string result, not '%.200s'",
289 Py_TYPE(decoded)->tp_name);
290 Py_DECREF(decoded);
291 return -1;
292 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200293 if (PyUnicode_READY(decoded) < 0) {
294 Py_DECREF(decoded);
295 return -1;
296 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200297 return 0;
298}
299
/* Bit flags recording which newline styles have been encountered. */
#define SEEN_CR   (1 << 0)   /* a lone "\r" */
#define SEEN_LF   (1 << 1)   /* a lone "\n" */
#define SEEN_CRLF (1 << 2)   /* a "\r\n" pair */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
304
305PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200306_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000307 PyObject *input, int final)
308{
309 PyObject *output;
310 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200311 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000312
313 if (self->decoder == NULL) {
314 PyErr_SetString(PyExc_ValueError,
315 "IncrementalNewlineDecoder.__init__ not called");
316 return NULL;
317 }
318
319 /* decode input (with the eventual \r from a previous pass) */
320 if (self->decoder != Py_None) {
321 output = PyObject_CallMethodObjArgs(self->decoder,
322 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
323 }
324 else {
325 output = input;
326 Py_INCREF(output);
327 }
328
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200329 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000330 return NULL;
331
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200332 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000333 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200334 /* Prefix output with CR */
335 int kind;
336 PyObject *modified;
337 char *out;
338
339 modified = PyUnicode_New(output_len + 1,
340 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000341 if (modified == NULL)
342 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200343 kind = PyUnicode_KIND(modified);
344 out = PyUnicode_DATA(modified);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300345 PyUnicode_WRITE(kind, out, 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200346 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000349 self->pendingcr = 0;
350 output_len++;
351 }
352
353 /* retain last \r even when not translating data:
354 * then readline() is sure to get \r\n in one pass
355 */
356 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000357 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200358 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
359 {
360 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
361 if (modified == NULL)
362 goto error;
363 Py_DECREF(output);
364 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 self->pendingcr = 1;
366 }
367 }
368
369 /* Record which newlines are read and do newline translation if desired,
370 all in one pass. */
371 {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300372 const void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373 Py_ssize_t len;
374 int seennl = self->seennl;
375 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200376 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000377
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200378 in_str = PyUnicode_DATA(output);
379 len = PyUnicode_GET_LENGTH(output);
380 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000381
382 if (len == 0)
383 return output;
384
385 /* If, up to now, newlines are consistently \n, do a quick check
386 for the \r *byte* with the libc's optimized memchr.
387 */
388 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200389 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000390 }
391
Antoine Pitrou66913e22009-03-06 23:40:56 +0000392 if (only_lf) {
393 /* If not already seen, quick scan for a possible "\n" character.
394 (there's nothing else to be done, even when in translation mode)
395 */
396 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200397 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100398 if (kind == PyUnicode_1BYTE_KIND)
399 seennl |= SEEN_LF;
400 else {
401 Py_ssize_t i = 0;
402 for (;;) {
403 Py_UCS4 c;
404 /* Fast loop for non-control characters */
405 while (PyUnicode_READ(kind, in_str, i) > '\n')
406 i++;
407 c = PyUnicode_READ(kind, in_str, i++);
408 if (c == '\n') {
409 seennl |= SEEN_LF;
410 break;
411 }
412 if (i >= len)
413 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000415 }
416 }
417 /* Finished: we have scanned for newlines, and none of them
418 need translating */
419 }
420 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200421 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000422 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000423 if (seennl == SEEN_ALL)
424 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000425 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000427 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 while (PyUnicode_READ(kind, in_str, i) > '\r')
429 i++;
430 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000431 if (c == '\n')
432 seennl |= SEEN_LF;
433 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 }
438 else
439 seennl |= SEEN_CR;
440 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 break;
443 if (seennl == SEEN_ALL)
444 break;
445 }
446 endscan:
447 ;
448 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000449 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200450 void *translated;
451 int kind = PyUnicode_KIND(output);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300452 const void *in_str = PyUnicode_DATA(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 Py_ssize_t in, out;
454 /* XXX: Previous in-place translation here is disabled as
455 resizing is not possible anymore */
456 /* We could try to optimize this so that we only do a copy
457 when there is something to translate. On the other hand,
458 we already know there is a \r byte, so chances are high
459 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200460 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 if (translated == NULL) {
462 PyErr_NoMemory();
463 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200465 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000466 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000468 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
470 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000471 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000473 seennl |= SEEN_LF;
474 continue;
475 }
476 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200477 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478 in++;
479 seennl |= SEEN_CRLF;
480 }
481 else
482 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200483 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000484 continue;
485 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200486 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200488 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000489 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200490 Py_DECREF(output);
491 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100492 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200493 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200494 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000495 }
496 self->seennl |= seennl;
497 }
498
499 return output;
500
501 error:
502 Py_DECREF(output);
503 return NULL;
504}
505
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300506/*[clinic input]
507_io.IncrementalNewlineDecoder.decode
508 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200509 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300510[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000511
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300512static PyObject *
513_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
514 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200515/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300516{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000517 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
518}
519
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300520/*[clinic input]
521_io.IncrementalNewlineDecoder.getstate
522[clinic start generated code]*/
523
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000524static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300525_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
526/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000527{
528 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700529 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000530
531 if (self->decoder != Py_None) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100532 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200533 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000534 if (state == NULL)
535 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300536 if (!PyTuple_Check(state)) {
537 PyErr_SetString(PyExc_TypeError,
538 "illegal decoder state");
539 Py_DECREF(state);
540 return NULL;
541 }
542 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
543 &buffer, &flag))
544 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000545 Py_DECREF(state);
546 return NULL;
547 }
548 Py_INCREF(buffer);
549 Py_DECREF(state);
550 }
551 else {
552 buffer = PyBytes_FromString("");
553 flag = 0;
554 }
555 flag <<= 1;
556 if (self->pendingcr)
557 flag |= 1;
558 return Py_BuildValue("NK", buffer, flag);
559}
560
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300561/*[clinic input]
562_io.IncrementalNewlineDecoder.setstate
563 state: object
564 /
565[clinic start generated code]*/
566
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000567static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300568_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
569 PyObject *state)
570/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000571{
572 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700573 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000574
Oren Milman1d1d3e92017-08-20 18:35:36 +0300575 if (!PyTuple_Check(state)) {
576 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000577 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300578 }
579 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
580 &buffer, &flag))
581 {
582 return NULL;
583 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584
Victor Stinner7d7e7752014-06-17 23:31:25 +0200585 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000586 flag >>= 1;
587
588 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200589 return _PyObject_CallMethodId(self->decoder,
590 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000591 else
592 Py_RETURN_NONE;
593}
594
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300595/*[clinic input]
596_io.IncrementalNewlineDecoder.reset
597[clinic start generated code]*/
598
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000599static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300600_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
601/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000602{
603 self->seennl = 0;
604 self->pendingcr = 0;
605 if (self->decoder != Py_None)
Petr Viktorinffd97532020-02-11 17:46:57 +0100606 return PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000607 else
608 Py_RETURN_NONE;
609}
610
611static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000612incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000613{
614 switch (self->seennl) {
615 case SEEN_CR:
616 return PyUnicode_FromString("\r");
617 case SEEN_LF:
618 return PyUnicode_FromString("\n");
619 case SEEN_CRLF:
620 return PyUnicode_FromString("\r\n");
621 case SEEN_CR | SEEN_LF:
622 return Py_BuildValue("ss", "\r", "\n");
623 case SEEN_CR | SEEN_CRLF:
624 return Py_BuildValue("ss", "\r", "\r\n");
625 case SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("ss", "\n", "\r\n");
627 case SEEN_CR | SEEN_LF | SEEN_CRLF:
628 return Py_BuildValue("sss", "\r", "\n", "\r\n");
629 default:
630 Py_RETURN_NONE;
631 }
632
633}
634
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000635/* TextIOWrapper */
636
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637typedef PyObject *
638 (*encodefunc_t)(PyObject *, PyObject *);
639
640typedef struct
641{
642 PyObject_HEAD
643 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000644 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 Py_ssize_t chunk_size;
646 PyObject *buffer;
647 PyObject *encoding;
648 PyObject *encoder;
649 PyObject *decoder;
650 PyObject *readnl;
651 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900652 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200654 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000655 char readuniversal;
656 char readtranslate;
657 char writetranslate;
658 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200659 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000660 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200661 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000662 /* Specialized encoding func (see below) */
663 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000664 /* Whether or not it's the start of the stream */
665 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000666
667 /* Reads and writes are internally buffered in order to speed things up.
668 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000669
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000670 Please also note that text to be written is first encoded before being
671 buffered. This is necessary so that encoding errors are immediately
672 reported to the caller, but it unfortunately means that the
673 IncrementalEncoder (whose encode() method is always written in Python)
674 becomes a bottleneck for small writes.
675 */
676 PyObject *decoded_chars; /* buffer for text returned from decoder */
677 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
Inada Naokibfba8c32019-05-16 15:03:20 +0900678 PyObject *pending_bytes; // data waiting to be written.
679 // ascii unicode, bytes, or list of them.
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000680 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000681
Oren Milman13614e32017-08-24 19:51:24 +0300682 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000683 * dec_flags is the second (integer) item of the decoder state and
684 * next_input is the chunk of input bytes that comes next after the
685 * snapshot point. We use this to reconstruct decoder states in tell().
686 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000687 PyObject *snapshot;
688 /* Bytes-to-characters ratio for the current chunk. Serves as input for
689 the heuristic in tell(). */
690 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000691
692 /* Cache raw object if it's a FileIO object */
693 PyObject *raw;
694
695 PyObject *weakreflist;
696 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000697} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698
Zackery Spytz23db9352018-06-29 04:14:58 -0600699static void
700textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
701
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000702/* A couple of specialized cases in order to bypass the slow incremental
703 encoding methods for the most popular encodings. */
704
705static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000706ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707{
INADA Naoki507434f2017-12-21 09:59:53 +0900708 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000709}
710
711static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000712utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000713{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100714 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900715 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716}
717
718static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000719utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000720{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100721 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900722 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723}
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
Antoine Pitroue4501852009-05-14 18:55:55 +0000728 if (!self->encoding_start_of_stream) {
729 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200730#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000731 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000732#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000733 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000734#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000735 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100736 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900737 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000738}
739
Antoine Pitroue4501852009-05-14 18:55:55 +0000740static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000741utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000742{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100743 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900744 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000745}
746
747static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000748utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000749{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100750 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900751 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000752}
753
754static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000755utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000756{
757 if (!self->encoding_start_of_stream) {
758 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200759#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000760 return utf32be_encode(self, text);
761#else
762 return utf32le_encode(self, text);
763#endif
764 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100765 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900766 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000767}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000768
769static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000770utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771{
INADA Naoki507434f2017-12-21 09:59:53 +0900772 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000773}
774
775static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000776latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000777{
INADA Naoki507434f2017-12-21 09:59:53 +0900778 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000779}
780
Inada Naokibfba8c32019-05-16 15:03:20 +0900781// Return true when encoding can be skipped when text is ascii.
782static inline int
783is_asciicompat_encoding(encodefunc_t f)
784{
785 return f == (encodefunc_t) ascii_encode
786 || f == (encodefunc_t) latin1_encode
787 || f == (encodefunc_t) utf8_encode;
788}
789
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000790/* Map normalized encoding names onto the specialized encoding funcs */
791
792typedef struct {
793 const char *name;
794 encodefunc_t encodefunc;
795} encodefuncentry;
796
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200797static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000798 {"ascii", (encodefunc_t) ascii_encode},
799 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000800 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000801 {"utf-16-be", (encodefunc_t) utf16be_encode},
802 {"utf-16-le", (encodefunc_t) utf16le_encode},
803 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000804 {"utf-32-be", (encodefunc_t) utf32be_encode},
805 {"utf-32-le", (encodefunc_t) utf32le_encode},
806 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000807 {NULL, NULL}
808};
809
INADA Naoki507434f2017-12-21 09:59:53 +0900810static int
811validate_newline(const char *newline)
812{
813 if (newline && newline[0] != '\0'
814 && !(newline[0] == '\n' && newline[1] == '\0')
815 && !(newline[0] == '\r' && newline[1] == '\0')
816 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
817 PyErr_Format(PyExc_ValueError,
818 "illegal newline value: %s", newline);
819 return -1;
820 }
821 return 0;
822}
823
824static int
825set_newline(textio *self, const char *newline)
826{
827 PyObject *old = self->readnl;
828 if (newline == NULL) {
829 self->readnl = NULL;
830 }
831 else {
832 self->readnl = PyUnicode_FromString(newline);
833 if (self->readnl == NULL) {
834 self->readnl = old;
835 return -1;
836 }
837 }
838 self->readuniversal = (newline == NULL || newline[0] == '\0');
839 self->readtranslate = (newline == NULL);
840 self->writetranslate = (newline == NULL || newline[0] != '\0');
841 if (!self->readuniversal && self->readnl != NULL) {
842 // validate_newline() accepts only ASCII newlines.
843 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
844 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
845 if (strcmp(self->writenl, "\n") == 0) {
846 self->writenl = NULL;
847 }
848 }
849 else {
850#ifdef MS_WINDOWS
851 self->writenl = "\r\n";
852#else
853 self->writenl = NULL;
854#endif
855 }
856 Py_XDECREF(old);
857 return 0;
858}
859
860static int
861_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
862 const char *errors)
863{
864 PyObject *res;
865 int r;
866
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200867 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
INADA Naoki507434f2017-12-21 09:59:53 +0900868 if (res == NULL)
869 return -1;
870
871 r = PyObject_IsTrue(res);
872 Py_DECREF(res);
873 if (r == -1)
874 return -1;
875
876 if (r != 1)
877 return 0;
878
879 Py_CLEAR(self->decoder);
880 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
881 if (self->decoder == NULL)
882 return -1;
883
884 if (self->readuniversal) {
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300885 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
INADA Naoki507434f2017-12-21 09:59:53 +0900886 (PyObject *)&PyIncrementalNewlineDecoder_Type,
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +0300887 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
INADA Naoki507434f2017-12-21 09:59:53 +0900888 if (incrementalDecoder == NULL)
889 return -1;
890 Py_CLEAR(self->decoder);
891 self->decoder = incrementalDecoder;
892 }
893
894 return 0;
895}
896
897static PyObject*
898_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
899{
900 PyObject *chars;
901
Andy Lesterdffe4c02020-03-04 07:15:20 -0600902 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
INADA Naoki507434f2017-12-21 09:59:53 +0900903 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
904 else
905 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
906 eof ? Py_True : Py_False, NULL);
907
908 if (check_decoded(chars) < 0)
909 // check_decoded already decreases refcount
910 return NULL;
911
912 return chars;
913}
914
915static int
916_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
917 const char *errors)
918{
919 PyObject *res;
920 int r;
921
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200922 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
INADA Naoki507434f2017-12-21 09:59:53 +0900923 if (res == NULL)
924 return -1;
925
926 r = PyObject_IsTrue(res);
927 Py_DECREF(res);
928 if (r == -1)
929 return -1;
930
931 if (r != 1)
932 return 0;
933
934 Py_CLEAR(self->encoder);
935 self->encodefunc = NULL;
936 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
937 if (self->encoder == NULL)
938 return -1;
939
940 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200941 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
942 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900943 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200944 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900945 const encodefuncentry *e = encodefuncs;
946 while (e->name != NULL) {
947 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
948 self->encodefunc = e->encodefunc;
949 break;
950 }
951 e++;
952 }
953 }
954 Py_XDECREF(res);
955
956 return 0;
957}
958
959static int
960_textiowrapper_fix_encoder_state(textio *self)
961{
962 if (!self->seekable || !self->encoder) {
963 return 0;
964 }
965
966 self->encoding_start_of_stream = 1;
967
Petr Viktorinffd97532020-02-11 17:46:57 +0100968 PyObject *cookieObj = PyObject_CallMethodNoArgs(
Jeroen Demeyer762f93f2019-07-08 10:19:25 +0200969 self->buffer, _PyIO_str_tell);
INADA Naoki507434f2017-12-21 09:59:53 +0900970 if (cookieObj == NULL) {
971 return -1;
972 }
973
974 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
975 Py_DECREF(cookieObj);
976 if (cmp < 0) {
977 return -1;
978 }
979
980 if (cmp == 0) {
981 self->encoding_start_of_stream = 0;
Petr Viktorinffd97532020-02-11 17:46:57 +0100982 PyObject *res = PyObject_CallMethodOneArg(
Jeroen Demeyer59ad1102019-07-11 10:59:05 +0200983 self->encoder, _PyIO_str_setstate, _PyLong_Zero);
INADA Naoki507434f2017-12-21 09:59:53 +0900984 if (res == NULL) {
985 return -1;
986 }
987 Py_DECREF(res);
988 }
989
990 return 0;
991}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000992
Victor Stinner22eb6892019-06-26 00:51:05 +0200993static int
994io_check_errors(PyObject *errors)
995{
996 assert(errors != NULL && errors != Py_None);
997
Victor Stinner81a7be32020-04-14 15:14:01 +0200998 PyInterpreterState *interp = _PyInterpreterState_GET();
Victor Stinner22eb6892019-06-26 00:51:05 +0200999#ifndef Py_DEBUG
1000 /* In release mode, only check in development mode (-X dev) */
Victor Stinnerda7933e2020-04-13 03:04:28 +02001001 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001002 return 0;
1003 }
1004#else
1005 /* Always check in debug mode */
1006#endif
1007
1008 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1009 before_PyUnicode_InitEncodings() is called. */
Victor Stinner3d17c042020-05-14 01:48:38 +02001010 if (!interp->unicode.fs_codec.encoding) {
Victor Stinner22eb6892019-06-26 00:51:05 +02001011 return 0;
1012 }
1013
1014 Py_ssize_t name_length;
1015 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1016 if (name == NULL) {
1017 return -1;
1018 }
1019 if (strlen(name) != (size_t)name_length) {
1020 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1021 return -1;
1022 }
1023 PyObject *handler = PyCodec_LookupError(name);
1024 if (handler != NULL) {
1025 Py_DECREF(handler);
1026 return 0;
1027 }
1028 return -1;
1029}
1030
1031
1032
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001033/*[clinic input]
1034_io.TextIOWrapper.__init__
1035 buffer: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001036 encoding: str(accept={str, NoneType}) = None
INADA Naoki507434f2017-12-21 09:59:53 +09001037 errors: object = None
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001038 newline: str(accept={str, NoneType}) = None
Serhiy Storchaka202fda52017-03-12 10:10:47 +02001039 line_buffering: bool(accept={int}) = False
1040 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001041
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001042Character and line based layer over a BufferedIOBase object, buffer.
1043
1044encoding gives the name of the encoding that the stream will be
1045decoded or encoded with. It defaults to locale.getpreferredencoding(False).
1046
1047errors determines the strictness of encoding and decoding (see
1048help(codecs.Codec) or the documentation for codecs.register) and
1049defaults to "strict".
1050
1051newline controls how line endings are handled. It can be None, '',
1052'\n', '\r', and '\r\n'. It works as follows:
1053
1054* On input, if newline is None, universal newlines mode is
1055 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1056 these are translated into '\n' before being returned to the
1057 caller. If it is '', universal newline mode is enabled, but line
1058 endings are returned to the caller untranslated. If it has any of
1059 the other legal values, input lines are only terminated by the given
1060 string, and the line ending is returned to the caller untranslated.
1061
1062* On output, if newline is None, any '\n' characters written are
1063 translated to the system default line separator, os.linesep. If
1064 newline is '' or '\n', no translation takes place. If newline is any
1065 of the other legal values, any '\n' characters written are translated
1066 to the given string.
1067
1068If line_buffering is True, a call to flush is implied when a call to
1069write contains a newline character.
1070[clinic start generated code]*/
1071
1072static int
1073_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001074 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001075 const char *newline, int line_buffering,
1076 int write_through)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001077/*[clinic end generated code: output=72267c0c01032ed2 input=77d8696d1a1f460b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001078{
1079 PyObject *raw, *codec_info = NULL;
1080 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001081 PyObject *res;
1082 int r;
1083
1084 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001085 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001086
INADA Naoki507434f2017-12-21 09:59:53 +09001087 if (errors == Py_None) {
1088 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001089 if (errors == NULL) {
1090 return -1;
1091 }
INADA Naoki507434f2017-12-21 09:59:53 +09001092 }
1093 else if (!PyUnicode_Check(errors)) {
1094 // Check 'errors' argument here because Argument Clinic doesn't support
1095 // 'str(accept={str, NoneType})' converter.
1096 PyErr_Format(
1097 PyExc_TypeError,
1098 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001099 Py_TYPE(errors)->tp_name);
INADA Naoki507434f2017-12-21 09:59:53 +09001100 return -1;
1101 }
Victor Stinner22eb6892019-06-26 00:51:05 +02001102 else if (io_check_errors(errors)) {
1103 return -1;
1104 }
INADA Naoki507434f2017-12-21 09:59:53 +09001105
1106 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001107 return -1;
1108 }
1109
1110 Py_CLEAR(self->buffer);
1111 Py_CLEAR(self->encoding);
1112 Py_CLEAR(self->encoder);
1113 Py_CLEAR(self->decoder);
1114 Py_CLEAR(self->readnl);
1115 Py_CLEAR(self->decoded_chars);
1116 Py_CLEAR(self->pending_bytes);
1117 Py_CLEAR(self->snapshot);
1118 Py_CLEAR(self->errors);
1119 Py_CLEAR(self->raw);
1120 self->decoded_chars_used = 0;
1121 self->pending_bytes_count = 0;
1122 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001123 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124
1125 if (encoding == NULL) {
1126 /* Try os.device_encoding(fileno) */
1127 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001128 state = IO_STATE();
1129 if (state == NULL)
1130 goto error;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001131 fileno = _PyObject_CallMethodIdNoArgs(buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001132 /* Ignore only AttributeError and UnsupportedOperation */
1133 if (fileno == NULL) {
1134 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1135 PyErr_ExceptionMatches(state->unsupported_operation)) {
1136 PyErr_Clear();
1137 }
1138 else {
1139 goto error;
1140 }
1141 }
1142 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001143 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001144 Py_DECREF(fileno);
1145 if (fd == -1 && PyErr_Occurred()) {
1146 goto error;
1147 }
1148
1149 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001150 if (self->encoding == NULL)
1151 goto error;
1152 else if (!PyUnicode_Check(self->encoding))
1153 Py_CLEAR(self->encoding);
1154 }
1155 }
1156 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001157 PyObject *locale_module = _PyIO_get_locale_module(state);
1158 if (locale_module == NULL)
1159 goto catch_ImportError;
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001160 self->encoding = _PyObject_CallMethodIdOneArg(
1161 locale_module, &PyId_getpreferredencoding, Py_False);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001162 Py_DECREF(locale_module);
1163 if (self->encoding == NULL) {
1164 catch_ImportError:
1165 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001166 Importing locale can raise an ImportError because of
1167 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001168 ImportError if _locale is not available. These will happen
1169 during module building.
1170 */
1171 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1172 PyErr_Clear();
1173 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001174 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001175 else
1176 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001177 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001178 else if (!PyUnicode_Check(self->encoding))
1179 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001180 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001181 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001182 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001183 if (encoding == NULL)
1184 goto error;
1185 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001186 else if (encoding != NULL) {
1187 self->encoding = PyUnicode_FromString(encoding);
1188 if (self->encoding == NULL)
1189 goto error;
1190 }
1191 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001192 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001193 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001194 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001195 }
1196
Nick Coghlana9b15242014-02-04 22:11:18 +10001197 /* Check we have been asked for a real text encoding */
1198 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1199 if (codec_info == NULL) {
1200 Py_CLEAR(self->encoding);
1201 goto error;
1202 }
1203
1204 /* XXX: Failures beyond this point have the potential to leak elements
1205 * of the partially constructed object (like self->encoding)
1206 */
1207
INADA Naoki507434f2017-12-21 09:59:53 +09001208 Py_INCREF(errors);
1209 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001210 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001211 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001212 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001213 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001214 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001215 }
1216
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001217 self->buffer = buffer;
1218 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001219
INADA Naoki507434f2017-12-21 09:59:53 +09001220 /* Build the decoder object */
1221 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1222 goto error;
1223
1224 /* Build the encoder object */
1225 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1226 goto error;
1227
1228 /* Finished sorting out the codec details */
1229 Py_CLEAR(codec_info);
1230
Andy Lesterdffe4c02020-03-04 07:15:20 -06001231 if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1232 Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1233 Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001234 {
1235 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1236 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001237 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001238 if (raw != NULL) {
Andy Lesterdffe4c02020-03-04 07:15:20 -06001239 if (Py_IS_TYPE(raw, &PyFileIO_Type))
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001240 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001241 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001242 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001243 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001244 }
1245
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001246 res = _PyObject_CallMethodIdNoArgs(buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001247 if (res == NULL)
1248 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001249 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001250 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001251 if (r < 0)
1252 goto error;
1253 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001254
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001255 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1256 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001257 goto error;
1258 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001259 Py_XDECREF(res);
1260 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001261
Antoine Pitroue4501852009-05-14 18:55:55 +00001262 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001263 if (_textiowrapper_fix_encoder_state(self) < 0) {
1264 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001265 }
1266
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001267 self->ok = 1;
1268 return 0;
1269
1270 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001271 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001272 return -1;
1273}
1274
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001275/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1276 * -1 on error.
1277 */
1278static int
1279convert_optional_bool(PyObject *obj, int default_value)
1280{
1281 long v;
1282 if (obj == Py_None) {
1283 v = default_value;
1284 }
1285 else {
1286 v = PyLong_AsLong(obj);
1287 if (v == -1 && PyErr_Occurred())
1288 return -1;
1289 }
1290 return v != 0;
1291}
1292
INADA Naoki507434f2017-12-21 09:59:53 +09001293static int
1294textiowrapper_change_encoding(textio *self, PyObject *encoding,
1295 PyObject *errors, int newline_changed)
1296{
1297 /* Use existing settings where new settings are not specified */
1298 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1299 return 0; // no change
1300 }
1301
1302 if (encoding == Py_None) {
1303 encoding = self->encoding;
1304 if (errors == Py_None) {
1305 errors = self->errors;
1306 }
1307 }
1308 else if (errors == Py_None) {
1309 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001310 if (errors == NULL) {
1311 return -1;
1312 }
INADA Naoki507434f2017-12-21 09:59:53 +09001313 }
1314
1315 const char *c_errors = PyUnicode_AsUTF8(errors);
1316 if (c_errors == NULL) {
1317 return -1;
1318 }
1319
1320 // Create new encoder & decoder
1321 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1322 PyUnicode_AsUTF8(encoding), "codecs.open()");
1323 if (codec_info == NULL) {
1324 return -1;
1325 }
1326 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1327 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1328 Py_DECREF(codec_info);
1329 return -1;
1330 }
1331 Py_DECREF(codec_info);
1332
1333 Py_INCREF(encoding);
1334 Py_INCREF(errors);
1335 Py_SETREF(self->encoding, encoding);
1336 Py_SETREF(self->errors, errors);
1337
1338 return _textiowrapper_fix_encoder_state(self);
1339}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001340
1341/*[clinic input]
1342_io.TextIOWrapper.reconfigure
1343 *
INADA Naoki507434f2017-12-21 09:59:53 +09001344 encoding: object = None
1345 errors: object = None
1346 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001347 line_buffering as line_buffering_obj: object = None
1348 write_through as write_through_obj: object = None
1349
1350Reconfigure the text stream with new parameters.
1351
1352This also does an implicit stream flush.
1353
1354[clinic start generated code]*/
1355
1356static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001357_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1358 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001359 PyObject *line_buffering_obj,
1360 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001361/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001362{
1363 int line_buffering;
1364 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001365 const char *newline = NULL;
1366
1367 /* Check if something is in the read buffer */
1368 if (self->decoded_chars != NULL) {
1369 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001370 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001371 "of stream after the first read");
1372 return NULL;
1373 }
1374 }
1375
1376 if (newline_obj != NULL && newline_obj != Py_None) {
1377 newline = PyUnicode_AsUTF8(newline_obj);
1378 if (newline == NULL || validate_newline(newline) < 0) {
1379 return NULL;
1380 }
1381 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001382
1383 line_buffering = convert_optional_bool(line_buffering_obj,
1384 self->line_buffering);
1385 write_through = convert_optional_bool(write_through_obj,
1386 self->write_through);
1387 if (line_buffering < 0 || write_through < 0) {
1388 return NULL;
1389 }
INADA Naoki507434f2017-12-21 09:59:53 +09001390
Petr Viktorinffd97532020-02-11 17:46:57 +01001391 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001392 if (res == NULL) {
1393 return NULL;
1394 }
INADA Naoki507434f2017-12-21 09:59:53 +09001395 Py_DECREF(res);
1396 self->b2cratio = 0;
1397
1398 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1399 return NULL;
1400 }
1401
1402 if (textiowrapper_change_encoding(
1403 self, encoding, errors, newline_obj != NULL) < 0) {
1404 return NULL;
1405 }
1406
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001407 self->line_buffering = line_buffering;
1408 self->write_through = write_through;
1409 Py_RETURN_NONE;
1410}
1411
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001412static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001413textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001414{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001415 self->ok = 0;
1416 Py_CLEAR(self->buffer);
1417 Py_CLEAR(self->encoding);
1418 Py_CLEAR(self->encoder);
1419 Py_CLEAR(self->decoder);
1420 Py_CLEAR(self->readnl);
1421 Py_CLEAR(self->decoded_chars);
1422 Py_CLEAR(self->pending_bytes);
1423 Py_CLEAR(self->snapshot);
1424 Py_CLEAR(self->errors);
1425 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001426
1427 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001428 return 0;
1429}
1430
1431static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001432textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001433{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001434 self->finalizing = 1;
1435 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001436 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001437 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001438 _PyObject_GC_UNTRACK(self);
1439 if (self->weakreflist != NULL)
1440 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001441 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001442 Py_TYPE(self)->tp_free((PyObject *)self);
1443}
1444
1445static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001446textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001447{
1448 Py_VISIT(self->buffer);
1449 Py_VISIT(self->encoding);
1450 Py_VISIT(self->encoder);
1451 Py_VISIT(self->decoder);
1452 Py_VISIT(self->readnl);
1453 Py_VISIT(self->decoded_chars);
1454 Py_VISIT(self->pending_bytes);
1455 Py_VISIT(self->snapshot);
1456 Py_VISIT(self->errors);
1457 Py_VISIT(self->raw);
1458
1459 Py_VISIT(self->dict);
1460 return 0;
1461}
1462
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001463static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001464textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001465
/* Return NULL from the enclosing function if the stream is closed.

   This macro takes some shortcuts to make the common case faster: for an
   exact TextIOWrapper with a cached raw FileIO object the closed state is
   read from that object directly; without a cached object the `closed`
   getter is used; subclasses go through the generic IOBase check. */
#define CHECK_CLOSED(self) \
    do { \
        if (!Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _is_closed; \
            if ((self)->raw != NULL) { \
                _is_closed = _PyFileIO_closed((self)->raw); \
            } \
            else { \
                PyObject *_closed_obj = textiowrapper_closed_get(self, NULL); \
                if (_closed_obj == NULL) \
                    return NULL; \
                _is_closed = PyObject_IsTrue(_closed_obj); \
                Py_DECREF(_closed_obj); \
                if (_is_closed < 0) \
                    return NULL; \
            } \
            if (_is_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1492
/* Set ValueError and return NULL from the enclosing function when the
   wrapper's `ok` flag is not positive. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }
1499
/* Like CHECK_INITIALIZED, and additionally set ValueError and return NULL
   when the wrapper's `detached` flag is set. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }
1507
/* Variant of CHECK_ATTACHED for functions returning int: performs the same
   two checks but returns -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1518
1519
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001520/*[clinic input]
1521_io.TextIOWrapper.detach
1522[clinic start generated code]*/
1523
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001524static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001525_io_TextIOWrapper_detach_impl(textio *self)
1526/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001527{
1528 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001529 CHECK_ATTACHED(self);
Petr Viktorinffd97532020-02-11 17:46:57 +01001530 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001531 if (res == NULL)
1532 return NULL;
1533 Py_DECREF(res);
1534 buffer = self->buffer;
1535 self->buffer = NULL;
1536 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001537 return buffer;
1538}
1539
Antoine Pitrou24f36292009-03-28 22:16:42 +00001540/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001541 underlying buffered object, though. */
1542static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001543_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001544{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001545 if (self->pending_bytes == NULL)
1546 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001547
Inada Naokibfba8c32019-05-16 15:03:20 +09001548 PyObject *pending = self->pending_bytes;
1549 PyObject *b;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001550
Inada Naokibfba8c32019-05-16 15:03:20 +09001551 if (PyBytes_Check(pending)) {
1552 b = pending;
1553 Py_INCREF(b);
1554 }
1555 else if (PyUnicode_Check(pending)) {
1556 assert(PyUnicode_IS_ASCII(pending));
1557 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1558 b = PyBytes_FromStringAndSize(
1559 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1560 if (b == NULL) {
1561 return -1;
1562 }
1563 }
1564 else {
1565 assert(PyList_Check(pending));
1566 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1567 if (b == NULL) {
1568 return -1;
1569 }
1570
1571 char *buf = PyBytes_AsString(b);
1572 Py_ssize_t pos = 0;
1573
1574 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1575 PyObject *obj = PyList_GET_ITEM(pending, i);
1576 char *src;
1577 Py_ssize_t len;
1578 if (PyUnicode_Check(obj)) {
1579 assert(PyUnicode_IS_ASCII(obj));
1580 src = PyUnicode_DATA(obj);
1581 len = PyUnicode_GET_LENGTH(obj);
1582 }
1583 else {
1584 assert(PyBytes_Check(obj));
1585 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1586 Py_DECREF(b);
1587 return -1;
1588 }
1589 }
1590 memcpy(buf + pos, src, len);
1591 pos += len;
1592 }
1593 assert(pos == self->pending_bytes_count);
1594 }
1595
1596 self->pending_bytes_count = 0;
1597 self->pending_bytes = NULL;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001598 Py_DECREF(pending);
Inada Naokibfba8c32019-05-16 15:03:20 +09001599
1600 PyObject *ret;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001601 do {
Petr Viktorinffd97532020-02-11 17:46:57 +01001602 ret = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_write, b);
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001603 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604 Py_DECREF(b);
1605 if (ret == NULL)
1606 return -1;
1607 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001608 return 0;
1609}
1610
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001611/*[clinic input]
1612_io.TextIOWrapper.write
1613 text: unicode
1614 /
1615[clinic start generated code]*/
1616
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001617static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001618_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1619/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620{
1621 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001622 PyObject *b;
1623 Py_ssize_t textlen;
1624 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001625 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001626
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001627 if (PyUnicode_READY(text) == -1)
1628 return NULL;
1629
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001630 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001631 CHECK_CLOSED(self);
1632
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001633 if (self->encoder == NULL)
1634 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001636 Py_INCREF(text);
1637
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001638 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001639
1640 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001641 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642 haslf = 1;
1643
1644 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001645 PyObject *newtext = _PyObject_CallMethodId(
1646 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001647 Py_DECREF(text);
1648 if (newtext == NULL)
1649 return NULL;
1650 text = newtext;
1651 }
1652
Antoine Pitroue96ec682011-07-23 21:46:35 +02001653 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001654 text_needflush = 1;
1655 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 needflush = 1;
1659
1660 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001661 if (self->encodefunc != NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001662 if (PyUnicode_IS_ASCII(text) && is_asciicompat_encoding(self->encodefunc)) {
1663 b = text;
1664 Py_INCREF(b);
1665 }
1666 else {
1667 b = (*self->encodefunc)((PyObject *) self, text);
1668 }
Antoine Pitroue4501852009-05-14 18:55:55 +00001669 self->encoding_start_of_stream = 0;
1670 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671 else
Petr Viktorinffd97532020-02-11 17:46:57 +01001672 b = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_encode, text);
Inada Naokibfba8c32019-05-16 15:03:20 +09001673
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674 Py_DECREF(text);
1675 if (b == NULL)
1676 return NULL;
Inada Naokibfba8c32019-05-16 15:03:20 +09001677 if (b != text && !PyBytes_Check(b)) {
Oren Milmana5b4ea12017-08-25 21:14:54 +03001678 PyErr_Format(PyExc_TypeError,
1679 "encoder should return a bytes object, not '%.200s'",
1680 Py_TYPE(b)->tp_name);
1681 Py_DECREF(b);
1682 return NULL;
1683 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001684
Inada Naokibfba8c32019-05-16 15:03:20 +09001685 Py_ssize_t bytes_len;
1686 if (b == text) {
1687 bytes_len = PyUnicode_GET_LENGTH(b);
1688 }
1689 else {
1690 bytes_len = PyBytes_GET_SIZE(b);
1691 }
1692
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 if (self->pending_bytes == NULL) {
Inada Naokibfba8c32019-05-16 15:03:20 +09001694 self->pending_bytes_count = 0;
1695 self->pending_bytes = b;
1696 }
1697 else if (!PyList_CheckExact(self->pending_bytes)) {
1698 PyObject *list = PyList_New(2);
1699 if (list == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001700 Py_DECREF(b);
1701 return NULL;
1702 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001703 PyList_SET_ITEM(list, 0, self->pending_bytes);
1704 PyList_SET_ITEM(list, 1, b);
1705 self->pending_bytes = list;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001706 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001707 else {
1708 if (PyList_Append(self->pending_bytes, b) < 0) {
1709 Py_DECREF(b);
1710 return NULL;
1711 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001712 Py_DECREF(b);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 }
Inada Naokibfba8c32019-05-16 15:03:20 +09001714
1715 self->pending_bytes_count += bytes_len;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001716 if (self->pending_bytes_count > self->chunk_size || needflush ||
1717 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001718 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001719 return NULL;
1720 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001721
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722 if (needflush) {
Petr Viktorinffd97532020-02-11 17:46:57 +01001723 ret = PyObject_CallMethodNoArgs(self->buffer, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001724 if (ret == NULL)
1725 return NULL;
1726 Py_DECREF(ret);
1727 }
1728
Zackery Spytz23db9352018-06-29 04:14:58 -06001729 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 Py_CLEAR(self->snapshot);
1731
1732 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001733 ret = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001734 if (ret == NULL)
1735 return NULL;
1736 Py_DECREF(ret);
1737 }
1738
1739 return PyLong_FromSsize_t(textlen);
1740}
1741
1742/* Steal a reference to chars and store it in the decoded_char buffer;
1743 */
1744static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001745textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001747 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001748 self->decoded_chars_used = 0;
1749}
1750
1751static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001752textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753{
1754 PyObject *chars;
1755 Py_ssize_t avail;
1756
1757 if (self->decoded_chars == NULL)
1758 return PyUnicode_FromStringAndSize(NULL, 0);
1759
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001760 /* decoded_chars is guaranteed to be "ready". */
1761 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762 - self->decoded_chars_used);
1763
1764 assert(avail >= 0);
1765
1766 if (n < 0 || n > avail)
1767 n = avail;
1768
1769 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001770 chars = PyUnicode_Substring(self->decoded_chars,
1771 self->decoded_chars_used,
1772 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001773 if (chars == NULL)
1774 return NULL;
1775 }
1776 else {
1777 chars = self->decoded_chars;
1778 Py_INCREF(chars);
1779 }
1780
1781 self->decoded_chars_used += n;
1782 return chars;
1783}
1784
1785/* Read and decode the next chunk of data from the BufferedReader.
1786 */
1787static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001788textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001789{
1790 PyObject *dec_buffer = NULL;
1791 PyObject *dec_flags = NULL;
1792 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001793 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001795 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001796 int eof;
1797
1798 /* The return value is True unless EOF was reached. The decoded string is
1799 * placed in self._decoded_chars (replacing its previous value). The
1800 * entire input chunk is sent to the decoder, though some of it may remain
1801 * buffered in the decoder, yet to be converted.
1802 */
1803
1804 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001805 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 return -1;
1807 }
1808
1809 if (self->telling) {
1810 /* To prepare for tell(), we need to snapshot a point in the file
1811 * where the decoder's input buffer is empty.
1812 */
Petr Viktorinffd97532020-02-11 17:46:57 +01001813 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001814 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815 if (state == NULL)
1816 return -1;
1817 /* Given this, we know there was a valid snapshot point
1818 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1819 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001820 if (!PyTuple_Check(state)) {
1821 PyErr_SetString(PyExc_TypeError,
1822 "illegal decoder state");
1823 Py_DECREF(state);
1824 return -1;
1825 }
1826 if (!PyArg_ParseTuple(state,
1827 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1828 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001829 Py_DECREF(state);
1830 return -1;
1831 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001832
1833 if (!PyBytes_Check(dec_buffer)) {
1834 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001835 "illegal decoder state: the first item should be a "
1836 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001837 Py_TYPE(dec_buffer)->tp_name);
1838 Py_DECREF(state);
1839 return -1;
1840 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001841 Py_INCREF(dec_buffer);
1842 Py_INCREF(dec_flags);
1843 Py_DECREF(state);
1844 }
1845
1846 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001847 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001848 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001849 }
1850 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001851 if (chunk_size == NULL)
1852 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001853
Petr Viktorinffd97532020-02-11 17:46:57 +01001854 input_chunk = PyObject_CallMethodOneArg(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001855 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02001856 chunk_size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001857 Py_DECREF(chunk_size);
1858 if (input_chunk == NULL)
1859 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001860
1861 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001862 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001863 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001864 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1865 Py_TYPE(input_chunk)->tp_name);
1866 goto fail;
1867 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001868
Antoine Pitroub8503892014-04-29 10:14:02 +02001869 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001870 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001871
INADA Naoki507434f2017-12-21 09:59:53 +09001872 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1873 PyBuffer_Release(&input_chunk_buf);
1874 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001875 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001876
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001877 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001878 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001879 if (nchars > 0)
1880 self->b2cratio = (double) nbytes / nchars;
1881 else
1882 self->b2cratio = 0.0;
1883 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001884 eof = 0;
1885
1886 if (self->telling) {
1887 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1888 * next input to be decoded is dec_buffer + input_chunk.
1889 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001890 PyObject *next_input = dec_buffer;
1891 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001892 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001893 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001894 goto fail;
1895 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001896 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1897 if (snapshot == NULL) {
1898 dec_flags = NULL;
1899 goto fail;
1900 }
1901 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001902 }
1903 Py_DECREF(input_chunk);
1904
1905 return (eof == 0);
1906
1907 fail:
1908 Py_XDECREF(dec_buffer);
1909 Py_XDECREF(dec_flags);
1910 Py_XDECREF(input_chunk);
1911 return -1;
1912}
1913
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001914/*[clinic input]
1915_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001916 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001917 /
1918[clinic start generated code]*/
1919
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001920static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001921_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001922/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001923{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001924 PyObject *result = NULL, *chunks = NULL;
1925
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001926 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001927 CHECK_CLOSED(self);
1928
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001929 if (self->decoder == NULL)
1930 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001931
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001932 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001933 return NULL;
1934
1935 if (n < 0) {
1936 /* Read everything */
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02001937 PyObject *bytes = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_read);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001938 PyObject *decoded;
1939 if (bytes == NULL)
1940 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001941
Andy Lesterdffe4c02020-03-04 07:15:20 -06001942 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
Victor Stinnerfd821132011-05-25 22:01:33 +02001943 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1944 bytes, 1);
1945 else
1946 decoded = PyObject_CallMethodObjArgs(
1947 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001948 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001949 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001950 goto fail;
1951
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001952 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001953
1954 if (result == NULL) {
1955 Py_DECREF(decoded);
1956 return NULL;
1957 }
1958
1959 PyUnicode_AppendAndDel(&result, decoded);
1960 if (result == NULL)
1961 goto fail;
1962
Zackery Spytz23db9352018-06-29 04:14:58 -06001963 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001964 Py_CLEAR(self->snapshot);
1965 return result;
1966 }
1967 else {
1968 int res = 1;
1969 Py_ssize_t remaining = n;
1970
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001971 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001972 if (result == NULL)
1973 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001974 if (PyUnicode_READY(result) == -1)
1975 goto fail;
1976 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977
1978 /* Keep reading chunks until we have n characters to return */
1979 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001980 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001981 if (res < 0) {
1982 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1983 when EINTR occurs so we needn't do it ourselves. */
1984 if (_PyIO_trap_eintr()) {
1985 continue;
1986 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001987 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001988 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989 if (res == 0) /* EOF */
1990 break;
1991 if (chunks == NULL) {
1992 chunks = PyList_New(0);
1993 if (chunks == NULL)
1994 goto fail;
1995 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001996 if (PyUnicode_GET_LENGTH(result) > 0 &&
1997 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998 goto fail;
1999 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002000 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002001 if (result == NULL)
2002 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002003 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004 }
2005 if (chunks != NULL) {
2006 if (result != NULL && PyList_Append(chunks, result) < 0)
2007 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03002008 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009 if (result == NULL)
2010 goto fail;
2011 Py_CLEAR(chunks);
2012 }
2013 return result;
2014 }
2015 fail:
2016 Py_XDECREF(result);
2017 Py_XDECREF(chunks);
2018 return NULL;
2019}
2020
2021
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002022/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002023 that is to the NUL character. Otherwise the function will produce
2024 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002025static const char *
2026find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002028 if (kind == PyUnicode_1BYTE_KIND) {
2029 assert(ch < 256);
Andy Lestere6be9b52020-02-11 20:28:35 -06002030 return (char *) memchr((const void *) s, (char) ch, end - s);
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01002031 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002032 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002033 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002034 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002035 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002036 return s;
2037 if (s == end)
2038 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002039 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002040 }
2041}
2042
2043Py_ssize_t
2044_PyIO_find_line_ending(
2045 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002046 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047{
Andy Lestere6be9b52020-02-11 20:28:35 -06002048 Py_ssize_t len = (end - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049
2050 if (translated) {
2051 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002052 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002054 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002055 else {
2056 *consumed = len;
2057 return -1;
2058 }
2059 }
2060 else if (universal) {
2061 /* Universal newline search. Find any of \r, \r\n, \n
2062 * The decoder ensures that \r\n are not split in two pieces
2063 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002064 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002065 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002066 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02002068 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002069 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002070 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071 if (s >= end) {
2072 *consumed = len;
2073 return -1;
2074 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002075 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002076 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002078 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002080 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002081 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002082 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002083 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002084 }
2085 }
2086 }
2087 else {
2088 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002089 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002090 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002091 /* Assume that readnl is an ASCII character. */
2092 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002093 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002094 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002095 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002096 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002097 *consumed = len;
2098 return -1;
2099 }
2100 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002101 const char *s = start;
2102 const char *e = end - (readnl_len - 1)*kind;
2103 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002104 if (e < s)
2105 e = s;
2106 while (s < e) {
2107 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002108 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002109 if (pos == NULL || pos >= e)
2110 break;
2111 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002112 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002113 break;
2114 }
2115 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002116 return (pos - start)/kind + readnl_len;
2117 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002118 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002119 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002120 if (pos == NULL)
2121 *consumed = len;
2122 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002123 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002124 return -1;
2125 }
2126 }
2127}
2128
2129static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002130_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002131{
2132 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2133 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2134 int res;
2135
2136 CHECK_CLOSED(self);
2137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002138 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002139 return NULL;
2140
2141 chunked = 0;
2142
2143 while (1) {
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002144 const char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002145 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002146 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002147 Py_ssize_t consumed = 0;
2148
2149 /* First, get some data if necessary */
2150 res = 1;
2151 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002152 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002153 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002154 if (res < 0) {
2155 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2156 when EINTR occurs so we needn't do it ourselves. */
2157 if (_PyIO_trap_eintr()) {
2158 continue;
2159 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002160 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002161 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002162 if (res == 0)
2163 break;
2164 }
2165 if (res == 0) {
2166 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002167 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002168 Py_CLEAR(self->snapshot);
2169 start = endpos = offset_to_buffer = 0;
2170 break;
2171 }
2172
2173 if (remaining == NULL) {
2174 line = self->decoded_chars;
2175 start = self->decoded_chars_used;
2176 offset_to_buffer = 0;
2177 Py_INCREF(line);
2178 }
2179 else {
2180 assert(self->decoded_chars_used == 0);
2181 line = PyUnicode_Concat(remaining, self->decoded_chars);
2182 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002183 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002184 Py_CLEAR(remaining);
2185 if (line == NULL)
2186 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002187 if (PyUnicode_READY(line) == -1)
2188 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002189 }
2190
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002191 ptr = PyUnicode_DATA(line);
2192 line_len = PyUnicode_GET_LENGTH(line);
2193 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002194
2195 endpos = _PyIO_find_line_ending(
2196 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002197 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002198 ptr + kind * start,
2199 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002200 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002201 if (endpos >= 0) {
2202 endpos += start;
2203 if (limit >= 0 && (endpos - start) + chunked >= limit)
2204 endpos = start + limit - chunked;
2205 break;
2206 }
2207
2208 /* We can put aside up to `endpos` */
2209 endpos = consumed + start;
2210 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2211 /* Didn't find line ending, but reached length limit */
2212 endpos = start + limit - chunked;
2213 break;
2214 }
2215
2216 if (endpos > start) {
2217 /* No line ending seen yet - put aside current data */
2218 PyObject *s;
2219 if (chunks == NULL) {
2220 chunks = PyList_New(0);
2221 if (chunks == NULL)
2222 goto error;
2223 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002224 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002225 if (s == NULL)
2226 goto error;
2227 if (PyList_Append(chunks, s) < 0) {
2228 Py_DECREF(s);
2229 goto error;
2230 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002231 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 Py_DECREF(s);
2233 }
2234 /* There may be some remaining bytes we'll have to prepend to the
2235 next chunk of data */
2236 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002237 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002238 if (remaining == NULL)
2239 goto error;
2240 }
2241 Py_CLEAR(line);
2242 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002243 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002244 }
2245
2246 if (line != NULL) {
2247 /* Our line ends in the current buffer */
2248 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002249 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2250 PyObject *s = PyUnicode_Substring(line, start, endpos);
2251 Py_CLEAR(line);
2252 if (s == NULL)
2253 goto error;
2254 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002255 }
2256 }
2257 if (remaining != NULL) {
2258 if (chunks == NULL) {
2259 chunks = PyList_New(0);
2260 if (chunks == NULL)
2261 goto error;
2262 }
2263 if (PyList_Append(chunks, remaining) < 0)
2264 goto error;
2265 Py_CLEAR(remaining);
2266 }
2267 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002268 if (line != NULL) {
2269 if (PyList_Append(chunks, line) < 0)
2270 goto error;
2271 Py_DECREF(line);
2272 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002273 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2274 if (line == NULL)
2275 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002276 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002277 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002278 if (line == NULL) {
2279 Py_INCREF(_PyIO_empty_str);
2280 line = _PyIO_empty_str;
2281 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002282
2283 return line;
2284
2285 error:
2286 Py_XDECREF(chunks);
2287 Py_XDECREF(remaining);
2288 Py_XDECREF(line);
2289 return NULL;
2290}
2291
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002292/*[clinic input]
2293_io.TextIOWrapper.readline
2294 size: Py_ssize_t = -1
2295 /
2296[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002297
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002298static PyObject *
2299_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2300/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2301{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002302 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002303 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002304}
2305
2306/* Seek and Tell */
2307
/* Unpacked form of the integer "cookie" exchanged by tell() and seek().
   textiowrapper_build_cookie() packs these fields into a Python int and
   textiowrapper_parse_cookie() unpacks them again. */
typedef struct {
    Py_off_t start_pos;     /* position in the underlying buffer of the
                               safe start point seek() goes back to */
    int dec_flags;          /* flags handed to decoder.setstate((b"", flags))
                               at the start point */
    int bytes_to_feed;      /* number of bytes seek() reads from the buffer
                               and feeds to the decoder */
    int chars_to_skip;      /* number of decoded characters to skip after
                               feeding those bytes */
    char need_eof;          /* non-zero if the decoder must be called with
                               final=True when feeding the bytes */
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002315
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
 */

/* Size of the intermediary byte string: one Py_off_t (start_pos), three
   ints (dec_flags, bytes_to_feed, chars_to_skip) and one char (need_eof). */
#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP (sizeof(char))
# define OFF_NEED_EOF 0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS 0
# define OFF_DEC_FLAGS (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2347
2348static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002349textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002350{
2351 unsigned char buffer[COOKIE_BUF_LEN];
2352 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2353 if (cookieLong == NULL)
2354 return -1;
2355
2356 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002357 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002358 Py_DECREF(cookieLong);
2359 return -1;
2360 }
2361 Py_DECREF(cookieLong);
2362
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002363 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2364 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2365 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2366 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2367 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002368
2369 return 0;
2370}
2371
2372static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002373textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002374{
2375 unsigned char buffer[COOKIE_BUF_LEN];
2376
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002377 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2378 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2379 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2380 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2381 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002382
Christian Heimes743e0cd2012-10-17 23:52:17 +02002383 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2384 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002385}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002386
2387static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002388_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002389{
2390 PyObject *res;
2391 /* When seeking to the start of the stream, we call decoder.reset()
2392 rather than decoder.getstate().
2393 This is for a few decoders such as utf-16 for which the state value
2394 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2395 utf-16, that we are expecting a BOM).
2396 */
2397 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
Petr Viktorinffd97532020-02-11 17:46:57 +01002398 res = PyObject_CallMethodNoArgs(self->decoder, _PyIO_str_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002400 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2401 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002402 if (res == NULL)
2403 return -1;
2404 Py_DECREF(res);
2405 return 0;
2406}
2407
Antoine Pitroue4501852009-05-14 18:55:55 +00002408static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002409_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002410{
2411 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002412 if (start_of_stream) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002413 res = PyObject_CallMethodNoArgs(self->encoder, _PyIO_str_reset);
Antoine Pitroue4501852009-05-14 18:55:55 +00002414 self->encoding_start_of_stream = 1;
2415 }
2416 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01002417 res = PyObject_CallMethodOneArg(self->encoder, _PyIO_str_setstate,
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002418 _PyLong_Zero);
Antoine Pitroue4501852009-05-14 18:55:55 +00002419 self->encoding_start_of_stream = 0;
2420 }
2421 if (res == NULL)
2422 return -1;
2423 Py_DECREF(res);
2424 return 0;
2425}
2426
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002427static int
2428_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2429{
2430 /* Same as _textiowrapper_decoder_setstate() above. */
2431 return _textiowrapper_encoder_reset(
2432 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2433}
2434
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002435/*[clinic input]
2436_io.TextIOWrapper.seek
2437 cookie as cookieObj: object
2438 whence: int = 0
2439 /
2440[clinic start generated code]*/
2441
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002442static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002443_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2444/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002445{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002446 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002447 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002448 PyObject *res;
2449 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002450 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002451
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002452 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002453 CHECK_CLOSED(self);
2454
2455 Py_INCREF(cookieObj);
2456
2457 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002458 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002459 goto fail;
2460 }
2461
ngie-eign848037c2019-03-02 23:28:26 -08002462 switch (whence) {
2463 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002465 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002466 if (cmp < 0)
2467 goto fail;
2468
2469 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002470 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471 goto fail;
2472 }
2473
2474 /* Seeking to the current position should attempt to
2475 * sync the underlying buffer with the current position.
2476 */
2477 Py_DECREF(cookieObj);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002478 cookieObj = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002479 if (cookieObj == NULL)
2480 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002481 break;
2482
ngie-eign848037c2019-03-02 23:28:26 -08002483 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002484 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002485 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486 if (cmp < 0)
2487 goto fail;
2488
2489 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002490 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002491 goto fail;
2492 }
2493
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002494 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002495 if (res == NULL)
2496 goto fail;
2497 Py_DECREF(res);
2498
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002499 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500 Py_CLEAR(self->snapshot);
2501 if (self->decoder) {
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002502 res = _PyObject_CallMethodIdNoArgs(self->decoder, &PyId_reset);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503 if (res == NULL)
2504 goto fail;
2505 Py_DECREF(res);
2506 }
2507
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002508 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002509 Py_CLEAR(cookieObj);
2510 if (res == NULL)
2511 goto fail;
2512 if (self->encoder) {
2513 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002514 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002515 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2516 Py_DECREF(res);
2517 goto fail;
2518 }
2519 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002521
ngie-eign848037c2019-03-02 23:28:26 -08002522 case SEEK_SET:
2523 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002524
ngie-eign848037c2019-03-02 23:28:26 -08002525 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002526 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002527 "invalid whence (%d, should be %d, %d or %d)", whence,
2528 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002529 goto fail;
2530 }
2531
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002532 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002533 if (cmp < 0)
2534 goto fail;
2535
2536 if (cmp == 1) {
2537 PyErr_Format(PyExc_ValueError,
2538 "negative seek position %R", cookieObj);
2539 goto fail;
2540 }
2541
Petr Viktorinffd97532020-02-11 17:46:57 +01002542 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002543 if (res == NULL)
2544 goto fail;
2545 Py_DECREF(res);
2546
2547 /* The strategy of seek() is to go back to the safe start point
2548 * and replay the effect of read(chars_to_skip) from there.
2549 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002550 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551 goto fail;
2552
2553 /* Seek back to the safe start point. */
2554 posobj = PyLong_FromOff_t(cookie.start_pos);
2555 if (posobj == NULL)
2556 goto fail;
Petr Viktorinffd97532020-02-11 17:46:57 +01002557 res = PyObject_CallMethodOneArg(self->buffer, _PyIO_str_seek, posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558 Py_DECREF(posobj);
2559 if (res == NULL)
2560 goto fail;
2561 Py_DECREF(res);
2562
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002563 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002564 Py_CLEAR(self->snapshot);
2565
2566 /* Restore the decoder to its state from the safe start point. */
2567 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002568 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002569 goto fail;
2570 }
2571
2572 if (cookie.chars_to_skip) {
2573 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002574 PyObject *input_chunk = _PyObject_CallMethodId(
2575 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576 PyObject *decoded;
2577
2578 if (input_chunk == NULL)
2579 goto fail;
2580
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002581 if (!PyBytes_Check(input_chunk)) {
2582 PyErr_Format(PyExc_TypeError,
2583 "underlying read() should have returned a bytes "
2584 "object, not '%.200s'",
2585 Py_TYPE(input_chunk)->tp_name);
2586 Py_DECREF(input_chunk);
2587 goto fail;
2588 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002589
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002590 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2591 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002592 goto fail;
2593 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002594 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002595
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002596 decoded = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_decode,
2597 input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002598
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002599 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002600 goto fail;
2601
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002602 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002603
2604 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002605 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002606 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002607 goto fail;
2608 }
2609 self->decoded_chars_used = cookie.chars_to_skip;
2610 }
2611 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002612 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2613 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002614 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002615 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002616 }
2617
Antoine Pitroue4501852009-05-14 18:55:55 +00002618 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2619 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002620 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002621 goto fail;
2622 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002623 return cookieObj;
2624 fail:
2625 Py_XDECREF(cookieObj);
2626 return NULL;
2627
2628}
2629
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002630/*[clinic input]
2631_io.TextIOWrapper.tell
2632[clinic start generated code]*/
2633
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002634static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002635_io_TextIOWrapper_tell_impl(textio *self)
2636/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002637{
2638 PyObject *res;
2639 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002640 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002641 PyObject *next_input;
2642 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002643 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002644 PyObject *saved_state = NULL;
Serhiy Storchaka8f87eef2020-04-12 14:58:27 +03002645 const char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002646 Py_ssize_t dec_buffer_len;
2647 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002648
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002649 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002650 CHECK_CLOSED(self);
2651
2652 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002653 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002654 goto fail;
2655 }
2656 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002657 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002658 "telling position disabled by next() call");
2659 goto fail;
2660 }
2661
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002662 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002663 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002664 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002665 if (res == NULL)
2666 goto fail;
2667 Py_DECREF(res);
2668
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002669 posobj = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_tell);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002670 if (posobj == NULL)
2671 goto fail;
2672
2673 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002674 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002675 return posobj;
2676 }
2677
2678#if defined(HAVE_LARGEFILE_SUPPORT)
2679 cookie.start_pos = PyLong_AsLongLong(posobj);
2680#else
2681 cookie.start_pos = PyLong_AsLong(posobj);
2682#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002683 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002684 if (PyErr_Occurred())
2685 goto fail;
2686
2687 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002688 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002689 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690 goto fail;
2691
2692 assert (PyBytes_Check(next_input));
2693
2694 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2695
2696 /* How many decoded characters have been used up since the snapshot? */
2697 if (self->decoded_chars_used == 0) {
2698 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002699 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002700 }
2701
2702 chars_to_skip = self->decoded_chars_used;
2703
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002704 /* Decoder state will be restored at the end */
Petr Viktorinffd97532020-02-11 17:46:57 +01002705 saved_state = PyObject_CallMethodNoArgs(self->decoder,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002706 _PyIO_str_getstate);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 if (saved_state == NULL)
2708 goto fail;
2709
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002710#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002711 PyObject *dec_buffer; \
Petr Viktorinffd97532020-02-11 17:46:57 +01002712 PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002713 _PyIO_str_getstate); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002714 if (_state == NULL) \
2715 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002716 if (!PyTuple_Check(_state)) { \
2717 PyErr_SetString(PyExc_TypeError, \
2718 "illegal decoder state"); \
2719 Py_DECREF(_state); \
2720 goto fail; \
2721 } \
2722 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2723 &dec_buffer, &dec_flags)) \
2724 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002725 Py_DECREF(_state); \
2726 goto fail; \
2727 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002728 if (!PyBytes_Check(dec_buffer)) { \
2729 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002730 "illegal decoder state: the first item should be a " \
2731 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002732 Py_TYPE(dec_buffer)->tp_name); \
2733 Py_DECREF(_state); \
2734 goto fail; \
2735 } \
2736 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002737 Py_DECREF(_state); \
2738 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002739
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002740#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002741 PyObject *_decoded = _PyObject_CallMethodId( \
2742 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002743 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002744 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002745 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002746 Py_DECREF(_decoded); \
2747 } while (0)
2748
2749 /* Fast search for an acceptable start point, close to our
2750 current pos */
2751 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2752 skip_back = 1;
2753 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2754 input = PyBytes_AS_STRING(next_input);
2755 while (skip_bytes > 0) {
2756 /* Decode up to temptative start point */
2757 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2758 goto fail;
2759 DECODER_DECODE(input, skip_bytes, chars_decoded);
2760 if (chars_decoded <= chars_to_skip) {
2761 DECODER_GETSTATE();
2762 if (dec_buffer_len == 0) {
2763 /* Before pos and no bytes buffered in decoder => OK */
2764 cookie.dec_flags = dec_flags;
2765 chars_to_skip -= chars_decoded;
2766 break;
2767 }
2768 /* Skip back by buffered amount and reset heuristic */
2769 skip_bytes -= dec_buffer_len;
2770 skip_back = 1;
2771 }
2772 else {
2773 /* We're too far ahead, skip back a bit */
2774 skip_bytes -= skip_back;
2775 skip_back *= 2;
2776 }
2777 }
2778 if (skip_bytes <= 0) {
2779 skip_bytes = 0;
2780 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2781 goto fail;
2782 }
2783
2784 /* Note our initial start point. */
2785 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002786 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002787 if (chars_to_skip == 0)
2788 goto finally;
2789
2790 /* We should be close to the desired position. Now feed the decoder one
2791 * byte at a time until we reach the `chars_to_skip` target.
2792 * As we go, note the nearest "safe start point" before the current
2793 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002794 * can safely start from there and advance to this location).
2795 */
2796 chars_decoded = 0;
2797 input = PyBytes_AS_STRING(next_input);
2798 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002799 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002800 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002801 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002802
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002803 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002804 /* We got n chars for 1 byte */
2805 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002806 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002807 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002808
2809 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2810 /* Decoder buffer is empty, so this is a safe start point. */
2811 cookie.start_pos += cookie.bytes_to_feed;
2812 chars_to_skip -= chars_decoded;
2813 cookie.dec_flags = dec_flags;
2814 cookie.bytes_to_feed = 0;
2815 chars_decoded = 0;
2816 }
2817 if (chars_decoded >= chars_to_skip)
2818 break;
2819 input++;
2820 }
2821 if (input == input_end) {
2822 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002823 PyObject *decoded = _PyObject_CallMethodId(
Serhiy Storchaka1f21eaa2019-09-01 12:16:51 +03002824 self->decoder, &PyId_decode, "yO", "", /* final = */ Py_True);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002825 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002826 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002827 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002828 Py_DECREF(decoded);
2829 cookie.need_eof = 1;
2830
2831 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002832 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002833 "can't reconstruct logical file position");
2834 goto fail;
2835 }
2836 }
2837
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002838finally:
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002839 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002840 Py_DECREF(saved_state);
2841 if (res == NULL)
2842 return NULL;
2843 Py_DECREF(res);
2844
2845 /* The returned cookie corresponds to the last safe start point. */
2846 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002847 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002848
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002849fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002850 if (saved_state) {
2851 PyObject *type, *value, *traceback;
2852 PyErr_Fetch(&type, &value, &traceback);
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02002853 res = _PyObject_CallMethodIdOneArg(self->decoder, &PyId_setstate, saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002854 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002855 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002856 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002857 }
2858 return NULL;
2859}
2860
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002861/*[clinic input]
2862_io.TextIOWrapper.truncate
2863 pos: object = None
2864 /
2865[clinic start generated code]*/
2866
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002867static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002868_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2869/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002870{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002871 PyObject *res;
2872
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002873 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002874
Petr Viktorinffd97532020-02-11 17:46:57 +01002875 res = PyObject_CallMethodNoArgs((PyObject *)self, _PyIO_str_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002876 if (res == NULL)
2877 return NULL;
2878 Py_DECREF(res);
2879
Petr Viktorinffd97532020-02-11 17:46:57 +01002880 return PyObject_CallMethodOneArg(self->buffer, _PyIO_str_truncate, pos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002881}
2882
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002883static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002884textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002885{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002886 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002887 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002888
2889 CHECK_INITIALIZED(self);
2890
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002891 res = PyUnicode_FromString("<_io.TextIOWrapper");
2892 if (res == NULL)
2893 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002894
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002895 status = Py_ReprEnter((PyObject *)self);
2896 if (status != 0) {
2897 if (status > 0) {
2898 PyErr_Format(PyExc_RuntimeError,
2899 "reentrant call inside %s.__repr__",
2900 Py_TYPE(self)->tp_name);
2901 }
2902 goto error;
2903 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002904 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_name, &nameobj) < 0) {
2905 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002906 goto error;
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002907 }
2908 /* Ignore ValueError raised if the underlying stream was detached */
2909 PyErr_Clear();
Antoine Pitrou716c4442009-05-23 19:04:03 +00002910 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002911 if (nameobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002912 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002913 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002914 if (s == NULL)
2915 goto error;
2916 PyUnicode_AppendAndDel(&res, s);
2917 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002918 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002919 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002920 if (_PyObject_LookupAttrId((PyObject *) self, &PyId_mode, &modeobj) < 0) {
2921 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002922 }
Serhiy Storchakab235a1b2019-08-29 09:25:22 +03002923 if (modeobj != NULL) {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002924 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2925 Py_DECREF(modeobj);
2926 if (s == NULL)
2927 goto error;
2928 PyUnicode_AppendAndDel(&res, s);
2929 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002930 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002931 }
2932 s = PyUnicode_FromFormat("%U encoding=%R>",
2933 res, self->encoding);
2934 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002935 if (status == 0) {
2936 Py_ReprLeave((PyObject *)self);
2937 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002938 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002939
2940 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002941 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002942 if (status == 0) {
2943 Py_ReprLeave((PyObject *)self);
2944 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002945 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002946}
2947
2948
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002949/* Inquiries */
2950
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002951/*[clinic input]
2952_io.TextIOWrapper.fileno
2953[clinic start generated code]*/
2954
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002955static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002956_io_TextIOWrapper_fileno_impl(textio *self)
2957/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002958{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002959 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002960 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002961}
2962
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002963/*[clinic input]
2964_io.TextIOWrapper.seekable
2965[clinic start generated code]*/
2966
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002967static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002968_io_TextIOWrapper_seekable_impl(textio *self)
2969/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002970{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002971 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002972 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_seekable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002973}
2974
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002975/*[clinic input]
2976_io.TextIOWrapper.readable
2977[clinic start generated code]*/
2978
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002979static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002980_io_TextIOWrapper_readable_impl(textio *self)
2981/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002982{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002983 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002984 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_readable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002985}
2986
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002987/*[clinic input]
2988_io.TextIOWrapper.writable
2989[clinic start generated code]*/
2990
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002991static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002992_io_TextIOWrapper_writable_impl(textio *self)
2993/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002994{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002995 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02002996 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_writable);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002997}
2998
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002999/*[clinic input]
3000_io.TextIOWrapper.isatty
3001[clinic start generated code]*/
3002
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003003static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003004_io_TextIOWrapper_isatty_impl(textio *self)
3005/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003006{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003007 CHECK_ATTACHED(self);
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003008 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_isatty);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003009}
3010
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003011/*[clinic input]
3012_io.TextIOWrapper.flush
3013[clinic start generated code]*/
3014
Antoine Pitrou243757e2010-11-05 21:15:39 +00003015static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003016_io_TextIOWrapper_flush_impl(textio *self)
3017/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003018{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003019 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003020 CHECK_CLOSED(self);
3021 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003022 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003023 return NULL;
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003024 return _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_flush);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003025}
3026
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003027/*[clinic input]
3028_io.TextIOWrapper.close
3029[clinic start generated code]*/
3030
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003031static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003032_io_TextIOWrapper_close_impl(textio *self)
3033/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003034{
3035 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003036 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003037 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003038
Antoine Pitrou6be88762010-05-03 16:48:20 +00003039 res = textiowrapper_closed_get(self, NULL);
3040 if (res == NULL)
3041 return NULL;
3042 r = PyObject_IsTrue(res);
3043 Py_DECREF(res);
3044 if (r < 0)
3045 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00003046
Antoine Pitrou6be88762010-05-03 16:48:20 +00003047 if (r > 0) {
3048 Py_RETURN_NONE; /* stream already closed */
3049 }
3050 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06003051 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02003052 if (self->finalizing) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02003053 res = _PyObject_CallMethodIdOneArg(self->buffer,
3054 &PyId__dealloc_warn,
3055 (PyObject *)self);
Antoine Pitroue033e062010-10-29 10:38:18 +00003056 if (res)
3057 Py_DECREF(res);
3058 else
3059 PyErr_Clear();
3060 }
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003061 res = _PyObject_CallMethodIdNoArgs((PyObject *)self, &PyId_flush);
Benjamin Peterson68623612012-12-20 11:53:11 -06003062 if (res == NULL)
3063 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00003064 else
3065 Py_DECREF(res);
3066
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003067 res = _PyObject_CallMethodIdNoArgs(self->buffer, &PyId_close);
Benjamin Peterson68623612012-12-20 11:53:11 -06003068 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03003069 _PyErr_ChainExceptions(exc, val, tb);
3070 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06003071 }
3072 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00003073 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003074}
3075
3076static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003077textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003078{
3079 PyObject *line;
3080
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003081 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003082
3083 self->telling = 0;
Andy Lesterdffe4c02020-03-04 07:15:20 -06003084 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003085 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003086 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003087 }
3088 else {
Petr Viktorinffd97532020-02-11 17:46:57 +01003089 line = PyObject_CallMethodNoArgs((PyObject *)self,
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003090 _PyIO_str_readline);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003091 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03003092 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03003093 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003094 "not '%.200s'", Py_TYPE(line)->tp_name);
3095 Py_DECREF(line);
3096 return NULL;
3097 }
3098 }
3099
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003100 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003101 return NULL;
3102
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02003103 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003104 /* Reached EOF or would have blocked */
3105 Py_DECREF(line);
3106 Py_CLEAR(self->snapshot);
3107 self->telling = self->seekable;
3108 return NULL;
3109 }
3110
3111 return line;
3112}
3113
3114static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003115textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003116{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003117 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003118 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003119}
3120
3121static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003122textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003123{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003124 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003125 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3126}
3127
3128static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003129textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003130{
3131 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003132 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003133 if (self->decoder == NULL ||
3134 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3135 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003136 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003137 }
3138 return res;
3139}
3140
3141static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003142textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003143{
3144 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003145 Py_INCREF(self->errors);
3146 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003147}
3148
3149static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003150textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003151{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003152 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003153 return PyLong_FromSsize_t(self->chunk_size);
3154}
3155
3156static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003157textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003158{
3159 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003160 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003161 if (arg == NULL) {
3162 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3163 return -1;
3164 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003165 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003166 if (n == -1 && PyErr_Occurred())
3167 return -1;
3168 if (n <= 0) {
3169 PyErr_SetString(PyExc_ValueError,
3170 "a strictly positive integer is required");
3171 return -1;
3172 }
3173 self->chunk_size = n;
3174 return 0;
3175}
3176
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003177#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003178
/* Method table for _io.IncrementalNewlineDecoder (installed as tp_methods).
   Each *_METHODDEF macro is presumably supplied by the generated
   "clinic/textio.c.h" header included just above and expands to one
   table entry followed by a comma -- confirm against that header. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3186
3187static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3188 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3189 {NULL}
3190};
3191
3192PyTypeObject PyIncrementalNewlineDecoder_Type = {
3193 PyVarObject_HEAD_INIT(NULL, 0)
3194 "_io.IncrementalNewlineDecoder", /*tp_name*/
3195 sizeof(nldecoder_object), /*tp_basicsize*/
3196 0, /*tp_itemsize*/
3197 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003198 0, /*tp_vectorcall_offset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003199 0, /*tp_getattr*/
3200 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003201 0, /*tp_as_async*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003202 0, /*tp_repr*/
3203 0, /*tp_as_number*/
3204 0, /*tp_as_sequence*/
3205 0, /*tp_as_mapping*/
3206 0, /*tp_hash */
3207 0, /*tp_call*/
3208 0, /*tp_str*/
3209 0, /*tp_getattro*/
3210 0, /*tp_setattro*/
3211 0, /*tp_as_buffer*/
3212 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3213 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3214 0, /* tp_traverse */
3215 0, /* tp_clear */
3216 0, /* tp_richcompare */
3217 0, /*tp_weaklistoffset*/
3218 0, /* tp_iter */
3219 0, /* tp_iternext */
3220 incrementalnewlinedecoder_methods, /* tp_methods */
3221 0, /* tp_members */
3222 incrementalnewlinedecoder_getset, /* tp_getset */
3223 0, /* tp_base */
3224 0, /* tp_dict */
3225 0, /* tp_descr_get */
3226 0, /* tp_descr_set */
3227 0, /* tp_dictoffset */
3228 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3229 0, /* tp_alloc */
3230 PyType_GenericNew, /* tp_new */
3231};
3232
3233
/* Method table for _io.TextIOWrapper (installed as tp_methods).
   Each *_METHODDEF macro is presumably supplied by the generated
   "clinic/textio.c.h" header included above and expands to one table
   entry followed by a comma -- confirm against that header. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3254
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003255static PyMemberDef textiowrapper_members[] = {
3256 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3257 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3258 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003259 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003260 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003261 {NULL}
3262};
3263
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003264static PyGetSetDef textiowrapper_getset[] = {
3265 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3266 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003267/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3268*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003269 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3270 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3271 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3272 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003273 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003274};
3275
3276PyTypeObject PyTextIOWrapper_Type = {
3277 PyVarObject_HEAD_INIT(NULL, 0)
3278 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003279 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003280 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003281 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003282 0, /*tp_vectorcall_offset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003283 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003284 0, /*tps_etattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02003285 0, /*tp_as_async*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003286 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003287 0, /*tp_as_number*/
3288 0, /*tp_as_sequence*/
3289 0, /*tp_as_mapping*/
3290 0, /*tp_hash */
3291 0, /*tp_call*/
3292 0, /*tp_str*/
3293 0, /*tp_getattro*/
3294 0, /*tp_setattro*/
3295 0, /*tp_as_buffer*/
3296 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrouada319b2019-05-29 22:12:38 +02003297 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003298 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003299 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3300 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003301 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003302 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003303 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003304 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3305 textiowrapper_methods, /* tp_methods */
3306 textiowrapper_members, /* tp_members */
3307 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003308 0, /* tp_base */
3309 0, /* tp_dict */
3310 0, /* tp_descr_get */
3311 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003312 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003313 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003314 0, /* tp_alloc */
3315 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003316 0, /* tp_free */
3317 0, /* tp_is_gc */
3318 0, /* tp_bases */
3319 0, /* tp_mro */
3320 0, /* tp_cache */
3321 0, /* tp_subclasses */
3322 0, /* tp_weaklist */
3323 0, /* tp_del */
3324 0, /* tp_version_tag */
3325 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003326};