blob: 8c391659ecd8672929918134644d4cdb4cec275d [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +010011#include "pycore_object.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000012#include "structmember.h"
13#include "_iomodule.h"
14
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030015/*[clinic input]
16module _io
17class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19[clinic start generated code]*/
20/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21
/* Interned-name handles (PyId_<name>) declared for use in this file. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000043/* TextIOBase */
44
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000045PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046 "Base class for text I/O.\n"
47 "\n"
48 "This class provides a character and line based interface to stream\n"
49 "I/O. There is no readinto method because Python's character strings\n"
50 "are immutable. There is no public constructor.\n"
51 );
52
53static PyObject *
54_unsupported(const char *message)
55{
Antoine Pitrou712cb732013-12-21 15:51:54 +010056 _PyIO_State *state = IO_STATE();
57 if (state != NULL)
58 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000059 return NULL;
60}
61
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063 "Separate the underlying buffer from the TextIOBase and return it.\n"
64 "\n"
65 "After the underlying buffer has been detached, the TextIO is in an\n"
66 "unusable state.\n"
67 );
68
69static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053070textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000071{
72 return _unsupported("detach");
73}
74
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076 "Read at most n characters from stream.\n"
77 "\n"
78 "Read from underlying buffer until we have n characters or we hit EOF.\n"
79 "If n is negative or omitted, read until EOF.\n"
80 );
81
82static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000083textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000084{
85 return _unsupported("read");
86}
87
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089 "Read until newline or EOF.\n"
90 "\n"
91 "Returns an empty string if EOF is hit immediately.\n"
92 );
93
94static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000095textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000096{
97 return _unsupported("readline");
98}
99
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000100PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000101 "Write string to stream.\n"
102 "Returns the number of characters written (which is always equal to\n"
103 "the length of the string).\n"
104 );
105
106static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000108{
109 return _unsupported("write");
110}
111
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000112PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000113 "Encoding of the text stream.\n"
114 "\n"
115 "Subclasses should override.\n"
116 );
117
118static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000119textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120{
121 Py_RETURN_NONE;
122}
123
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 "Line endings translated so far.\n"
126 "\n"
127 "Only line endings translated during reading are considered.\n"
128 "\n"
129 "Subclasses should override.\n"
130 );
131
132static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000133textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000134{
135 Py_RETURN_NONE;
136}
137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000138PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000139 "The error setting of the decoder or encoder.\n"
140 "\n"
141 "Subclasses should override.\n"
142 );
143
144static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000145textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000146{
147 Py_RETURN_NONE;
148}
149
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530152 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000153 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156 {NULL, NULL}
157};
158
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000159static PyGetSetDef textiobase_getset[] = {
160 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000163 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164};
165
166PyTypeObject PyTextIOBase_Type = {
167 PyVarObject_HEAD_INIT(NULL, 0)
168 "_io._TextIOBase", /*tp_name*/
169 0, /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 0, /*tp_dealloc*/
172 0, /*tp_print*/
173 0, /*tp_getattr*/
174 0, /*tp_setattr*/
175 0, /*tp_compare */
176 0, /*tp_repr*/
177 0, /*tp_as_number*/
178 0, /*tp_as_sequence*/
179 0, /*tp_as_mapping*/
180 0, /*tp_hash */
181 0, /*tp_call*/
182 0, /*tp_str*/
183 0, /*tp_getattro*/
184 0, /*tp_setattro*/
185 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200186 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
187 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000188 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000189 0, /* tp_traverse */
190 0, /* tp_clear */
191 0, /* tp_richcompare */
192 0, /* tp_weaklistoffset */
193 0, /* tp_iter */
194 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000195 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000196 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 &PyIOBase_Type, /* tp_base */
199 0, /* tp_dict */
200 0, /* tp_descr_get */
201 0, /* tp_descr_set */
202 0, /* tp_dictoffset */
203 0, /* tp_init */
204 0, /* tp_alloc */
205 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200206 0, /* tp_free */
207 0, /* tp_is_gc */
208 0, /* tp_bases */
209 0, /* tp_mro */
210 0, /* tp_cache */
211 0, /* tp_subclasses */
212 0, /* tp_weaklist */
213 0, /* tp_del */
214 0, /* tp_version_tag */
215 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000216};
217
218
219/* IncrementalNewlineDecoder */
220
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221typedef struct {
222 PyObject_HEAD
223 PyObject *decoder;
224 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200225 unsigned int pendingcr: 1;
226 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000227 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000228} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000229
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300230/*[clinic input]
231_io.IncrementalNewlineDecoder.__init__
232 decoder: object
233 translate: int
234 errors: object(c_default="NULL") = "strict"
235
236Codec used when reading a file in universal newlines mode.
237
238It wraps another incremental decoder, translating \r\n and \r into \n.
239It also records the types of newlines encountered. When used with
240translate=False, it ensures that the newline sequence is returned in
241one piece. When used with decoder=None, it expects unicode strings as
242decode input and translates newlines without first invoking an external
243decoder.
244[clinic start generated code]*/
245
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300247_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
248 PyObject *decoder, int translate,
249 PyObject *errors)
250/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252 self->decoder = decoder;
253 Py_INCREF(decoder);
254
255 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900256 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000257 if (self->errors == NULL)
258 return -1;
259 }
260 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000261 self->errors = errors;
262 }
INADA Naoki507434f2017-12-21 09:59:53 +0900263 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000264
Xiang Zhangb08746b2018-10-31 19:49:16 +0800265 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266 self->seennl = 0;
267 self->pendingcr = 0;
268
269 return 0;
270}
271
272static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000273incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000274{
275 Py_CLEAR(self->decoder);
276 Py_CLEAR(self->errors);
277 Py_TYPE(self)->tp_free((PyObject *)self);
278}
279
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200280static int
281check_decoded(PyObject *decoded)
282{
283 if (decoded == NULL)
284 return -1;
285 if (!PyUnicode_Check(decoded)) {
286 PyErr_Format(PyExc_TypeError,
287 "decoder should return a string result, not '%.200s'",
288 Py_TYPE(decoded)->tp_name);
289 Py_DECREF(decoded);
290 return -1;
291 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200292 if (PyUnicode_READY(decoded) < 0) {
293 Py_DECREF(decoded);
294 return -1;
295 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200296 return 0;
297}
298
/* Bit flags recording which newline kinds have been encountered. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
303
304PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200305_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000306 PyObject *input, int final)
307{
308 PyObject *output;
309 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200310 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311
312 if (self->decoder == NULL) {
313 PyErr_SetString(PyExc_ValueError,
314 "IncrementalNewlineDecoder.__init__ not called");
315 return NULL;
316 }
317
318 /* decode input (with the eventual \r from a previous pass) */
319 if (self->decoder != Py_None) {
320 output = PyObject_CallMethodObjArgs(self->decoder,
321 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
322 }
323 else {
324 output = input;
325 Py_INCREF(output);
326 }
327
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200328 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000329 return NULL;
330
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000332 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 /* Prefix output with CR */
334 int kind;
335 PyObject *modified;
336 char *out;
337
338 modified = PyUnicode_New(output_len + 1,
339 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000340 if (modified == NULL)
341 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 kind = PyUnicode_KIND(modified);
343 out = PyUnicode_DATA(modified);
344 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200345 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000348 self->pendingcr = 0;
349 output_len++;
350 }
351
352 /* retain last \r even when not translating data:
353 * then readline() is sure to get \r\n in one pass
354 */
355 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000356 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
358 {
359 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
360 if (modified == NULL)
361 goto error;
362 Py_DECREF(output);
363 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000364 self->pendingcr = 1;
365 }
366 }
367
368 /* Record which newlines are read and do newline translation if desired,
369 all in one pass. */
370 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200371 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000372 Py_ssize_t len;
373 int seennl = self->seennl;
374 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000376
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 in_str = PyUnicode_DATA(output);
378 len = PyUnicode_GET_LENGTH(output);
379 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000380
381 if (len == 0)
382 return output;
383
384 /* If, up to now, newlines are consistently \n, do a quick check
385 for the \r *byte* with the libc's optimized memchr.
386 */
387 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200388 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000389 }
390
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 if (only_lf) {
392 /* If not already seen, quick scan for a possible "\n" character.
393 (there's nothing else to be done, even when in translation mode)
394 */
395 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200396 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100397 if (kind == PyUnicode_1BYTE_KIND)
398 seennl |= SEEN_LF;
399 else {
400 Py_ssize_t i = 0;
401 for (;;) {
402 Py_UCS4 c;
403 /* Fast loop for non-control characters */
404 while (PyUnicode_READ(kind, in_str, i) > '\n')
405 i++;
406 c = PyUnicode_READ(kind, in_str, i++);
407 if (c == '\n') {
408 seennl |= SEEN_LF;
409 break;
410 }
411 if (i >= len)
412 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000413 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 }
415 }
416 /* Finished: we have scanned for newlines, and none of them
417 need translating */
418 }
419 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000421 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000422 if (seennl == SEEN_ALL)
423 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000424 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200425 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000426 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 while (PyUnicode_READ(kind, in_str, i) > '\r')
428 i++;
429 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 if (c == '\n')
431 seennl |= SEEN_LF;
432 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 }
437 else
438 seennl |= SEEN_CR;
439 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000441 break;
442 if (seennl == SEEN_ALL)
443 break;
444 }
445 endscan:
446 ;
447 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000448 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 void *translated;
450 int kind = PyUnicode_KIND(output);
451 void *in_str = PyUnicode_DATA(output);
452 Py_ssize_t in, out;
453 /* XXX: Previous in-place translation here is disabled as
454 resizing is not possible anymore */
455 /* We could try to optimize this so that we only do a copy
456 when there is something to translate. On the other hand,
457 we already know there is a \r byte, so chances are high
458 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200459 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200460 if (translated == NULL) {
461 PyErr_NoMemory();
462 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000467 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
469 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200471 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 seennl |= SEEN_LF;
473 continue;
474 }
475 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200476 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 in++;
478 seennl |= SEEN_CRLF;
479 }
480 else
481 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000483 continue;
484 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000486 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200487 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000488 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200489 Py_DECREF(output);
490 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100491 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200492 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200493 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000494 }
495 self->seennl |= seennl;
496 }
497
498 return output;
499
500 error:
501 Py_DECREF(output);
502 return NULL;
503}
504
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300505/*[clinic input]
506_io.IncrementalNewlineDecoder.decode
507 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200508 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300509[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000510
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511static PyObject *
512_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
513 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200514/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300515{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
517}
518
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300519/*[clinic input]
520_io.IncrementalNewlineDecoder.getstate
521[clinic start generated code]*/
522
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000523static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300524_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
525/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000526{
527 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700528 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529
530 if (self->decoder != Py_None) {
531 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
532 _PyIO_str_getstate, NULL);
533 if (state == NULL)
534 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300535 if (!PyTuple_Check(state)) {
536 PyErr_SetString(PyExc_TypeError,
537 "illegal decoder state");
538 Py_DECREF(state);
539 return NULL;
540 }
541 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
542 &buffer, &flag))
543 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000544 Py_DECREF(state);
545 return NULL;
546 }
547 Py_INCREF(buffer);
548 Py_DECREF(state);
549 }
550 else {
551 buffer = PyBytes_FromString("");
552 flag = 0;
553 }
554 flag <<= 1;
555 if (self->pendingcr)
556 flag |= 1;
557 return Py_BuildValue("NK", buffer, flag);
558}
559
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300560/*[clinic input]
561_io.IncrementalNewlineDecoder.setstate
562 state: object
563 /
564[clinic start generated code]*/
565
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000566static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300567_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
568 PyObject *state)
569/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570{
571 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700572 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000573
Oren Milman1d1d3e92017-08-20 18:35:36 +0300574 if (!PyTuple_Check(state)) {
575 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300577 }
578 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
579 &buffer, &flag))
580 {
581 return NULL;
582 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583
Victor Stinner7d7e7752014-06-17 23:31:25 +0200584 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585 flag >>= 1;
586
587 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200588 return _PyObject_CallMethodId(self->decoder,
589 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 else
591 Py_RETURN_NONE;
592}
593
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300594/*[clinic input]
595_io.IncrementalNewlineDecoder.reset
596[clinic start generated code]*/
597
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000598static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300599_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
600/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601{
602 self->seennl = 0;
603 self->pendingcr = 0;
604 if (self->decoder != Py_None)
605 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
606 else
607 Py_RETURN_NONE;
608}
609
610static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612{
613 switch (self->seennl) {
614 case SEEN_CR:
615 return PyUnicode_FromString("\r");
616 case SEEN_LF:
617 return PyUnicode_FromString("\n");
618 case SEEN_CRLF:
619 return PyUnicode_FromString("\r\n");
620 case SEEN_CR | SEEN_LF:
621 return Py_BuildValue("ss", "\r", "\n");
622 case SEEN_CR | SEEN_CRLF:
623 return Py_BuildValue("ss", "\r", "\r\n");
624 case SEEN_LF | SEEN_CRLF:
625 return Py_BuildValue("ss", "\n", "\r\n");
626 case SEEN_CR | SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("sss", "\r", "\n", "\r\n");
628 default:
629 Py_RETURN_NONE;
630 }
631
632}
633
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634/* TextIOWrapper */
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636typedef PyObject *
637 (*encodefunc_t)(PyObject *, PyObject *);
638
639typedef struct
640{
641 PyObject_HEAD
642 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000643 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 Py_ssize_t chunk_size;
645 PyObject *buffer;
646 PyObject *encoding;
647 PyObject *encoder;
648 PyObject *decoder;
649 PyObject *readnl;
650 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900651 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200653 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 char readuniversal;
655 char readtranslate;
656 char writetranslate;
657 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200658 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200660 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000665
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000668
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
674 */
675 PyObject *decoded_chars; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
677 PyObject *pending_bytes; /* list of bytes objects waiting to be
678 written, or NULL */
679 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000680
Oren Milman13614e32017-08-24 19:51:24 +0300681 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
685 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000686 PyObject *snapshot;
687 /* Bytes-to-characters ratio for the current chunk. Serves as input for
688 the heuristic in tell(). */
689 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000690
691 /* Cache raw object if it's a FileIO object */
692 PyObject *raw;
693
694 PyObject *weakreflist;
695 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000696} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000697
Zackery Spytz23db9352018-06-29 04:14:58 -0600698static void
699textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
700
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701/* A couple of specialized cases in order to bypass the slow incremental
702 encoding methods for the most popular encodings. */
703
704static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000705ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706{
INADA Naoki507434f2017-12-21 09:59:53 +0900707 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708}
709
710static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000711utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100713 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900714 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715}
716
717static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000718utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100720 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900721 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722}
723
724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000725utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726{
Antoine Pitroue4501852009-05-14 18:55:55 +0000727 if (!self->encoding_start_of_stream) {
728 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200729#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000734 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100735 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900736 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737}
738
Antoine Pitroue4501852009-05-14 18:55:55 +0000739static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000740utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000741{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100742 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900743 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100749 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900750 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000751}
752
753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000755{
756 if (!self->encoding_start_of_stream) {
757 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200758#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000759 return utf32be_encode(self, text);
760#else
761 return utf32le_encode(self, text);
762#endif
763 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100764 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900765 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000766}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767
768static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000769utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000770{
INADA Naoki507434f2017-12-21 09:59:53 +0900771 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772}
773
774static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000775latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776{
INADA Naoki507434f2017-12-21 09:59:53 +0900777 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778}
779
780/* Map normalized encoding names onto the specialized encoding funcs */
781
782typedef struct {
783 const char *name;
784 encodefunc_t encodefunc;
785} encodefuncentry;
786
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200787static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788 {"ascii", (encodefunc_t) ascii_encode},
789 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000790 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791 {"utf-16-be", (encodefunc_t) utf16be_encode},
792 {"utf-16-le", (encodefunc_t) utf16le_encode},
793 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000794 {"utf-32-be", (encodefunc_t) utf32be_encode},
795 {"utf-32-le", (encodefunc_t) utf32le_encode},
796 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797 {NULL, NULL}
798};
799
INADA Naoki507434f2017-12-21 09:59:53 +0900800static int
801validate_newline(const char *newline)
802{
803 if (newline && newline[0] != '\0'
804 && !(newline[0] == '\n' && newline[1] == '\0')
805 && !(newline[0] == '\r' && newline[1] == '\0')
806 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
807 PyErr_Format(PyExc_ValueError,
808 "illegal newline value: %s", newline);
809 return -1;
810 }
811 return 0;
812}
813
814static int
815set_newline(textio *self, const char *newline)
816{
817 PyObject *old = self->readnl;
818 if (newline == NULL) {
819 self->readnl = NULL;
820 }
821 else {
822 self->readnl = PyUnicode_FromString(newline);
823 if (self->readnl == NULL) {
824 self->readnl = old;
825 return -1;
826 }
827 }
828 self->readuniversal = (newline == NULL || newline[0] == '\0');
829 self->readtranslate = (newline == NULL);
830 self->writetranslate = (newline == NULL || newline[0] != '\0');
831 if (!self->readuniversal && self->readnl != NULL) {
832 // validate_newline() accepts only ASCII newlines.
833 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
834 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
835 if (strcmp(self->writenl, "\n") == 0) {
836 self->writenl = NULL;
837 }
838 }
839 else {
840#ifdef MS_WINDOWS
841 self->writenl = "\r\n";
842#else
843 self->writenl = NULL;
844#endif
845 }
846 Py_XDECREF(old);
847 return 0;
848}
849
850static int
851_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
852 const char *errors)
853{
854 PyObject *res;
855 int r;
856
857 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
858 if (res == NULL)
859 return -1;
860
861 r = PyObject_IsTrue(res);
862 Py_DECREF(res);
863 if (r == -1)
864 return -1;
865
866 if (r != 1)
867 return 0;
868
869 Py_CLEAR(self->decoder);
870 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
871 if (self->decoder == NULL)
872 return -1;
873
874 if (self->readuniversal) {
875 PyObject *incrementalDecoder = PyObject_CallFunction(
876 (PyObject *)&PyIncrementalNewlineDecoder_Type,
877 "Oi", self->decoder, (int)self->readtranslate);
878 if (incrementalDecoder == NULL)
879 return -1;
880 Py_CLEAR(self->decoder);
881 self->decoder = incrementalDecoder;
882 }
883
884 return 0;
885}
886
887static PyObject*
888_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
889{
890 PyObject *chars;
891
892 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
893 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
894 else
895 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
896 eof ? Py_True : Py_False, NULL);
897
898 if (check_decoded(chars) < 0)
899 // check_decoded already decreases refcount
900 return NULL;
901
902 return chars;
903}
904
905static int
906_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
907 const char *errors)
908{
909 PyObject *res;
910 int r;
911
912 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
913 if (res == NULL)
914 return -1;
915
916 r = PyObject_IsTrue(res);
917 Py_DECREF(res);
918 if (r == -1)
919 return -1;
920
921 if (r != 1)
922 return 0;
923
924 Py_CLEAR(self->encoder);
925 self->encodefunc = NULL;
926 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
927 if (self->encoder == NULL)
928 return -1;
929
930 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200931 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
932 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900933 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200934 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900935 const encodefuncentry *e = encodefuncs;
936 while (e->name != NULL) {
937 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
938 self->encodefunc = e->encodefunc;
939 break;
940 }
941 e++;
942 }
943 }
944 Py_XDECREF(res);
945
946 return 0;
947}
948
949static int
950_textiowrapper_fix_encoder_state(textio *self)
951{
952 if (!self->seekable || !self->encoder) {
953 return 0;
954 }
955
956 self->encoding_start_of_stream = 1;
957
958 PyObject *cookieObj = PyObject_CallMethodObjArgs(
959 self->buffer, _PyIO_str_tell, NULL);
960 if (cookieObj == NULL) {
961 return -1;
962 }
963
964 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
965 Py_DECREF(cookieObj);
966 if (cmp < 0) {
967 return -1;
968 }
969
970 if (cmp == 0) {
971 self->encoding_start_of_stream = 0;
972 PyObject *res = PyObject_CallMethodObjArgs(
973 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
974 if (res == NULL) {
975 return -1;
976 }
977 Py_DECREF(res);
978 }
979
980 return 0;
981}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000982
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300983/*[clinic input]
984_io.TextIOWrapper.__init__
985 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700986 encoding: str(accept={str, NoneType}) = NULL
INADA Naoki507434f2017-12-21 09:59:53 +0900987 errors: object = None
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700988 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200989 line_buffering: bool(accept={int}) = False
990 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000991
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300992Character and line based layer over a BufferedIOBase object, buffer.
993
994encoding gives the name of the encoding that the stream will be
995decoded or encoded with. It defaults to locale.getpreferredencoding(False).
996
997errors determines the strictness of encoding and decoding (see
998help(codecs.Codec) or the documentation for codecs.register) and
999defaults to "strict".
1000
1001newline controls how line endings are handled. It can be None, '',
1002'\n', '\r', and '\r\n'. It works as follows:
1003
1004* On input, if newline is None, universal newlines mode is
1005 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1006 these are translated into '\n' before being returned to the
1007 caller. If it is '', universal newline mode is enabled, but line
1008 endings are returned to the caller untranslated. If it has any of
1009 the other legal values, input lines are only terminated by the given
1010 string, and the line ending is returned to the caller untranslated.
1011
1012* On output, if newline is None, any '\n' characters written are
1013 translated to the system default line separator, os.linesep. If
1014 newline is '' or '\n', no translation takes place. If newline is any
1015 of the other legal values, any '\n' characters written are translated
1016 to the given string.
1017
1018If line_buffering is True, a call to flush is implied when a call to
1019write contains a newline character.
1020[clinic start generated code]*/
1021
1022static int
1023_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001024 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001025 const char *newline, int line_buffering,
1026 int write_through)
INADA Naoki507434f2017-12-21 09:59:53 +09001027/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001028{
1029 PyObject *raw, *codec_info = NULL;
1030 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001031 PyObject *res;
1032 int r;
1033
1034 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001035 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001036
INADA Naoki507434f2017-12-21 09:59:53 +09001037 if (errors == Py_None) {
1038 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001039 if (errors == NULL) {
1040 return -1;
1041 }
INADA Naoki507434f2017-12-21 09:59:53 +09001042 }
1043 else if (!PyUnicode_Check(errors)) {
1044 // Check 'errors' argument here because Argument Clinic doesn't support
1045 // 'str(accept={str, NoneType})' converter.
1046 PyErr_Format(
1047 PyExc_TypeError,
1048 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1049 errors->ob_type->tp_name);
1050 return -1;
1051 }
1052
1053 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001054 return -1;
1055 }
1056
1057 Py_CLEAR(self->buffer);
1058 Py_CLEAR(self->encoding);
1059 Py_CLEAR(self->encoder);
1060 Py_CLEAR(self->decoder);
1061 Py_CLEAR(self->readnl);
1062 Py_CLEAR(self->decoded_chars);
1063 Py_CLEAR(self->pending_bytes);
1064 Py_CLEAR(self->snapshot);
1065 Py_CLEAR(self->errors);
1066 Py_CLEAR(self->raw);
1067 self->decoded_chars_used = 0;
1068 self->pending_bytes_count = 0;
1069 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001070 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001071
1072 if (encoding == NULL) {
1073 /* Try os.device_encoding(fileno) */
1074 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001075 state = IO_STATE();
1076 if (state == NULL)
1077 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001078 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001079 /* Ignore only AttributeError and UnsupportedOperation */
1080 if (fileno == NULL) {
1081 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1082 PyErr_ExceptionMatches(state->unsupported_operation)) {
1083 PyErr_Clear();
1084 }
1085 else {
1086 goto error;
1087 }
1088 }
1089 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001090 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001091 Py_DECREF(fileno);
1092 if (fd == -1 && PyErr_Occurred()) {
1093 goto error;
1094 }
1095
1096 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001097 if (self->encoding == NULL)
1098 goto error;
1099 else if (!PyUnicode_Check(self->encoding))
1100 Py_CLEAR(self->encoding);
1101 }
1102 }
1103 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001104 PyObject *locale_module = _PyIO_get_locale_module(state);
1105 if (locale_module == NULL)
1106 goto catch_ImportError;
Victor Stinner61bdb0d2016-12-09 15:39:28 +01001107 self->encoding = _PyObject_CallMethodIdObjArgs(
1108 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001109 Py_DECREF(locale_module);
1110 if (self->encoding == NULL) {
1111 catch_ImportError:
1112 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001113 Importing locale can raise an ImportError because of
1114 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001115 ImportError if _locale is not available. These will happen
1116 during module building.
1117 */
1118 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1119 PyErr_Clear();
1120 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001122 else
1123 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001125 else if (!PyUnicode_Check(self->encoding))
1126 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001127 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001128 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001129 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001130 if (encoding == NULL)
1131 goto error;
1132 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001133 else if (encoding != NULL) {
1134 self->encoding = PyUnicode_FromString(encoding);
1135 if (self->encoding == NULL)
1136 goto error;
1137 }
1138 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001139 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001140 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001141 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001142 }
1143
Nick Coghlana9b15242014-02-04 22:11:18 +10001144 /* Check we have been asked for a real text encoding */
1145 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1146 if (codec_info == NULL) {
1147 Py_CLEAR(self->encoding);
1148 goto error;
1149 }
1150
1151 /* XXX: Failures beyond this point have the potential to leak elements
1152 * of the partially constructed object (like self->encoding)
1153 */
1154
INADA Naoki507434f2017-12-21 09:59:53 +09001155 Py_INCREF(errors);
1156 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001157 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001158 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001159 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001160 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001161 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001162 }
1163
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001164 self->buffer = buffer;
1165 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001166
INADA Naoki507434f2017-12-21 09:59:53 +09001167 /* Build the decoder object */
1168 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1169 goto error;
1170
1171 /* Build the encoder object */
1172 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1173 goto error;
1174
1175 /* Finished sorting out the codec details */
1176 Py_CLEAR(codec_info);
1177
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001178 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1179 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001180 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1181 {
1182 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1183 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001184 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001185 if (raw != NULL) {
1186 if (Py_TYPE(raw) == &PyFileIO_Type)
1187 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001188 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001189 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001190 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001191 }
1192
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001193 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001194 if (res == NULL)
1195 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001196 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001197 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001198 if (r < 0)
1199 goto error;
1200 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001201
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001202 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1203 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001204 goto error;
1205 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001206 Py_XDECREF(res);
1207 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001208
Antoine Pitroue4501852009-05-14 18:55:55 +00001209 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001210 if (_textiowrapper_fix_encoder_state(self) < 0) {
1211 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001212 }
1213
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001214 self->ok = 1;
1215 return 0;
1216
1217 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001218 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001219 return -1;
1220}
1221
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001222/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1223 * -1 on error.
1224 */
1225static int
1226convert_optional_bool(PyObject *obj, int default_value)
1227{
1228 long v;
1229 if (obj == Py_None) {
1230 v = default_value;
1231 }
1232 else {
1233 v = PyLong_AsLong(obj);
1234 if (v == -1 && PyErr_Occurred())
1235 return -1;
1236 }
1237 return v != 0;
1238}
1239
INADA Naoki507434f2017-12-21 09:59:53 +09001240static int
1241textiowrapper_change_encoding(textio *self, PyObject *encoding,
1242 PyObject *errors, int newline_changed)
1243{
1244 /* Use existing settings where new settings are not specified */
1245 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1246 return 0; // no change
1247 }
1248
1249 if (encoding == Py_None) {
1250 encoding = self->encoding;
1251 if (errors == Py_None) {
1252 errors = self->errors;
1253 }
1254 }
1255 else if (errors == Py_None) {
1256 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001257 if (errors == NULL) {
1258 return -1;
1259 }
INADA Naoki507434f2017-12-21 09:59:53 +09001260 }
1261
1262 const char *c_errors = PyUnicode_AsUTF8(errors);
1263 if (c_errors == NULL) {
1264 return -1;
1265 }
1266
1267 // Create new encoder & decoder
1268 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1269 PyUnicode_AsUTF8(encoding), "codecs.open()");
1270 if (codec_info == NULL) {
1271 return -1;
1272 }
1273 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1274 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1275 Py_DECREF(codec_info);
1276 return -1;
1277 }
1278 Py_DECREF(codec_info);
1279
1280 Py_INCREF(encoding);
1281 Py_INCREF(errors);
1282 Py_SETREF(self->encoding, encoding);
1283 Py_SETREF(self->errors, errors);
1284
1285 return _textiowrapper_fix_encoder_state(self);
1286}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001287
1288/*[clinic input]
1289_io.TextIOWrapper.reconfigure
1290 *
INADA Naoki507434f2017-12-21 09:59:53 +09001291 encoding: object = None
1292 errors: object = None
1293 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001294 line_buffering as line_buffering_obj: object = None
1295 write_through as write_through_obj: object = None
1296
1297Reconfigure the text stream with new parameters.
1298
1299This also does an implicit stream flush.
1300
1301[clinic start generated code]*/
1302
1303static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001304_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1305 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001306 PyObject *line_buffering_obj,
1307 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001308/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001309{
1310 int line_buffering;
1311 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001312 const char *newline = NULL;
1313
1314 /* Check if something is in the read buffer */
1315 if (self->decoded_chars != NULL) {
1316 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001317 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001318 "of stream after the first read");
1319 return NULL;
1320 }
1321 }
1322
1323 if (newline_obj != NULL && newline_obj != Py_None) {
1324 newline = PyUnicode_AsUTF8(newline_obj);
1325 if (newline == NULL || validate_newline(newline) < 0) {
1326 return NULL;
1327 }
1328 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001329
1330 line_buffering = convert_optional_bool(line_buffering_obj,
1331 self->line_buffering);
1332 write_through = convert_optional_bool(write_through_obj,
1333 self->write_through);
1334 if (line_buffering < 0 || write_through < 0) {
1335 return NULL;
1336 }
INADA Naoki507434f2017-12-21 09:59:53 +09001337
1338 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001339 if (res == NULL) {
1340 return NULL;
1341 }
INADA Naoki507434f2017-12-21 09:59:53 +09001342 Py_DECREF(res);
1343 self->b2cratio = 0;
1344
1345 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1346 return NULL;
1347 }
1348
1349 if (textiowrapper_change_encoding(
1350 self, encoding, errors, newline_obj != NULL) < 0) {
1351 return NULL;
1352 }
1353
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001354 self->line_buffering = line_buffering;
1355 self->write_through = write_through;
1356 Py_RETURN_NONE;
1357}
1358
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001359static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001360textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001361{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001362 self->ok = 0;
1363 Py_CLEAR(self->buffer);
1364 Py_CLEAR(self->encoding);
1365 Py_CLEAR(self->encoder);
1366 Py_CLEAR(self->decoder);
1367 Py_CLEAR(self->readnl);
1368 Py_CLEAR(self->decoded_chars);
1369 Py_CLEAR(self->pending_bytes);
1370 Py_CLEAR(self->snapshot);
1371 Py_CLEAR(self->errors);
1372 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001373
1374 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001375 return 0;
1376}
1377
1378static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001379textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001380{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001381 self->finalizing = 1;
1382 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001383 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001384 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001385 _PyObject_GC_UNTRACK(self);
1386 if (self->weakreflist != NULL)
1387 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001388 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001389 Py_TYPE(self)->tp_free((PyObject *)self);
1390}
1391
1392static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001393textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001394{
1395 Py_VISIT(self->buffer);
1396 Py_VISIT(self->encoding);
1397 Py_VISIT(self->encoder);
1398 Py_VISIT(self->decoder);
1399 Py_VISIT(self->readnl);
1400 Py_VISIT(self->decoded_chars);
1401 Py_VISIT(self->pending_bytes);
1402 Py_VISIT(self->snapshot);
1403 Py_VISIT(self->errors);
1404 Py_VISIT(self->raw);
1405
1406 Py_VISIT(self->dict);
1407 return 0;
1408}
1409
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001410static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001411textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001412
1413/* This macro takes some shortcuts to make the common case faster. */
1414#define CHECK_CLOSED(self) \
1415 do { \
1416 int r; \
1417 PyObject *_res; \
1418 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1419 if (self->raw != NULL) \
1420 r = _PyFileIO_closed(self->raw); \
1421 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001422 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001423 if (_res == NULL) \
1424 return NULL; \
1425 r = PyObject_IsTrue(_res); \
1426 Py_DECREF(_res); \
1427 if (r < 0) \
1428 return NULL; \
1429 } \
1430 if (r > 0) { \
1431 PyErr_SetString(PyExc_ValueError, \
1432 "I/O operation on closed file."); \
1433 return NULL; \
1434 } \
1435 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001436 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001437 return NULL; \
1438 } while (0)
1439
/* Make the enclosing function return NULL with ValueError set unless the
   object has been successfully initialized (self->ok > 0).
   Wrapped in do { } while (0) so that it expands to a single statement and
   cannot capture a following 'else'.  Use it as `CHECK_INITIALIZED(self);`. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1446
/* Make the enclosing function return NULL with ValueError set unless the
   object is initialized and its underlying buffer is still attached.
   Wrapped in do { } while (0): the previous expansion consisted of two
   statements, so only the first one would have been guarded when the macro
   was used as the body of an unbraced 'if'.  Use it as
   `CHECK_ATTACHED(self);`. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)
1454
/* Variant of CHECK_ATTACHED for functions returning int: makes the
   enclosing function return -1 with ValueError set when the object is not
   initialized or its underlying buffer has been detached.
   Wrapped in do { } while (0) so that it expands to a single statement and
   cannot capture a following 'else'.  Use it as `CHECK_ATTACHED_INT(self);`. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1465
1466
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001467/*[clinic input]
1468_io.TextIOWrapper.detach
1469[clinic start generated code]*/
1470
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001471static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001472_io_TextIOWrapper_detach_impl(textio *self)
1473/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001474{
1475 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001476 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001477 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1478 if (res == NULL)
1479 return NULL;
1480 Py_DECREF(res);
1481 buffer = self->buffer;
1482 self->buffer = NULL;
1483 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001484 return buffer;
1485}
1486
Antoine Pitrou24f36292009-03-28 22:16:42 +00001487/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001488 underlying buffered object, though. */
1489static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001490_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001491{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001492 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001493
1494 if (self->pending_bytes == NULL)
1495 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001496
1497 pending = self->pending_bytes;
1498 Py_INCREF(pending);
1499 self->pending_bytes_count = 0;
1500 Py_CLEAR(self->pending_bytes);
1501
1502 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1503 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001504 if (b == NULL)
1505 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001506 ret = NULL;
1507 do {
1508 ret = PyObject_CallMethodObjArgs(self->buffer,
1509 _PyIO_str_write, b, NULL);
1510 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511 Py_DECREF(b);
1512 if (ret == NULL)
1513 return -1;
1514 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001515 return 0;
1516}
1517
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001518/*[clinic input]
1519_io.TextIOWrapper.write
1520 text: unicode
1521 /
1522[clinic start generated code]*/
1523
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001524static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001525_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1526/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001527{
1528 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001529 PyObject *b;
1530 Py_ssize_t textlen;
1531 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001532 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001533
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001534 if (PyUnicode_READY(text) == -1)
1535 return NULL;
1536
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001537 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538 CHECK_CLOSED(self);
1539
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001540 if (self->encoder == NULL)
1541 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001542
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001543 Py_INCREF(text);
1544
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001545 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546
1547 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001548 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549 haslf = 1;
1550
1551 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001552 PyObject *newtext = _PyObject_CallMethodId(
1553 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 Py_DECREF(text);
1555 if (newtext == NULL)
1556 return NULL;
1557 text = newtext;
1558 }
1559
Antoine Pitroue96ec682011-07-23 21:46:35 +02001560 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001561 text_needflush = 1;
1562 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001563 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001564 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001565 needflush = 1;
1566
1567 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001568 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001570 self->encoding_start_of_stream = 0;
1571 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001572 else
1573 b = PyObject_CallMethodObjArgs(self->encoder,
1574 _PyIO_str_encode, text, NULL);
1575 Py_DECREF(text);
1576 if (b == NULL)
1577 return NULL;
Oren Milmana5b4ea12017-08-25 21:14:54 +03001578 if (!PyBytes_Check(b)) {
1579 PyErr_Format(PyExc_TypeError,
1580 "encoder should return a bytes object, not '%.200s'",
1581 Py_TYPE(b)->tp_name);
1582 Py_DECREF(b);
1583 return NULL;
1584 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001585
1586 if (self->pending_bytes == NULL) {
1587 self->pending_bytes = PyList_New(0);
1588 if (self->pending_bytes == NULL) {
1589 Py_DECREF(b);
1590 return NULL;
1591 }
1592 self->pending_bytes_count = 0;
1593 }
1594 if (PyList_Append(self->pending_bytes, b) < 0) {
1595 Py_DECREF(b);
1596 return NULL;
1597 }
1598 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1599 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001600 if (self->pending_bytes_count > self->chunk_size || needflush ||
1601 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001602 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 return NULL;
1604 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001605
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001606 if (needflush) {
1607 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1608 if (ret == NULL)
1609 return NULL;
1610 Py_DECREF(ret);
1611 }
1612
Zackery Spytz23db9352018-06-29 04:14:58 -06001613 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 Py_CLEAR(self->snapshot);
1615
1616 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001617 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001618 if (ret == NULL)
1619 return NULL;
1620 Py_DECREF(ret);
1621 }
1622
1623 return PyLong_FromSsize_t(textlen);
1624}
1625
1626/* Steal a reference to chars and store it in the decoded_char buffer;
1627 */
1628static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001629textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001631 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 self->decoded_chars_used = 0;
1633}
1634
1635static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001636textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001637{
1638 PyObject *chars;
1639 Py_ssize_t avail;
1640
1641 if (self->decoded_chars == NULL)
1642 return PyUnicode_FromStringAndSize(NULL, 0);
1643
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001644 /* decoded_chars is guaranteed to be "ready". */
1645 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001646 - self->decoded_chars_used);
1647
1648 assert(avail >= 0);
1649
1650 if (n < 0 || n > avail)
1651 n = avail;
1652
1653 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001654 chars = PyUnicode_Substring(self->decoded_chars,
1655 self->decoded_chars_used,
1656 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 if (chars == NULL)
1658 return NULL;
1659 }
1660 else {
1661 chars = self->decoded_chars;
1662 Py_INCREF(chars);
1663 }
1664
1665 self->decoded_chars_used += n;
1666 return chars;
1667}
1668
1669/* Read and decode the next chunk of data from the BufferedReader.
1670 */
1671static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001672textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673{
1674 PyObject *dec_buffer = NULL;
1675 PyObject *dec_flags = NULL;
1676 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001677 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001678 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001679 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 int eof;
1681
1682 /* The return value is True unless EOF was reached. The decoded string is
1683 * placed in self._decoded_chars (replacing its previous value). The
1684 * entire input chunk is sent to the decoder, though some of it may remain
1685 * buffered in the decoder, yet to be converted.
1686 */
1687
1688 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001689 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001690 return -1;
1691 }
1692
1693 if (self->telling) {
1694 /* To prepare for tell(), we need to snapshot a point in the file
1695 * where the decoder's input buffer is empty.
1696 */
1697
1698 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1699 _PyIO_str_getstate, NULL);
1700 if (state == NULL)
1701 return -1;
1702 /* Given this, we know there was a valid snapshot point
1703 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1704 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001705 if (!PyTuple_Check(state)) {
1706 PyErr_SetString(PyExc_TypeError,
1707 "illegal decoder state");
1708 Py_DECREF(state);
1709 return -1;
1710 }
1711 if (!PyArg_ParseTuple(state,
1712 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1713 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 Py_DECREF(state);
1715 return -1;
1716 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001717
1718 if (!PyBytes_Check(dec_buffer)) {
1719 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001720 "illegal decoder state: the first item should be a "
1721 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001722 Py_TYPE(dec_buffer)->tp_name);
1723 Py_DECREF(state);
1724 return -1;
1725 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001726 Py_INCREF(dec_buffer);
1727 Py_INCREF(dec_flags);
1728 Py_DECREF(state);
1729 }
1730
1731 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001732 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001733 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001734 }
1735 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001736 if (chunk_size == NULL)
1737 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001738
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001740 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1741 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001742 Py_DECREF(chunk_size);
1743 if (input_chunk == NULL)
1744 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001745
1746 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001747 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001748 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001749 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1750 Py_TYPE(input_chunk)->tp_name);
1751 goto fail;
1752 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753
Antoine Pitroub8503892014-04-29 10:14:02 +02001754 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001755 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756
INADA Naoki507434f2017-12-21 09:59:53 +09001757 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1758 PyBuffer_Release(&input_chunk_buf);
1759 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001760 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001761
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001762 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001763 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001764 if (nchars > 0)
1765 self->b2cratio = (double) nbytes / nchars;
1766 else
1767 self->b2cratio = 0.0;
1768 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001769 eof = 0;
1770
1771 if (self->telling) {
1772 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1773 * next input to be decoded is dec_buffer + input_chunk.
1774 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001775 PyObject *next_input = dec_buffer;
1776 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001777 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001778 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001779 goto fail;
1780 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001781 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1782 if (snapshot == NULL) {
1783 dec_flags = NULL;
1784 goto fail;
1785 }
1786 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001787 }
1788 Py_DECREF(input_chunk);
1789
1790 return (eof == 0);
1791
1792 fail:
1793 Py_XDECREF(dec_buffer);
1794 Py_XDECREF(dec_flags);
1795 Py_XDECREF(input_chunk);
1796 return -1;
1797}
1798
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001799/*[clinic input]
1800_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001801 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001802 /
1803[clinic start generated code]*/
1804
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001806_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001807/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001808{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001809 PyObject *result = NULL, *chunks = NULL;
1810
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001811 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 CHECK_CLOSED(self);
1813
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001814 if (self->decoder == NULL)
1815 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001816
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001817 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001818 return NULL;
1819
1820 if (n < 0) {
1821 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001822 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001823 PyObject *decoded;
1824 if (bytes == NULL)
1825 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001826
1827 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1828 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1829 bytes, 1);
1830 else
1831 decoded = PyObject_CallMethodObjArgs(
1832 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001833 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001834 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001835 goto fail;
1836
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001837 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001838
1839 if (result == NULL) {
1840 Py_DECREF(decoded);
1841 return NULL;
1842 }
1843
1844 PyUnicode_AppendAndDel(&result, decoded);
1845 if (result == NULL)
1846 goto fail;
1847
Zackery Spytz23db9352018-06-29 04:14:58 -06001848 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001849 Py_CLEAR(self->snapshot);
1850 return result;
1851 }
1852 else {
1853 int res = 1;
1854 Py_ssize_t remaining = n;
1855
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001856 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001857 if (result == NULL)
1858 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001859 if (PyUnicode_READY(result) == -1)
1860 goto fail;
1861 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001862
1863 /* Keep reading chunks until we have n characters to return */
1864 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001865 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001866 if (res < 0) {
1867 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1868 when EINTR occurs so we needn't do it ourselves. */
1869 if (_PyIO_trap_eintr()) {
1870 continue;
1871 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001873 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001874 if (res == 0) /* EOF */
1875 break;
1876 if (chunks == NULL) {
1877 chunks = PyList_New(0);
1878 if (chunks == NULL)
1879 goto fail;
1880 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001881 if (PyUnicode_GET_LENGTH(result) > 0 &&
1882 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001883 goto fail;
1884 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001885 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001886 if (result == NULL)
1887 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001888 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001889 }
1890 if (chunks != NULL) {
1891 if (result != NULL && PyList_Append(chunks, result) < 0)
1892 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03001893 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001894 if (result == NULL)
1895 goto fail;
1896 Py_CLEAR(chunks);
1897 }
1898 return result;
1899 }
1900 fail:
1901 Py_XDECREF(result);
1902 Py_XDECREF(chunks);
1903 return NULL;
1904}
1905
1906
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001907/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001908 that is to the NUL character. Otherwise the function will produce
1909 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001910static const char *
1911find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001912{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001913 if (kind == PyUnicode_1BYTE_KIND) {
1914 assert(ch < 256);
1915 return (char *) memchr((void *) s, (char) ch, end - s);
1916 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001917 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001918 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001919 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001920 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921 return s;
1922 if (s == end)
1923 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001924 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925 }
1926}
1927
1928Py_ssize_t
1929_PyIO_find_line_ending(
1930 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001931 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001932{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001933 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001934
1935 if (translated) {
1936 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001937 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001938 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001939 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001940 else {
1941 *consumed = len;
1942 return -1;
1943 }
1944 }
1945 else if (universal) {
1946 /* Universal newline search. Find any of \r, \r\n, \n
1947 * The decoder ensures that \r\n are not split in two pieces
1948 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001949 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001950 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001951 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001952 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001953 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001954 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001955 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001956 if (s >= end) {
1957 *consumed = len;
1958 return -1;
1959 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001960 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001961 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001963 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001964 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001965 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001966 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001967 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001968 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969 }
1970 }
1971 }
1972 else {
1973 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001974 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001975 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001976 /* Assume that readnl is an ASCII character. */
1977 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001978 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001979 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001980 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001981 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001982 *consumed = len;
1983 return -1;
1984 }
1985 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001986 const char *s = start;
1987 const char *e = end - (readnl_len - 1)*kind;
1988 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989 if (e < s)
1990 e = s;
1991 while (s < e) {
1992 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001993 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 if (pos == NULL || pos >= e)
1995 break;
1996 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001997 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998 break;
1999 }
2000 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002001 return (pos - start)/kind + readnl_len;
2002 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002003 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002004 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005 if (pos == NULL)
2006 *consumed = len;
2007 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002008 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009 return -1;
2010 }
2011 }
2012}
2013
2014static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002015_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016{
2017 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2018 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2019 int res;
2020
2021 CHECK_CLOSED(self);
2022
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002023 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024 return NULL;
2025
2026 chunked = 0;
2027
2028 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002029 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002031 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002032 Py_ssize_t consumed = 0;
2033
2034 /* First, get some data if necessary */
2035 res = 1;
2036 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002037 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002038 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002039 if (res < 0) {
2040 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2041 when EINTR occurs so we needn't do it ourselves. */
2042 if (_PyIO_trap_eintr()) {
2043 continue;
2044 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002045 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002046 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047 if (res == 0)
2048 break;
2049 }
2050 if (res == 0) {
2051 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002052 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 Py_CLEAR(self->snapshot);
2054 start = endpos = offset_to_buffer = 0;
2055 break;
2056 }
2057
2058 if (remaining == NULL) {
2059 line = self->decoded_chars;
2060 start = self->decoded_chars_used;
2061 offset_to_buffer = 0;
2062 Py_INCREF(line);
2063 }
2064 else {
2065 assert(self->decoded_chars_used == 0);
2066 line = PyUnicode_Concat(remaining, self->decoded_chars);
2067 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002068 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002069 Py_CLEAR(remaining);
2070 if (line == NULL)
2071 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002072 if (PyUnicode_READY(line) == -1)
2073 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 }
2075
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002076 ptr = PyUnicode_DATA(line);
2077 line_len = PyUnicode_GET_LENGTH(line);
2078 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079
2080 endpos = _PyIO_find_line_ending(
2081 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002082 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002083 ptr + kind * start,
2084 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002085 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 if (endpos >= 0) {
2087 endpos += start;
2088 if (limit >= 0 && (endpos - start) + chunked >= limit)
2089 endpos = start + limit - chunked;
2090 break;
2091 }
2092
2093 /* We can put aside up to `endpos` */
2094 endpos = consumed + start;
2095 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2096 /* Didn't find line ending, but reached length limit */
2097 endpos = start + limit - chunked;
2098 break;
2099 }
2100
2101 if (endpos > start) {
2102 /* No line ending seen yet - put aside current data */
2103 PyObject *s;
2104 if (chunks == NULL) {
2105 chunks = PyList_New(0);
2106 if (chunks == NULL)
2107 goto error;
2108 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002109 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 if (s == NULL)
2111 goto error;
2112 if (PyList_Append(chunks, s) < 0) {
2113 Py_DECREF(s);
2114 goto error;
2115 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002116 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117 Py_DECREF(s);
2118 }
2119 /* There may be some remaining bytes we'll have to prepend to the
2120 next chunk of data */
2121 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002122 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123 if (remaining == NULL)
2124 goto error;
2125 }
2126 Py_CLEAR(line);
2127 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002128 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002129 }
2130
2131 if (line != NULL) {
2132 /* Our line ends in the current buffer */
2133 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002134 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2135 PyObject *s = PyUnicode_Substring(line, start, endpos);
2136 Py_CLEAR(line);
2137 if (s == NULL)
2138 goto error;
2139 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002140 }
2141 }
2142 if (remaining != NULL) {
2143 if (chunks == NULL) {
2144 chunks = PyList_New(0);
2145 if (chunks == NULL)
2146 goto error;
2147 }
2148 if (PyList_Append(chunks, remaining) < 0)
2149 goto error;
2150 Py_CLEAR(remaining);
2151 }
2152 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002153 if (line != NULL) {
2154 if (PyList_Append(chunks, line) < 0)
2155 goto error;
2156 Py_DECREF(line);
2157 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002158 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2159 if (line == NULL)
2160 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002161 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002162 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002163 if (line == NULL) {
2164 Py_INCREF(_PyIO_empty_str);
2165 line = _PyIO_empty_str;
2166 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167
2168 return line;
2169
2170 error:
2171 Py_XDECREF(chunks);
2172 Py_XDECREF(remaining);
2173 Py_XDECREF(line);
2174 return NULL;
2175}
2176
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002177/*[clinic input]
2178_io.TextIOWrapper.readline
2179 size: Py_ssize_t = -1
2180 /
2181[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002182
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002183static PyObject *
2184_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2185/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2186{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002187 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002188 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002189}
2190
2191/* Seek and Tell */
2192
2193typedef struct {
2194 Py_off_t start_pos;
2195 int dec_flags;
2196 int bytes_to_feed;
2197 int chars_to_skip;
2198 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002199} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002200
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the
   intermediary byte string the various cookie_type fields are stored; each
   offset is expressed relative to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2232
2233static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002234textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235{
2236 unsigned char buffer[COOKIE_BUF_LEN];
2237 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2238 if (cookieLong == NULL)
2239 return -1;
2240
2241 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002242 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002243 Py_DECREF(cookieLong);
2244 return -1;
2245 }
2246 Py_DECREF(cookieLong);
2247
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002248 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2249 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2250 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2251 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2252 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002253
2254 return 0;
2255}
2256
2257static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002258textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002259{
2260 unsigned char buffer[COOKIE_BUF_LEN];
2261
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002262 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2263 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2264 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2265 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2266 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002267
Christian Heimes743e0cd2012-10-17 23:52:17 +02002268 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2269 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002270}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002271
2272static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002273_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002274{
2275 PyObject *res;
2276 /* When seeking to the start of the stream, we call decoder.reset()
2277 rather than decoder.getstate().
2278 This is for a few decoders such as utf-16 for which the state value
2279 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2280 utf-16, that we are expecting a BOM).
2281 */
2282 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2283 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2284 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002285 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2286 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002287 if (res == NULL)
2288 return -1;
2289 Py_DECREF(res);
2290 return 0;
2291}
2292
Antoine Pitroue4501852009-05-14 18:55:55 +00002293static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002294_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002295{
2296 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002297 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002298 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2299 self->encoding_start_of_stream = 1;
2300 }
2301 else {
2302 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002303 _PyLong_Zero, NULL);
Antoine Pitroue4501852009-05-14 18:55:55 +00002304 self->encoding_start_of_stream = 0;
2305 }
2306 if (res == NULL)
2307 return -1;
2308 Py_DECREF(res);
2309 return 0;
2310}
2311
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002312static int
2313_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2314{
2315 /* Same as _textiowrapper_decoder_setstate() above. */
2316 return _textiowrapper_encoder_reset(
2317 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2318}
2319
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002320/*[clinic input]
2321_io.TextIOWrapper.seek
2322 cookie as cookieObj: object
2323 whence: int = 0
2324 /
2325[clinic start generated code]*/
2326
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002327static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002328_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2329/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002330{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002331 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002332 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002333 PyObject *res;
2334 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002335 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002336
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002337 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002338 CHECK_CLOSED(self);
2339
2340 Py_INCREF(cookieObj);
2341
2342 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002343 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002344 goto fail;
2345 }
2346
ngie-eign848037c2019-03-02 23:28:26 -08002347 switch (whence) {
2348 case SEEK_CUR:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002349 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002350 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002351 if (cmp < 0)
2352 goto fail;
2353
2354 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002355 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002356 goto fail;
2357 }
2358
2359 /* Seeking to the current position should attempt to
2360 * sync the underlying buffer with the current position.
2361 */
2362 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002363 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364 if (cookieObj == NULL)
2365 goto fail;
Inada Naoki8c17d922019-03-04 01:22:39 +09002366 break;
2367
ngie-eign848037c2019-03-02 23:28:26 -08002368 case SEEK_END:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002369 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002370 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371 if (cmp < 0)
2372 goto fail;
2373
2374 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002375 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002376 goto fail;
2377 }
2378
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002379 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002380 if (res == NULL)
2381 goto fail;
2382 Py_DECREF(res);
2383
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002384 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002385 Py_CLEAR(self->snapshot);
2386 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002387 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002388 if (res == NULL)
2389 goto fail;
2390 Py_DECREF(res);
2391 }
2392
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002393 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002394 Py_CLEAR(cookieObj);
2395 if (res == NULL)
2396 goto fail;
2397 if (self->encoder) {
2398 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002399 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002400 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2401 Py_DECREF(res);
2402 goto fail;
2403 }
2404 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002405 return res;
Inada Naoki8c17d922019-03-04 01:22:39 +09002406
ngie-eign848037c2019-03-02 23:28:26 -08002407 case SEEK_SET:
2408 break;
Inada Naoki8c17d922019-03-04 01:22:39 +09002409
ngie-eign848037c2019-03-02 23:28:26 -08002410 default:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002411 PyErr_Format(PyExc_ValueError,
ngie-eign848037c2019-03-02 23:28:26 -08002412 "invalid whence (%d, should be %d, %d or %d)", whence,
2413 SEEK_SET, SEEK_CUR, SEEK_END);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002414 goto fail;
2415 }
2416
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002417 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418 if (cmp < 0)
2419 goto fail;
2420
2421 if (cmp == 1) {
2422 PyErr_Format(PyExc_ValueError,
2423 "negative seek position %R", cookieObj);
2424 goto fail;
2425 }
2426
2427 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2428 if (res == NULL)
2429 goto fail;
2430 Py_DECREF(res);
2431
2432 /* The strategy of seek() is to go back to the safe start point
2433 * and replay the effect of read(chars_to_skip) from there.
2434 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002435 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002436 goto fail;
2437
2438 /* Seek back to the safe start point. */
2439 posobj = PyLong_FromOff_t(cookie.start_pos);
2440 if (posobj == NULL)
2441 goto fail;
2442 res = PyObject_CallMethodObjArgs(self->buffer,
2443 _PyIO_str_seek, posobj, NULL);
2444 Py_DECREF(posobj);
2445 if (res == NULL)
2446 goto fail;
2447 Py_DECREF(res);
2448
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002449 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450 Py_CLEAR(self->snapshot);
2451
2452 /* Restore the decoder to its state from the safe start point. */
2453 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002454 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002455 goto fail;
2456 }
2457
2458 if (cookie.chars_to_skip) {
2459 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002460 PyObject *input_chunk = _PyObject_CallMethodId(
2461 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002462 PyObject *decoded;
2463
2464 if (input_chunk == NULL)
2465 goto fail;
2466
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002467 if (!PyBytes_Check(input_chunk)) {
2468 PyErr_Format(PyExc_TypeError,
2469 "underlying read() should have returned a bytes "
2470 "object, not '%.200s'",
2471 Py_TYPE(input_chunk)->tp_name);
2472 Py_DECREF(input_chunk);
2473 goto fail;
2474 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002475
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002476 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2477 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478 goto fail;
2479 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002480 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002481
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002482 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2483 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002484
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002485 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486 goto fail;
2487
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002488 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489
2490 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002491 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002492 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002493 goto fail;
2494 }
2495 self->decoded_chars_used = cookie.chars_to_skip;
2496 }
2497 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002498 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2499 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002501 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002502 }
2503
Antoine Pitroue4501852009-05-14 18:55:55 +00002504 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2505 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002506 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002507 goto fail;
2508 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002509 return cookieObj;
2510 fail:
2511 Py_XDECREF(cookieObj);
2512 return NULL;
2513
2514}
2515
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002516/*[clinic input]
2517_io.TextIOWrapper.tell
2518[clinic start generated code]*/
2519
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002521_io_TextIOWrapper_tell_impl(textio *self)
2522/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002523{
2524 PyObject *res;
2525 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002526 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002527 PyObject *next_input;
2528 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002529 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530 PyObject *saved_state = NULL;
2531 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002532 Py_ssize_t dec_buffer_len;
2533 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002535 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536 CHECK_CLOSED(self);
2537
2538 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002539 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002540 goto fail;
2541 }
2542 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002543 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002544 "telling position disabled by next() call");
2545 goto fail;
2546 }
2547
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002548 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002550 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551 if (res == NULL)
2552 goto fail;
2553 Py_DECREF(res);
2554
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002555 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556 if (posobj == NULL)
2557 goto fail;
2558
2559 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002560 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002561 return posobj;
2562 }
2563
2564#if defined(HAVE_LARGEFILE_SUPPORT)
2565 cookie.start_pos = PyLong_AsLongLong(posobj);
2566#else
2567 cookie.start_pos = PyLong_AsLong(posobj);
2568#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002569 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002570 if (PyErr_Occurred())
2571 goto fail;
2572
2573 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002574 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002575 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576 goto fail;
2577
2578 assert (PyBytes_Check(next_input));
2579
2580 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2581
2582 /* How many decoded characters have been used up since the snapshot? */
2583 if (self->decoded_chars_used == 0) {
2584 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002585 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002586 }
2587
2588 chars_to_skip = self->decoded_chars_used;
2589
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002590 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002591 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2592 _PyIO_str_getstate, NULL);
2593 if (saved_state == NULL)
2594 goto fail;
2595
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002596#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002597 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002598 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2599 _PyIO_str_getstate, NULL); \
2600 if (_state == NULL) \
2601 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002602 if (!PyTuple_Check(_state)) { \
2603 PyErr_SetString(PyExc_TypeError, \
2604 "illegal decoder state"); \
2605 Py_DECREF(_state); \
2606 goto fail; \
2607 } \
2608 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2609 &dec_buffer, &dec_flags)) \
2610 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002611 Py_DECREF(_state); \
2612 goto fail; \
2613 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002614 if (!PyBytes_Check(dec_buffer)) { \
2615 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002616 "illegal decoder state: the first item should be a " \
2617 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002618 Py_TYPE(dec_buffer)->tp_name); \
2619 Py_DECREF(_state); \
2620 goto fail; \
2621 } \
2622 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002623 Py_DECREF(_state); \
2624 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002625
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002626#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002627 PyObject *_decoded = _PyObject_CallMethodId( \
2628 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002629 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002630 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002631 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002632 Py_DECREF(_decoded); \
2633 } while (0)
2634
2635 /* Fast search for an acceptable start point, close to our
2636 current pos */
2637 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2638 skip_back = 1;
2639 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2640 input = PyBytes_AS_STRING(next_input);
2641 while (skip_bytes > 0) {
2642 /* Decode up to temptative start point */
2643 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2644 goto fail;
2645 DECODER_DECODE(input, skip_bytes, chars_decoded);
2646 if (chars_decoded <= chars_to_skip) {
2647 DECODER_GETSTATE();
2648 if (dec_buffer_len == 0) {
2649 /* Before pos and no bytes buffered in decoder => OK */
2650 cookie.dec_flags = dec_flags;
2651 chars_to_skip -= chars_decoded;
2652 break;
2653 }
2654 /* Skip back by buffered amount and reset heuristic */
2655 skip_bytes -= dec_buffer_len;
2656 skip_back = 1;
2657 }
2658 else {
2659 /* We're too far ahead, skip back a bit */
2660 skip_bytes -= skip_back;
2661 skip_back *= 2;
2662 }
2663 }
2664 if (skip_bytes <= 0) {
2665 skip_bytes = 0;
2666 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2667 goto fail;
2668 }
2669
2670 /* Note our initial start point. */
2671 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002672 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002673 if (chars_to_skip == 0)
2674 goto finally;
2675
2676 /* We should be close to the desired position. Now feed the decoder one
2677 * byte at a time until we reach the `chars_to_skip` target.
2678 * As we go, note the nearest "safe start point" before the current
2679 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680 * can safely start from there and advance to this location).
2681 */
2682 chars_decoded = 0;
2683 input = PyBytes_AS_STRING(next_input);
2684 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002685 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002687 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002688
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002689 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002690 /* We got n chars for 1 byte */
2691 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002692 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002693 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002694
2695 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2696 /* Decoder buffer is empty, so this is a safe start point. */
2697 cookie.start_pos += cookie.bytes_to_feed;
2698 chars_to_skip -= chars_decoded;
2699 cookie.dec_flags = dec_flags;
2700 cookie.bytes_to_feed = 0;
2701 chars_decoded = 0;
2702 }
2703 if (chars_decoded >= chars_to_skip)
2704 break;
2705 input++;
2706 }
2707 if (input == input_end) {
2708 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002709 PyObject *decoded = _PyObject_CallMethodId(
2710 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002711 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002712 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002713 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002714 Py_DECREF(decoded);
2715 cookie.need_eof = 1;
2716
2717 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002718 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719 "can't reconstruct logical file position");
2720 goto fail;
2721 }
2722 }
2723
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002724finally:
Victor Stinner7e425412016-12-09 00:36:19 +01002725 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002726 Py_DECREF(saved_state);
2727 if (res == NULL)
2728 return NULL;
2729 Py_DECREF(res);
2730
2731 /* The returned cookie corresponds to the last safe start point. */
2732 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002733 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002734
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002735fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002736 if (saved_state) {
2737 PyObject *type, *value, *traceback;
2738 PyErr_Fetch(&type, &value, &traceback);
Victor Stinner7e425412016-12-09 00:36:19 +01002739 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002740 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002741 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002742 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002743 }
2744 return NULL;
2745}
2746
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002747/*[clinic input]
2748_io.TextIOWrapper.truncate
2749 pos: object = None
2750 /
2751[clinic start generated code]*/
2752
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002753static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002754_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2755/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002756{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002757 PyObject *res;
2758
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002759 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002760
2761 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2762 if (res == NULL)
2763 return NULL;
2764 Py_DECREF(res);
2765
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002766 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002767}
2768
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002769static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002770textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002771{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002772 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002773 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002774
2775 CHECK_INITIALIZED(self);
2776
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002777 res = PyUnicode_FromString("<_io.TextIOWrapper");
2778 if (res == NULL)
2779 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002780
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002781 status = Py_ReprEnter((PyObject *)self);
2782 if (status != 0) {
2783 if (status > 0) {
2784 PyErr_Format(PyExc_RuntimeError,
2785 "reentrant call inside %s.__repr__",
2786 Py_TYPE(self)->tp_name);
2787 }
2788 goto error;
2789 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002790 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002791 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002792 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002793 PyErr_Clear();
2794 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002795 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002796 }
2797 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002798 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002799 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002800 if (s == NULL)
2801 goto error;
2802 PyUnicode_AppendAndDel(&res, s);
2803 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002804 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002805 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002806 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002807 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002808 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002809 PyErr_Clear();
2810 else
2811 goto error;
2812 }
2813 else {
2814 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2815 Py_DECREF(modeobj);
2816 if (s == NULL)
2817 goto error;
2818 PyUnicode_AppendAndDel(&res, s);
2819 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002820 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002821 }
2822 s = PyUnicode_FromFormat("%U encoding=%R>",
2823 res, self->encoding);
2824 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002825 if (status == 0) {
2826 Py_ReprLeave((PyObject *)self);
2827 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002828 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002829
2830 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002831 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002832 if (status == 0) {
2833 Py_ReprLeave((PyObject *)self);
2834 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002835 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002836}
2837
2838
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002839/* Inquiries */
2840
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002841/*[clinic input]
2842_io.TextIOWrapper.fileno
2843[clinic start generated code]*/
2844
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002845static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002846_io_TextIOWrapper_fileno_impl(textio *self)
2847/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002848{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002849 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002850 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002851}
2852
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002853/*[clinic input]
2854_io.TextIOWrapper.seekable
2855[clinic start generated code]*/
2856
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002857static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002858_io_TextIOWrapper_seekable_impl(textio *self)
2859/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002860{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002861 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002862 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002863}
2864
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002865/*[clinic input]
2866_io.TextIOWrapper.readable
2867[clinic start generated code]*/
2868
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002869static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002870_io_TextIOWrapper_readable_impl(textio *self)
2871/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002872{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002873 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002874 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002875}
2876
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002877/*[clinic input]
2878_io.TextIOWrapper.writable
2879[clinic start generated code]*/
2880
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002881static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002882_io_TextIOWrapper_writable_impl(textio *self)
2883/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002884{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002885 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002886 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002887}
2888
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002889/*[clinic input]
2890_io.TextIOWrapper.isatty
2891[clinic start generated code]*/
2892
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002893static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002894_io_TextIOWrapper_isatty_impl(textio *self)
2895/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002896{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002897 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002898 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002899}
2900
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002901/*[clinic input]
2902_io.TextIOWrapper.flush
2903[clinic start generated code]*/
2904
Antoine Pitrou243757e2010-11-05 21:15:39 +00002905static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002906_io_TextIOWrapper_flush_impl(textio *self)
2907/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002908{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002909 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002910 CHECK_CLOSED(self);
2911 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002912 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002913 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002914 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002915}
2916
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002917/*[clinic input]
2918_io.TextIOWrapper.close
2919[clinic start generated code]*/
2920
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002921static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002922_io_TextIOWrapper_close_impl(textio *self)
2923/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002924{
2925 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002926 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002927 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002928
Antoine Pitrou6be88762010-05-03 16:48:20 +00002929 res = textiowrapper_closed_get(self, NULL);
2930 if (res == NULL)
2931 return NULL;
2932 r = PyObject_IsTrue(res);
2933 Py_DECREF(res);
2934 if (r < 0)
2935 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002936
Antoine Pitrou6be88762010-05-03 16:48:20 +00002937 if (r > 0) {
2938 Py_RETURN_NONE; /* stream already closed */
2939 }
2940 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002941 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002942 if (self->finalizing) {
Victor Stinner61bdb0d2016-12-09 15:39:28 +01002943 res = _PyObject_CallMethodIdObjArgs(self->buffer,
2944 &PyId__dealloc_warn,
2945 self, NULL);
Antoine Pitroue033e062010-10-29 10:38:18 +00002946 if (res)
2947 Py_DECREF(res);
2948 else
2949 PyErr_Clear();
2950 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002951 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002952 if (res == NULL)
2953 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002954 else
2955 Py_DECREF(res);
2956
Benjamin Peterson68623612012-12-20 11:53:11 -06002957 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2958 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002959 _PyErr_ChainExceptions(exc, val, tb);
2960 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002961 }
2962 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002963 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002964}
2965
2966static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002967textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002968{
2969 PyObject *line;
2970
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002971 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002972
2973 self->telling = 0;
2974 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2975 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002976 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002977 }
2978 else {
2979 line = PyObject_CallMethodObjArgs((PyObject *)self,
2980 _PyIO_str_readline, NULL);
2981 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002982 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03002983 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002984 "not '%.200s'", Py_TYPE(line)->tp_name);
2985 Py_DECREF(line);
2986 return NULL;
2987 }
2988 }
2989
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002990 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002991 return NULL;
2992
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002993 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002994 /* Reached EOF or would have blocked */
2995 Py_DECREF(line);
2996 Py_CLEAR(self->snapshot);
2997 self->telling = self->seekable;
2998 return NULL;
2999 }
3000
3001 return line;
3002}
3003
3004static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003005textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003006{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003007 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003008 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003009}
3010
3011static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003012textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003013{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003014 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003015 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3016}
3017
3018static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003019textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003020{
3021 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003022 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003023 if (self->decoder == NULL ||
3024 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3025 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003026 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003027 }
3028 return res;
3029}
3030
3031static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003032textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003033{
3034 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003035 Py_INCREF(self->errors);
3036 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003037}
3038
3039static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003040textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003041{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003042 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003043 return PyLong_FromSsize_t(self->chunk_size);
3044}
3045
3046static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003047textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003048{
3049 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003050 CHECK_ATTACHED_INT(self);
Zackery Spytz842acaa2018-12-17 07:52:45 -07003051 if (arg == NULL) {
3052 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3053 return -1;
3054 }
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003055 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003056 if (n == -1 && PyErr_Occurred())
3057 return -1;
3058 if (n <= 0) {
3059 PyErr_SetString(PyExc_ValueError,
3060 "a strictly positive integer is required");
3061 return -1;
3062 }
3063 self->chunk_size = n;
3064 return 0;
3065}
3066
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003067#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003068
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003069static PyMethodDef incrementalnewlinedecoder_methods[] = {
3070 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3071 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3072 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3073 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3074 {NULL}
3075};
3076
3077static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3078 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3079 {NULL}
3080};
3081
3082PyTypeObject PyIncrementalNewlineDecoder_Type = {
3083 PyVarObject_HEAD_INIT(NULL, 0)
3084 "_io.IncrementalNewlineDecoder", /*tp_name*/
3085 sizeof(nldecoder_object), /*tp_basicsize*/
3086 0, /*tp_itemsize*/
3087 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3088 0, /*tp_print*/
3089 0, /*tp_getattr*/
3090 0, /*tp_setattr*/
3091 0, /*tp_compare */
3092 0, /*tp_repr*/
3093 0, /*tp_as_number*/
3094 0, /*tp_as_sequence*/
3095 0, /*tp_as_mapping*/
3096 0, /*tp_hash */
3097 0, /*tp_call*/
3098 0, /*tp_str*/
3099 0, /*tp_getattro*/
3100 0, /*tp_setattro*/
3101 0, /*tp_as_buffer*/
3102 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3103 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3104 0, /* tp_traverse */
3105 0, /* tp_clear */
3106 0, /* tp_richcompare */
3107 0, /*tp_weaklistoffset*/
3108 0, /* tp_iter */
3109 0, /* tp_iternext */
3110 incrementalnewlinedecoder_methods, /* tp_methods */
3111 0, /* tp_members */
3112 incrementalnewlinedecoder_getset, /* tp_getset */
3113 0, /* tp_base */
3114 0, /* tp_dict */
3115 0, /* tp_descr_get */
3116 0, /* tp_descr_set */
3117 0, /* tp_dictoffset */
3118 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3119 0, /* tp_alloc */
3120 PyType_GenericNew, /* tp_new */
3121};
3122
3123
3124static PyMethodDef textiowrapper_methods[] = {
3125 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003126 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003127 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3128 _IO_TEXTIOWRAPPER_READ_METHODDEF
3129 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3130 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3131 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3132
3133 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3134 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3135 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3136 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3137 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003138
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003139 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3140 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3141 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003142 {NULL, NULL}
3143};
3144
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003145static PyMemberDef textiowrapper_members[] = {
3146 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3147 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3148 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003149 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003150 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003151 {NULL}
3152};
3153
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003154static PyGetSetDef textiowrapper_getset[] = {
3155 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3156 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003157/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3158*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003159 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3160 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3161 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3162 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003163 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003164};
3165
3166PyTypeObject PyTextIOWrapper_Type = {
3167 PyVarObject_HEAD_INIT(NULL, 0)
3168 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003169 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003170 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003171 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003172 0, /*tp_print*/
3173 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003174 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003175 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003176 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003177 0, /*tp_as_number*/
3178 0, /*tp_as_sequence*/
3179 0, /*tp_as_mapping*/
3180 0, /*tp_hash */
3181 0, /*tp_call*/
3182 0, /*tp_str*/
3183 0, /*tp_getattro*/
3184 0, /*tp_setattro*/
3185 0, /*tp_as_buffer*/
3186 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02003187 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003188 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003189 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3190 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003191 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003192 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003193 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003194 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3195 textiowrapper_methods, /* tp_methods */
3196 textiowrapper_members, /* tp_members */
3197 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003198 0, /* tp_base */
3199 0, /* tp_dict */
3200 0, /* tp_descr_get */
3201 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003202 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003203 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003204 0, /* tp_alloc */
3205 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003206 0, /* tp_free */
3207 0, /* tp_is_gc */
3208 0, /* tp_bases */
3209 0, /* tp_mro */
3210 0, /* tp_cache */
3211 0, /* tp_subclasses */
3212 0, /* tp_weaklist */
3213 0, /* tp_del */
3214 0, /* tp_version_tag */
3215 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003216};