/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
Victor Stinnerbcda8f12018-11-21 22:27:47 +010011#include "pycore_object.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000012#include "structmember.h"
13#include "_iomodule.h"
14
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030015/*[clinic input]
16module _io
17class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
18class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
19[clinic start generated code]*/
20/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
21
/* Static identifier handles.  Each _Py_IDENTIFIER(name) line declares a
   PyId_<name> object that the code below passes by address (for example
   &PyId_strict and &PyId_setstate). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000043/* TextIOBase */
44
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000045PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046 "Base class for text I/O.\n"
47 "\n"
48 "This class provides a character and line based interface to stream\n"
49 "I/O. There is no readinto method because Python's character strings\n"
50 "are immutable. There is no public constructor.\n"
51 );
52
53static PyObject *
54_unsupported(const char *message)
55{
Antoine Pitrou712cb732013-12-21 15:51:54 +010056 _PyIO_State *state = IO_STATE();
57 if (state != NULL)
58 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000059 return NULL;
60}
61
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063 "Separate the underlying buffer from the TextIOBase and return it.\n"
64 "\n"
65 "After the underlying buffer has been detached, the TextIO is in an\n"
66 "unusable state.\n"
67 );
68
69static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053070textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000071{
72 return _unsupported("detach");
73}
74
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076 "Read at most n characters from stream.\n"
77 "\n"
78 "Read from underlying buffer until we have n characters or we hit EOF.\n"
79 "If n is negative or omitted, read until EOF.\n"
80 );
81
82static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000083textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000084{
85 return _unsupported("read");
86}
87
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089 "Read until newline or EOF.\n"
90 "\n"
91 "Returns an empty string if EOF is hit immediately.\n"
92 );
93
94static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000095textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000096{
97 return _unsupported("readline");
98}
99
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000100PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000101 "Write string to stream.\n"
102 "Returns the number of characters written (which is always equal to\n"
103 "the length of the string).\n"
104 );
105
106static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000108{
109 return _unsupported("write");
110}
111
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000112PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000113 "Encoding of the text stream.\n"
114 "\n"
115 "Subclasses should override.\n"
116 );
117
118static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000119textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120{
121 Py_RETURN_NONE;
122}
123
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 "Line endings translated so far.\n"
126 "\n"
127 "Only line endings translated during reading are considered.\n"
128 "\n"
129 "Subclasses should override.\n"
130 );
131
132static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000133textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000134{
135 Py_RETURN_NONE;
136}
137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000138PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000139 "The error setting of the decoder or encoder.\n"
140 "\n"
141 "Subclasses should override.\n"
142 );
143
144static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000145textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000146{
147 Py_RETURN_NONE;
148}
149
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530152 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000153 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156 {NULL, NULL}
157};
158
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000159static PyGetSetDef textiobase_getset[] = {
160 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000163 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164};
165
166PyTypeObject PyTextIOBase_Type = {
167 PyVarObject_HEAD_INIT(NULL, 0)
168 "_io._TextIOBase", /*tp_name*/
169 0, /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 0, /*tp_dealloc*/
172 0, /*tp_print*/
173 0, /*tp_getattr*/
174 0, /*tp_setattr*/
175 0, /*tp_compare */
176 0, /*tp_repr*/
177 0, /*tp_as_number*/
178 0, /*tp_as_sequence*/
179 0, /*tp_as_mapping*/
180 0, /*tp_hash */
181 0, /*tp_call*/
182 0, /*tp_str*/
183 0, /*tp_getattro*/
184 0, /*tp_setattro*/
185 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200186 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
187 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000188 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000189 0, /* tp_traverse */
190 0, /* tp_clear */
191 0, /* tp_richcompare */
192 0, /* tp_weaklistoffset */
193 0, /* tp_iter */
194 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000195 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000196 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 &PyIOBase_Type, /* tp_base */
199 0, /* tp_dict */
200 0, /* tp_descr_get */
201 0, /* tp_descr_set */
202 0, /* tp_dictoffset */
203 0, /* tp_init */
204 0, /* tp_alloc */
205 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200206 0, /* tp_free */
207 0, /* tp_is_gc */
208 0, /* tp_bases */
209 0, /* tp_mro */
210 0, /* tp_cache */
211 0, /* tp_subclasses */
212 0, /* tp_weaklist */
213 0, /* tp_del */
214 0, /* tp_version_tag */
215 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000216};
217
218
219/* IncrementalNewlineDecoder */
220
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221typedef struct {
222 PyObject_HEAD
223 PyObject *decoder;
224 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200225 unsigned int pendingcr: 1;
226 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000227 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000228} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000229
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300230/*[clinic input]
231_io.IncrementalNewlineDecoder.__init__
232 decoder: object
233 translate: int
234 errors: object(c_default="NULL") = "strict"
235
236Codec used when reading a file in universal newlines mode.
237
238It wraps another incremental decoder, translating \r\n and \r into \n.
239It also records the types of newlines encountered. When used with
240translate=False, it ensures that the newline sequence is returned in
241one piece. When used with decoder=None, it expects unicode strings as
242decode input and translates newlines without first invoking an external
243decoder.
244[clinic start generated code]*/
245
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300247_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
248 PyObject *decoder, int translate,
249 PyObject *errors)
250/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252 self->decoder = decoder;
253 Py_INCREF(decoder);
254
255 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900256 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000257 if (self->errors == NULL)
258 return -1;
259 }
260 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000261 self->errors = errors;
262 }
INADA Naoki507434f2017-12-21 09:59:53 +0900263 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000264
Xiang Zhangb08746b2018-10-31 19:49:16 +0800265 self->translate = translate ? 1 : 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266 self->seennl = 0;
267 self->pendingcr = 0;
268
269 return 0;
270}
271
272static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000273incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000274{
275 Py_CLEAR(self->decoder);
276 Py_CLEAR(self->errors);
277 Py_TYPE(self)->tp_free((PyObject *)self);
278}
279
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200280static int
281check_decoded(PyObject *decoded)
282{
283 if (decoded == NULL)
284 return -1;
285 if (!PyUnicode_Check(decoded)) {
286 PyErr_Format(PyExc_TypeError,
287 "decoder should return a string result, not '%.200s'",
288 Py_TYPE(decoded)->tp_name);
289 Py_DECREF(decoded);
290 return -1;
291 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200292 if (PyUnicode_READY(decoded) < 0) {
293 Py_DECREF(decoded);
294 return -1;
295 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200296 return 0;
297}
298
/* Bit flags recorded in nldecoder_object.seennl: which newline styles
   have been encountered so far. */
#define SEEN_CR   0x1   /* "\r"   */
#define SEEN_LF   0x2   /* "\n"   */
#define SEEN_CRLF 0x4   /* "\r\n" */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
303
304PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200305_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000306 PyObject *input, int final)
307{
308 PyObject *output;
309 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200310 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311
312 if (self->decoder == NULL) {
313 PyErr_SetString(PyExc_ValueError,
314 "IncrementalNewlineDecoder.__init__ not called");
315 return NULL;
316 }
317
318 /* decode input (with the eventual \r from a previous pass) */
319 if (self->decoder != Py_None) {
320 output = PyObject_CallMethodObjArgs(self->decoder,
321 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
322 }
323 else {
324 output = input;
325 Py_INCREF(output);
326 }
327
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200328 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000329 return NULL;
330
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000332 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 /* Prefix output with CR */
334 int kind;
335 PyObject *modified;
336 char *out;
337
338 modified = PyUnicode_New(output_len + 1,
339 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000340 if (modified == NULL)
341 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 kind = PyUnicode_KIND(modified);
343 out = PyUnicode_DATA(modified);
344 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200345 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000348 self->pendingcr = 0;
349 output_len++;
350 }
351
352 /* retain last \r even when not translating data:
353 * then readline() is sure to get \r\n in one pass
354 */
355 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000356 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
358 {
359 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
360 if (modified == NULL)
361 goto error;
362 Py_DECREF(output);
363 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000364 self->pendingcr = 1;
365 }
366 }
367
368 /* Record which newlines are read and do newline translation if desired,
369 all in one pass. */
370 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200371 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000372 Py_ssize_t len;
373 int seennl = self->seennl;
374 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000376
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 in_str = PyUnicode_DATA(output);
378 len = PyUnicode_GET_LENGTH(output);
379 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000380
381 if (len == 0)
382 return output;
383
384 /* If, up to now, newlines are consistently \n, do a quick check
385 for the \r *byte* with the libc's optimized memchr.
386 */
387 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200388 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000389 }
390
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 if (only_lf) {
392 /* If not already seen, quick scan for a possible "\n" character.
393 (there's nothing else to be done, even when in translation mode)
394 */
395 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200396 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100397 if (kind == PyUnicode_1BYTE_KIND)
398 seennl |= SEEN_LF;
399 else {
400 Py_ssize_t i = 0;
401 for (;;) {
402 Py_UCS4 c;
403 /* Fast loop for non-control characters */
404 while (PyUnicode_READ(kind, in_str, i) > '\n')
405 i++;
406 c = PyUnicode_READ(kind, in_str, i++);
407 if (c == '\n') {
408 seennl |= SEEN_LF;
409 break;
410 }
411 if (i >= len)
412 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000413 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 }
415 }
416 /* Finished: we have scanned for newlines, and none of them
417 need translating */
418 }
419 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000421 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000422 if (seennl == SEEN_ALL)
423 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000424 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200425 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000426 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 while (PyUnicode_READ(kind, in_str, i) > '\r')
428 i++;
429 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 if (c == '\n')
431 seennl |= SEEN_LF;
432 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 }
437 else
438 seennl |= SEEN_CR;
439 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000441 break;
442 if (seennl == SEEN_ALL)
443 break;
444 }
445 endscan:
446 ;
447 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000448 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 void *translated;
450 int kind = PyUnicode_KIND(output);
451 void *in_str = PyUnicode_DATA(output);
452 Py_ssize_t in, out;
453 /* XXX: Previous in-place translation here is disabled as
454 resizing is not possible anymore */
455 /* We could try to optimize this so that we only do a copy
456 when there is something to translate. On the other hand,
457 we already know there is a \r byte, so chances are high
458 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200459 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200460 if (translated == NULL) {
461 PyErr_NoMemory();
462 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000467 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
469 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200471 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 seennl |= SEEN_LF;
473 continue;
474 }
475 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200476 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 in++;
478 seennl |= SEEN_CRLF;
479 }
480 else
481 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000483 continue;
484 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000486 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200487 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000488 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200489 Py_DECREF(output);
490 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100491 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200492 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200493 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000494 }
495 self->seennl |= seennl;
496 }
497
498 return output;
499
500 error:
501 Py_DECREF(output);
502 return NULL;
503}
504
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300505/*[clinic input]
506_io.IncrementalNewlineDecoder.decode
507 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200508 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300509[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000510
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511static PyObject *
512_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
513 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200514/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300515{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
517}
518
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300519/*[clinic input]
520_io.IncrementalNewlineDecoder.getstate
521[clinic start generated code]*/
522
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000523static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300524_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
525/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000526{
527 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700528 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529
530 if (self->decoder != Py_None) {
531 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
532 _PyIO_str_getstate, NULL);
533 if (state == NULL)
534 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300535 if (!PyTuple_Check(state)) {
536 PyErr_SetString(PyExc_TypeError,
537 "illegal decoder state");
538 Py_DECREF(state);
539 return NULL;
540 }
541 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
542 &buffer, &flag))
543 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000544 Py_DECREF(state);
545 return NULL;
546 }
547 Py_INCREF(buffer);
548 Py_DECREF(state);
549 }
550 else {
551 buffer = PyBytes_FromString("");
552 flag = 0;
553 }
554 flag <<= 1;
555 if (self->pendingcr)
556 flag |= 1;
557 return Py_BuildValue("NK", buffer, flag);
558}
559
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300560/*[clinic input]
561_io.IncrementalNewlineDecoder.setstate
562 state: object
563 /
564[clinic start generated code]*/
565
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000566static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300567_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
568 PyObject *state)
569/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570{
571 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700572 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000573
Oren Milman1d1d3e92017-08-20 18:35:36 +0300574 if (!PyTuple_Check(state)) {
575 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300577 }
578 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
579 &buffer, &flag))
580 {
581 return NULL;
582 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583
Victor Stinner7d7e7752014-06-17 23:31:25 +0200584 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585 flag >>= 1;
586
587 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200588 return _PyObject_CallMethodId(self->decoder,
589 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 else
591 Py_RETURN_NONE;
592}
593
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300594/*[clinic input]
595_io.IncrementalNewlineDecoder.reset
596[clinic start generated code]*/
597
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000598static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300599_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
600/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601{
602 self->seennl = 0;
603 self->pendingcr = 0;
604 if (self->decoder != Py_None)
605 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
606 else
607 Py_RETURN_NONE;
608}
609
610static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612{
613 switch (self->seennl) {
614 case SEEN_CR:
615 return PyUnicode_FromString("\r");
616 case SEEN_LF:
617 return PyUnicode_FromString("\n");
618 case SEEN_CRLF:
619 return PyUnicode_FromString("\r\n");
620 case SEEN_CR | SEEN_LF:
621 return Py_BuildValue("ss", "\r", "\n");
622 case SEEN_CR | SEEN_CRLF:
623 return Py_BuildValue("ss", "\r", "\r\n");
624 case SEEN_LF | SEEN_CRLF:
625 return Py_BuildValue("ss", "\n", "\r\n");
626 case SEEN_CR | SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("sss", "\r", "\n", "\r\n");
628 default:
629 Py_RETURN_NONE;
630 }
631
632}
633
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634/* TextIOWrapper */
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636typedef PyObject *
637 (*encodefunc_t)(PyObject *, PyObject *);
638
639typedef struct
640{
641 PyObject_HEAD
642 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000643 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 Py_ssize_t chunk_size;
645 PyObject *buffer;
646 PyObject *encoding;
647 PyObject *encoder;
648 PyObject *decoder;
649 PyObject *readnl;
650 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900651 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200653 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 char readuniversal;
655 char readtranslate;
656 char writetranslate;
657 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200658 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200660 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000665
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000668
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
674 */
675 PyObject *decoded_chars; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
677 PyObject *pending_bytes; /* list of bytes objects waiting to be
678 written, or NULL */
679 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000680
Oren Milman13614e32017-08-24 19:51:24 +0300681 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
685 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000686 PyObject *snapshot;
687 /* Bytes-to-characters ratio for the current chunk. Serves as input for
688 the heuristic in tell(). */
689 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000690
691 /* Cache raw object if it's a FileIO object */
692 PyObject *raw;
693
694 PyObject *weakreflist;
695 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000696} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000697
Zackery Spytz23db9352018-06-29 04:14:58 -0600698static void
699textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
700
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701/* A couple of specialized cases in order to bypass the slow incremental
702 encoding methods for the most popular encodings. */
703
704static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000705ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706{
INADA Naoki507434f2017-12-21 09:59:53 +0900707 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708}
709
710static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000711utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100713 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900714 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715}
716
717static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000718utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100720 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900721 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722}
723
724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000725utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726{
Antoine Pitroue4501852009-05-14 18:55:55 +0000727 if (!self->encoding_start_of_stream) {
728 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200729#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000734 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100735 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900736 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737}
738
Antoine Pitroue4501852009-05-14 18:55:55 +0000739static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000740utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000741{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100742 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900743 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100749 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900750 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000751}
752
753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000755{
756 if (!self->encoding_start_of_stream) {
757 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200758#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000759 return utf32be_encode(self, text);
760#else
761 return utf32le_encode(self, text);
762#endif
763 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100764 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900765 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000766}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767
768static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000769utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000770{
INADA Naoki507434f2017-12-21 09:59:53 +0900771 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772}
773
774static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000775latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776{
INADA Naoki507434f2017-12-21 09:59:53 +0900777 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778}
779
780/* Map normalized encoding names onto the specialized encoding funcs */
781
782typedef struct {
783 const char *name;
784 encodefunc_t encodefunc;
785} encodefuncentry;
786
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200787static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788 {"ascii", (encodefunc_t) ascii_encode},
789 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000790 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791 {"utf-16-be", (encodefunc_t) utf16be_encode},
792 {"utf-16-le", (encodefunc_t) utf16le_encode},
793 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000794 {"utf-32-be", (encodefunc_t) utf32be_encode},
795 {"utf-32-le", (encodefunc_t) utf32le_encode},
796 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797 {NULL, NULL}
798};
799
INADA Naoki507434f2017-12-21 09:59:53 +0900800static int
801validate_newline(const char *newline)
802{
803 if (newline && newline[0] != '\0'
804 && !(newline[0] == '\n' && newline[1] == '\0')
805 && !(newline[0] == '\r' && newline[1] == '\0')
806 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
807 PyErr_Format(PyExc_ValueError,
808 "illegal newline value: %s", newline);
809 return -1;
810 }
811 return 0;
812}
813
814static int
815set_newline(textio *self, const char *newline)
816{
817 PyObject *old = self->readnl;
818 if (newline == NULL) {
819 self->readnl = NULL;
820 }
821 else {
822 self->readnl = PyUnicode_FromString(newline);
823 if (self->readnl == NULL) {
824 self->readnl = old;
825 return -1;
826 }
827 }
828 self->readuniversal = (newline == NULL || newline[0] == '\0');
829 self->readtranslate = (newline == NULL);
830 self->writetranslate = (newline == NULL || newline[0] != '\0');
831 if (!self->readuniversal && self->readnl != NULL) {
832 // validate_newline() accepts only ASCII newlines.
833 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
834 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
835 if (strcmp(self->writenl, "\n") == 0) {
836 self->writenl = NULL;
837 }
838 }
839 else {
840#ifdef MS_WINDOWS
841 self->writenl = "\r\n";
842#else
843 self->writenl = NULL;
844#endif
845 }
846 Py_XDECREF(old);
847 return 0;
848}
849
850static int
851_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
852 const char *errors)
853{
854 PyObject *res;
855 int r;
856
857 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
858 if (res == NULL)
859 return -1;
860
861 r = PyObject_IsTrue(res);
862 Py_DECREF(res);
863 if (r == -1)
864 return -1;
865
866 if (r != 1)
867 return 0;
868
869 Py_CLEAR(self->decoder);
870 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
871 if (self->decoder == NULL)
872 return -1;
873
874 if (self->readuniversal) {
875 PyObject *incrementalDecoder = PyObject_CallFunction(
876 (PyObject *)&PyIncrementalNewlineDecoder_Type,
877 "Oi", self->decoder, (int)self->readtranslate);
878 if (incrementalDecoder == NULL)
879 return -1;
880 Py_CLEAR(self->decoder);
881 self->decoder = incrementalDecoder;
882 }
883
884 return 0;
885}
886
887static PyObject*
888_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
889{
890 PyObject *chars;
891
892 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
893 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
894 else
895 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
896 eof ? Py_True : Py_False, NULL);
897
898 if (check_decoded(chars) < 0)
899 // check_decoded already decreases refcount
900 return NULL;
901
902 return chars;
903}
904
905static int
906_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
907 const char *errors)
908{
909 PyObject *res;
910 int r;
911
912 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
913 if (res == NULL)
914 return -1;
915
916 r = PyObject_IsTrue(res);
917 Py_DECREF(res);
918 if (r == -1)
919 return -1;
920
921 if (r != 1)
922 return 0;
923
924 Py_CLEAR(self->encoder);
925 self->encodefunc = NULL;
926 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
927 if (self->encoder == NULL)
928 return -1;
929
930 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200931 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
932 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900933 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200934 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900935 const encodefuncentry *e = encodefuncs;
936 while (e->name != NULL) {
937 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
938 self->encodefunc = e->encodefunc;
939 break;
940 }
941 e++;
942 }
943 }
944 Py_XDECREF(res);
945
946 return 0;
947}
948
949static int
950_textiowrapper_fix_encoder_state(textio *self)
951{
952 if (!self->seekable || !self->encoder) {
953 return 0;
954 }
955
956 self->encoding_start_of_stream = 1;
957
958 PyObject *cookieObj = PyObject_CallMethodObjArgs(
959 self->buffer, _PyIO_str_tell, NULL);
960 if (cookieObj == NULL) {
961 return -1;
962 }
963
964 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
965 Py_DECREF(cookieObj);
966 if (cmp < 0) {
967 return -1;
968 }
969
970 if (cmp == 0) {
971 self->encoding_start_of_stream = 0;
972 PyObject *res = PyObject_CallMethodObjArgs(
973 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
974 if (res == NULL) {
975 return -1;
976 }
977 Py_DECREF(res);
978 }
979
980 return 0;
981}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000982
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300983/*[clinic input]
984_io.TextIOWrapper.__init__
985 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700986 encoding: str(accept={str, NoneType}) = NULL
INADA Naoki507434f2017-12-21 09:59:53 +0900987 errors: object = None
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700988 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200989 line_buffering: bool(accept={int}) = False
990 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000991
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300992Character and line based layer over a BufferedIOBase object, buffer.
993
994encoding gives the name of the encoding that the stream will be
995decoded or encoded with. It defaults to locale.getpreferredencoding(False).
996
997errors determines the strictness of encoding and decoding (see
998help(codecs.Codec) or the documentation for codecs.register) and
999defaults to "strict".
1000
1001newline controls how line endings are handled. It can be None, '',
1002'\n', '\r', and '\r\n'. It works as follows:
1003
1004* On input, if newline is None, universal newlines mode is
1005 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1006 these are translated into '\n' before being returned to the
1007 caller. If it is '', universal newline mode is enabled, but line
1008 endings are returned to the caller untranslated. If it has any of
1009 the other legal values, input lines are only terminated by the given
1010 string, and the line ending is returned to the caller untranslated.
1011
1012* On output, if newline is None, any '\n' characters written are
1013 translated to the system default line separator, os.linesep. If
1014 newline is '' or '\n', no translation takes place. If newline is any
1015 of the other legal values, any '\n' characters written are translated
1016 to the given string.
1017
1018If line_buffering is True, a call to flush is implied when a call to
1019write contains a newline character.
1020[clinic start generated code]*/
1021
1022static int
1023_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001024 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001025 const char *newline, int line_buffering,
1026 int write_through)
INADA Naoki507434f2017-12-21 09:59:53 +09001027/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001028{
1029 PyObject *raw, *codec_info = NULL;
1030 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001031 PyObject *res;
1032 int r;
1033
1034 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001035 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001036
INADA Naoki507434f2017-12-21 09:59:53 +09001037 if (errors == Py_None) {
1038 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001039 if (errors == NULL) {
1040 return -1;
1041 }
INADA Naoki507434f2017-12-21 09:59:53 +09001042 }
1043 else if (!PyUnicode_Check(errors)) {
1044 // Check 'errors' argument here because Argument Clinic doesn't support
1045 // 'str(accept={str, NoneType})' converter.
1046 PyErr_Format(
1047 PyExc_TypeError,
1048 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1049 errors->ob_type->tp_name);
1050 return -1;
1051 }
1052
1053 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001054 return -1;
1055 }
1056
1057 Py_CLEAR(self->buffer);
1058 Py_CLEAR(self->encoding);
1059 Py_CLEAR(self->encoder);
1060 Py_CLEAR(self->decoder);
1061 Py_CLEAR(self->readnl);
1062 Py_CLEAR(self->decoded_chars);
1063 Py_CLEAR(self->pending_bytes);
1064 Py_CLEAR(self->snapshot);
1065 Py_CLEAR(self->errors);
1066 Py_CLEAR(self->raw);
1067 self->decoded_chars_used = 0;
1068 self->pending_bytes_count = 0;
1069 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001070 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001071
1072 if (encoding == NULL) {
1073 /* Try os.device_encoding(fileno) */
1074 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001075 state = IO_STATE();
1076 if (state == NULL)
1077 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001078 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001079 /* Ignore only AttributeError and UnsupportedOperation */
1080 if (fileno == NULL) {
1081 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1082 PyErr_ExceptionMatches(state->unsupported_operation)) {
1083 PyErr_Clear();
1084 }
1085 else {
1086 goto error;
1087 }
1088 }
1089 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001090 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001091 Py_DECREF(fileno);
1092 if (fd == -1 && PyErr_Occurred()) {
1093 goto error;
1094 }
1095
1096 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001097 if (self->encoding == NULL)
1098 goto error;
1099 else if (!PyUnicode_Check(self->encoding))
1100 Py_CLEAR(self->encoding);
1101 }
1102 }
1103 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001104 PyObject *locale_module = _PyIO_get_locale_module(state);
1105 if (locale_module == NULL)
1106 goto catch_ImportError;
Victor Stinner61bdb0d2016-12-09 15:39:28 +01001107 self->encoding = _PyObject_CallMethodIdObjArgs(
1108 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001109 Py_DECREF(locale_module);
1110 if (self->encoding == NULL) {
1111 catch_ImportError:
1112 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001113 Importing locale can raise an ImportError because of
1114 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001115 ImportError if _locale is not available. These will happen
1116 during module building.
1117 */
1118 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1119 PyErr_Clear();
1120 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001122 else
1123 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001125 else if (!PyUnicode_Check(self->encoding))
1126 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001127 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001128 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001129 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001130 if (encoding == NULL)
1131 goto error;
1132 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001133 else if (encoding != NULL) {
1134 self->encoding = PyUnicode_FromString(encoding);
1135 if (self->encoding == NULL)
1136 goto error;
1137 }
1138 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001139 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001140 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001141 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001142 }
1143
Nick Coghlana9b15242014-02-04 22:11:18 +10001144 /* Check we have been asked for a real text encoding */
1145 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1146 if (codec_info == NULL) {
1147 Py_CLEAR(self->encoding);
1148 goto error;
1149 }
1150
1151 /* XXX: Failures beyond this point have the potential to leak elements
1152 * of the partially constructed object (like self->encoding)
1153 */
1154
INADA Naoki507434f2017-12-21 09:59:53 +09001155 Py_INCREF(errors);
1156 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001157 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001158 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001159 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001160 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001161 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001162 }
1163
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001164 self->buffer = buffer;
1165 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001166
INADA Naoki507434f2017-12-21 09:59:53 +09001167 /* Build the decoder object */
1168 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1169 goto error;
1170
1171 /* Build the encoder object */
1172 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1173 goto error;
1174
1175 /* Finished sorting out the codec details */
1176 Py_CLEAR(codec_info);
1177
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001178 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1179 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001180 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1181 {
1182 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1183 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001184 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001185 if (raw != NULL) {
1186 if (Py_TYPE(raw) == &PyFileIO_Type)
1187 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001188 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001189 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001190 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001191 }
1192
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001193 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001194 if (res == NULL)
1195 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001196 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001197 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001198 if (r < 0)
1199 goto error;
1200 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001201
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001202 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1203 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001204 goto error;
1205 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001206 Py_XDECREF(res);
1207 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001208
Antoine Pitroue4501852009-05-14 18:55:55 +00001209 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001210 if (_textiowrapper_fix_encoder_state(self) < 0) {
1211 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001212 }
1213
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001214 self->ok = 1;
1215 return 0;
1216
1217 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001218 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001219 return -1;
1220}
1221
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001222/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1223 * -1 on error.
1224 */
1225static int
1226convert_optional_bool(PyObject *obj, int default_value)
1227{
1228 long v;
1229 if (obj == Py_None) {
1230 v = default_value;
1231 }
1232 else {
1233 v = PyLong_AsLong(obj);
1234 if (v == -1 && PyErr_Occurred())
1235 return -1;
1236 }
1237 return v != 0;
1238}
1239
INADA Naoki507434f2017-12-21 09:59:53 +09001240static int
1241textiowrapper_change_encoding(textio *self, PyObject *encoding,
1242 PyObject *errors, int newline_changed)
1243{
1244 /* Use existing settings where new settings are not specified */
1245 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1246 return 0; // no change
1247 }
1248
1249 if (encoding == Py_None) {
1250 encoding = self->encoding;
1251 if (errors == Py_None) {
1252 errors = self->errors;
1253 }
1254 }
1255 else if (errors == Py_None) {
1256 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001257 if (errors == NULL) {
1258 return -1;
1259 }
INADA Naoki507434f2017-12-21 09:59:53 +09001260 }
1261
1262 const char *c_errors = PyUnicode_AsUTF8(errors);
1263 if (c_errors == NULL) {
1264 return -1;
1265 }
1266
1267 // Create new encoder & decoder
1268 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1269 PyUnicode_AsUTF8(encoding), "codecs.open()");
1270 if (codec_info == NULL) {
1271 return -1;
1272 }
1273 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1274 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1275 Py_DECREF(codec_info);
1276 return -1;
1277 }
1278 Py_DECREF(codec_info);
1279
1280 Py_INCREF(encoding);
1281 Py_INCREF(errors);
1282 Py_SETREF(self->encoding, encoding);
1283 Py_SETREF(self->errors, errors);
1284
1285 return _textiowrapper_fix_encoder_state(self);
1286}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001287
1288/*[clinic input]
1289_io.TextIOWrapper.reconfigure
1290 *
INADA Naoki507434f2017-12-21 09:59:53 +09001291 encoding: object = None
1292 errors: object = None
1293 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001294 line_buffering as line_buffering_obj: object = None
1295 write_through as write_through_obj: object = None
1296
1297Reconfigure the text stream with new parameters.
1298
1299This also does an implicit stream flush.
1300
1301[clinic start generated code]*/
1302
1303static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001304_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1305 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001306 PyObject *line_buffering_obj,
1307 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001308/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001309{
1310 int line_buffering;
1311 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001312 const char *newline = NULL;
1313
1314 /* Check if something is in the read buffer */
1315 if (self->decoded_chars != NULL) {
1316 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02001317 _unsupported("It is not possible to set the encoding or newline "
INADA Naoki507434f2017-12-21 09:59:53 +09001318 "of stream after the first read");
1319 return NULL;
1320 }
1321 }
1322
1323 if (newline_obj != NULL && newline_obj != Py_None) {
1324 newline = PyUnicode_AsUTF8(newline_obj);
1325 if (newline == NULL || validate_newline(newline) < 0) {
1326 return NULL;
1327 }
1328 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001329
1330 line_buffering = convert_optional_bool(line_buffering_obj,
1331 self->line_buffering);
1332 write_through = convert_optional_bool(write_through_obj,
1333 self->write_through);
1334 if (line_buffering < 0 || write_through < 0) {
1335 return NULL;
1336 }
INADA Naoki507434f2017-12-21 09:59:53 +09001337
1338 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001339 if (res == NULL) {
1340 return NULL;
1341 }
INADA Naoki507434f2017-12-21 09:59:53 +09001342 Py_DECREF(res);
1343 self->b2cratio = 0;
1344
1345 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1346 return NULL;
1347 }
1348
1349 if (textiowrapper_change_encoding(
1350 self, encoding, errors, newline_obj != NULL) < 0) {
1351 return NULL;
1352 }
1353
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001354 self->line_buffering = line_buffering;
1355 self->write_through = write_through;
1356 Py_RETURN_NONE;
1357}
1358
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001359static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001360textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001361{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001362 self->ok = 0;
1363 Py_CLEAR(self->buffer);
1364 Py_CLEAR(self->encoding);
1365 Py_CLEAR(self->encoder);
1366 Py_CLEAR(self->decoder);
1367 Py_CLEAR(self->readnl);
1368 Py_CLEAR(self->decoded_chars);
1369 Py_CLEAR(self->pending_bytes);
1370 Py_CLEAR(self->snapshot);
1371 Py_CLEAR(self->errors);
1372 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001373
1374 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001375 return 0;
1376}
1377
1378static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001379textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001380{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001381 self->finalizing = 1;
1382 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001383 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001384 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001385 _PyObject_GC_UNTRACK(self);
1386 if (self->weakreflist != NULL)
1387 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001388 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001389 Py_TYPE(self)->tp_free((PyObject *)self);
1390}
1391
1392static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001393textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001394{
1395 Py_VISIT(self->buffer);
1396 Py_VISIT(self->encoding);
1397 Py_VISIT(self->encoder);
1398 Py_VISIT(self->decoder);
1399 Py_VISIT(self->readnl);
1400 Py_VISIT(self->decoded_chars);
1401 Py_VISIT(self->pending_bytes);
1402 Py_VISIT(self->snapshot);
1403 Py_VISIT(self->errors);
1404 Py_VISIT(self->raw);
1405
1406 Py_VISIT(self->dict);
1407 return 0;
1408}
1409
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001410static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001411textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001412
1413/* This macro takes some shortcuts to make the common case faster. */
1414#define CHECK_CLOSED(self) \
1415 do { \
1416 int r; \
1417 PyObject *_res; \
1418 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1419 if (self->raw != NULL) \
1420 r = _PyFileIO_closed(self->raw); \
1421 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001422 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001423 if (_res == NULL) \
1424 return NULL; \
1425 r = PyObject_IsTrue(_res); \
1426 Py_DECREF(_res); \
1427 if (r < 0) \
1428 return NULL; \
1429 } \
1430 if (r > 0) { \
1431 PyErr_SetString(PyExc_ValueError, \
1432 "I/O operation on closed file."); \
1433 return NULL; \
1434 } \
1435 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001436 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001437 return NULL; \
1438 } while (0)
1439
/* Return NULL from the enclosing function, with ValueError set, when the
   wrapper has not been successfully initialised (self->ok <= 0).
   Expands to a bare `if` statement. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }
1446
/* Return NULL from the enclosing function, with ValueError set, when the
   wrapper is uninitialised or its buffer has been detached.  For functions
   that return a pointer; see CHECK_ATTACHED_INT for the int variant. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }
1454
/* Same checks as CHECK_ATTACHED, for functions that report failure by
   returning -1. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1465
1466
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001467/*[clinic input]
1468_io.TextIOWrapper.detach
1469[clinic start generated code]*/
1470
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001471static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001472_io_TextIOWrapper_detach_impl(textio *self)
1473/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001474{
1475 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001476 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001477 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1478 if (res == NULL)
1479 return NULL;
1480 Py_DECREF(res);
1481 buffer = self->buffer;
1482 self->buffer = NULL;
1483 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001484 return buffer;
1485}
1486
Antoine Pitrou24f36292009-03-28 22:16:42 +00001487/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001488 underlying buffered object, though. */
1489static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001490_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001491{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001492 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001493
1494 if (self->pending_bytes == NULL)
1495 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001496
1497 pending = self->pending_bytes;
1498 Py_INCREF(pending);
1499 self->pending_bytes_count = 0;
1500 Py_CLEAR(self->pending_bytes);
1501
1502 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1503 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001504 if (b == NULL)
1505 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001506 ret = NULL;
1507 do {
1508 ret = PyObject_CallMethodObjArgs(self->buffer,
1509 _PyIO_str_write, b, NULL);
1510 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511 Py_DECREF(b);
1512 if (ret == NULL)
1513 return -1;
1514 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001515 return 0;
1516}
1517
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001518/*[clinic input]
1519_io.TextIOWrapper.write
1520 text: unicode
1521 /
1522[clinic start generated code]*/
1523
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001524static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001525_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1526/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001527{
1528 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001529 PyObject *b;
1530 Py_ssize_t textlen;
1531 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001532 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001533
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001534 if (PyUnicode_READY(text) == -1)
1535 return NULL;
1536
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001537 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538 CHECK_CLOSED(self);
1539
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001540 if (self->encoder == NULL)
1541 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001542
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001543 Py_INCREF(text);
1544
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001545 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546
1547 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001548 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549 haslf = 1;
1550
1551 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001552 PyObject *newtext = _PyObject_CallMethodId(
1553 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 Py_DECREF(text);
1555 if (newtext == NULL)
1556 return NULL;
1557 text = newtext;
1558 }
1559
Antoine Pitroue96ec682011-07-23 21:46:35 +02001560 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001561 text_needflush = 1;
1562 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001563 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001564 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001565 needflush = 1;
1566
1567 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001568 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001570 self->encoding_start_of_stream = 0;
1571 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001572 else
1573 b = PyObject_CallMethodObjArgs(self->encoder,
1574 _PyIO_str_encode, text, NULL);
1575 Py_DECREF(text);
1576 if (b == NULL)
1577 return NULL;
Oren Milmana5b4ea12017-08-25 21:14:54 +03001578 if (!PyBytes_Check(b)) {
1579 PyErr_Format(PyExc_TypeError,
1580 "encoder should return a bytes object, not '%.200s'",
1581 Py_TYPE(b)->tp_name);
1582 Py_DECREF(b);
1583 return NULL;
1584 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001585
1586 if (self->pending_bytes == NULL) {
1587 self->pending_bytes = PyList_New(0);
1588 if (self->pending_bytes == NULL) {
1589 Py_DECREF(b);
1590 return NULL;
1591 }
1592 self->pending_bytes_count = 0;
1593 }
1594 if (PyList_Append(self->pending_bytes, b) < 0) {
1595 Py_DECREF(b);
1596 return NULL;
1597 }
1598 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1599 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001600 if (self->pending_bytes_count > self->chunk_size || needflush ||
1601 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001602 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 return NULL;
1604 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001605
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001606 if (needflush) {
1607 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1608 if (ret == NULL)
1609 return NULL;
1610 Py_DECREF(ret);
1611 }
1612
Zackery Spytz23db9352018-06-29 04:14:58 -06001613 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 Py_CLEAR(self->snapshot);
1615
1616 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001617 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001618 if (ret == NULL)
1619 return NULL;
1620 Py_DECREF(ret);
1621 }
1622
1623 return PyLong_FromSsize_t(textlen);
1624}
1625
1626/* Steal a reference to chars and store it in the decoded_char buffer;
1627 */
1628static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001629textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001631 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 self->decoded_chars_used = 0;
1633}
1634
1635static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001636textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001637{
1638 PyObject *chars;
1639 Py_ssize_t avail;
1640
1641 if (self->decoded_chars == NULL)
1642 return PyUnicode_FromStringAndSize(NULL, 0);
1643
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001644 /* decoded_chars is guaranteed to be "ready". */
1645 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001646 - self->decoded_chars_used);
1647
1648 assert(avail >= 0);
1649
1650 if (n < 0 || n > avail)
1651 n = avail;
1652
1653 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001654 chars = PyUnicode_Substring(self->decoded_chars,
1655 self->decoded_chars_used,
1656 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 if (chars == NULL)
1658 return NULL;
1659 }
1660 else {
1661 chars = self->decoded_chars;
1662 Py_INCREF(chars);
1663 }
1664
1665 self->decoded_chars_used += n;
1666 return chars;
1667}
1668
1669/* Read and decode the next chunk of data from the BufferedReader.
1670 */
1671static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001672textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673{
1674 PyObject *dec_buffer = NULL;
1675 PyObject *dec_flags = NULL;
1676 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001677 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001678 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001679 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 int eof;
1681
1682 /* The return value is True unless EOF was reached. The decoded string is
1683 * placed in self._decoded_chars (replacing its previous value). The
1684 * entire input chunk is sent to the decoder, though some of it may remain
1685 * buffered in the decoder, yet to be converted.
1686 */
1687
1688 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001689 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001690 return -1;
1691 }
1692
1693 if (self->telling) {
1694 /* To prepare for tell(), we need to snapshot a point in the file
1695 * where the decoder's input buffer is empty.
1696 */
1697
1698 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1699 _PyIO_str_getstate, NULL);
1700 if (state == NULL)
1701 return -1;
1702 /* Given this, we know there was a valid snapshot point
1703 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1704 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001705 if (!PyTuple_Check(state)) {
1706 PyErr_SetString(PyExc_TypeError,
1707 "illegal decoder state");
1708 Py_DECREF(state);
1709 return -1;
1710 }
1711 if (!PyArg_ParseTuple(state,
1712 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1713 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 Py_DECREF(state);
1715 return -1;
1716 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001717
1718 if (!PyBytes_Check(dec_buffer)) {
1719 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001720 "illegal decoder state: the first item should be a "
1721 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001722 Py_TYPE(dec_buffer)->tp_name);
1723 Py_DECREF(state);
1724 return -1;
1725 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001726 Py_INCREF(dec_buffer);
1727 Py_INCREF(dec_flags);
1728 Py_DECREF(state);
1729 }
1730
1731 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001732 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001733 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001734 }
1735 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001736 if (chunk_size == NULL)
1737 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001738
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001740 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1741 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001742 Py_DECREF(chunk_size);
1743 if (input_chunk == NULL)
1744 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001745
1746 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001747 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001748 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001749 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1750 Py_TYPE(input_chunk)->tp_name);
1751 goto fail;
1752 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753
Antoine Pitroub8503892014-04-29 10:14:02 +02001754 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001755 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756
INADA Naoki507434f2017-12-21 09:59:53 +09001757 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1758 PyBuffer_Release(&input_chunk_buf);
1759 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001760 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001761
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001762 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001763 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001764 if (nchars > 0)
1765 self->b2cratio = (double) nbytes / nchars;
1766 else
1767 self->b2cratio = 0.0;
1768 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001769 eof = 0;
1770
1771 if (self->telling) {
1772 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1773 * next input to be decoded is dec_buffer + input_chunk.
1774 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001775 PyObject *next_input = dec_buffer;
1776 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001777 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001778 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001779 goto fail;
1780 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001781 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1782 if (snapshot == NULL) {
1783 dec_flags = NULL;
1784 goto fail;
1785 }
1786 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001787 }
1788 Py_DECREF(input_chunk);
1789
1790 return (eof == 0);
1791
1792 fail:
1793 Py_XDECREF(dec_buffer);
1794 Py_XDECREF(dec_flags);
1795 Py_XDECREF(input_chunk);
1796 return -1;
1797}
1798
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001799/*[clinic input]
1800_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001801 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001802 /
1803[clinic start generated code]*/
1804
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001806_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001807/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001808{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001809 PyObject *result = NULL, *chunks = NULL;
1810
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001811 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 CHECK_CLOSED(self);
1813
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001814 if (self->decoder == NULL)
1815 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001816
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001817 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001818 return NULL;
1819
1820 if (n < 0) {
1821 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001822 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001823 PyObject *decoded;
1824 if (bytes == NULL)
1825 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001826
1827 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1828 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1829 bytes, 1);
1830 else
1831 decoded = PyObject_CallMethodObjArgs(
1832 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001833 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001834 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001835 goto fail;
1836
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001837 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001838
1839 if (result == NULL) {
1840 Py_DECREF(decoded);
1841 return NULL;
1842 }
1843
1844 PyUnicode_AppendAndDel(&result, decoded);
1845 if (result == NULL)
1846 goto fail;
1847
Zackery Spytz23db9352018-06-29 04:14:58 -06001848 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001849 Py_CLEAR(self->snapshot);
1850 return result;
1851 }
1852 else {
1853 int res = 1;
1854 Py_ssize_t remaining = n;
1855
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001856 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001857 if (result == NULL)
1858 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001859 if (PyUnicode_READY(result) == -1)
1860 goto fail;
1861 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001862
1863 /* Keep reading chunks until we have n characters to return */
1864 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001865 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001866 if (res < 0) {
1867 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1868 when EINTR occurs so we needn't do it ourselves. */
1869 if (_PyIO_trap_eintr()) {
1870 continue;
1871 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001873 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001874 if (res == 0) /* EOF */
1875 break;
1876 if (chunks == NULL) {
1877 chunks = PyList_New(0);
1878 if (chunks == NULL)
1879 goto fail;
1880 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001881 if (PyUnicode_GET_LENGTH(result) > 0 &&
1882 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001883 goto fail;
1884 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001885 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001886 if (result == NULL)
1887 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001888 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001889 }
1890 if (chunks != NULL) {
1891 if (result != NULL && PyList_Append(chunks, result) < 0)
1892 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03001893 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001894 if (result == NULL)
1895 goto fail;
1896 Py_CLEAR(chunks);
1897 }
1898 return result;
1899 }
1900 fail:
1901 Py_XDECREF(result);
1902 Py_XDECREF(chunks);
1903 return NULL;
1904}
1905
1906
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001907/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001908 that is to the NUL character. Otherwise the function will produce
1909 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001910static const char *
1911find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001912{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001913 if (kind == PyUnicode_1BYTE_KIND) {
1914 assert(ch < 256);
1915 return (char *) memchr((void *) s, (char) ch, end - s);
1916 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001917 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001918 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001919 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001920 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921 return s;
1922 if (s == end)
1923 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001924 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925 }
1926}
1927
1928Py_ssize_t
1929_PyIO_find_line_ending(
1930 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001931 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001932{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001933 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001934
1935 if (translated) {
1936 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001937 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001938 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001939 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001940 else {
1941 *consumed = len;
1942 return -1;
1943 }
1944 }
1945 else if (universal) {
1946 /* Universal newline search. Find any of \r, \r\n, \n
1947 * The decoder ensures that \r\n are not split in two pieces
1948 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001949 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001950 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001951 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001952 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001953 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001954 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001955 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001956 if (s >= end) {
1957 *consumed = len;
1958 return -1;
1959 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001960 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001961 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001963 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001964 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001965 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001966 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001967 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001968 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969 }
1970 }
1971 }
1972 else {
1973 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001974 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001975 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001976 /* Assume that readnl is an ASCII character. */
1977 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001978 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001979 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001980 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001981 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001982 *consumed = len;
1983 return -1;
1984 }
1985 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001986 const char *s = start;
1987 const char *e = end - (readnl_len - 1)*kind;
1988 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989 if (e < s)
1990 e = s;
1991 while (s < e) {
1992 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001993 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 if (pos == NULL || pos >= e)
1995 break;
1996 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001997 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998 break;
1999 }
2000 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002001 return (pos - start)/kind + readnl_len;
2002 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002003 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002004 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005 if (pos == NULL)
2006 *consumed = len;
2007 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002008 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009 return -1;
2010 }
2011 }
2012}
2013
2014static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002015_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016{
2017 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2018 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2019 int res;
2020
2021 CHECK_CLOSED(self);
2022
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002023 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024 return NULL;
2025
2026 chunked = 0;
2027
2028 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002029 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002031 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002032 Py_ssize_t consumed = 0;
2033
2034 /* First, get some data if necessary */
2035 res = 1;
2036 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002037 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002038 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002039 if (res < 0) {
2040 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2041 when EINTR occurs so we needn't do it ourselves. */
2042 if (_PyIO_trap_eintr()) {
2043 continue;
2044 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002045 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002046 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047 if (res == 0)
2048 break;
2049 }
2050 if (res == 0) {
2051 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002052 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 Py_CLEAR(self->snapshot);
2054 start = endpos = offset_to_buffer = 0;
2055 break;
2056 }
2057
2058 if (remaining == NULL) {
2059 line = self->decoded_chars;
2060 start = self->decoded_chars_used;
2061 offset_to_buffer = 0;
2062 Py_INCREF(line);
2063 }
2064 else {
2065 assert(self->decoded_chars_used == 0);
2066 line = PyUnicode_Concat(remaining, self->decoded_chars);
2067 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002068 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002069 Py_CLEAR(remaining);
2070 if (line == NULL)
2071 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002072 if (PyUnicode_READY(line) == -1)
2073 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 }
2075
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002076 ptr = PyUnicode_DATA(line);
2077 line_len = PyUnicode_GET_LENGTH(line);
2078 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079
2080 endpos = _PyIO_find_line_ending(
2081 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002082 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002083 ptr + kind * start,
2084 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002085 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 if (endpos >= 0) {
2087 endpos += start;
2088 if (limit >= 0 && (endpos - start) + chunked >= limit)
2089 endpos = start + limit - chunked;
2090 break;
2091 }
2092
2093 /* We can put aside up to `endpos` */
2094 endpos = consumed + start;
2095 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2096 /* Didn't find line ending, but reached length limit */
2097 endpos = start + limit - chunked;
2098 break;
2099 }
2100
2101 if (endpos > start) {
2102 /* No line ending seen yet - put aside current data */
2103 PyObject *s;
2104 if (chunks == NULL) {
2105 chunks = PyList_New(0);
2106 if (chunks == NULL)
2107 goto error;
2108 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002109 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 if (s == NULL)
2111 goto error;
2112 if (PyList_Append(chunks, s) < 0) {
2113 Py_DECREF(s);
2114 goto error;
2115 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002116 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117 Py_DECREF(s);
2118 }
2119 /* There may be some remaining bytes we'll have to prepend to the
2120 next chunk of data */
2121 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002122 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123 if (remaining == NULL)
2124 goto error;
2125 }
2126 Py_CLEAR(line);
2127 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002128 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002129 }
2130
2131 if (line != NULL) {
2132 /* Our line ends in the current buffer */
2133 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002134 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2135 PyObject *s = PyUnicode_Substring(line, start, endpos);
2136 Py_CLEAR(line);
2137 if (s == NULL)
2138 goto error;
2139 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002140 }
2141 }
2142 if (remaining != NULL) {
2143 if (chunks == NULL) {
2144 chunks = PyList_New(0);
2145 if (chunks == NULL)
2146 goto error;
2147 }
2148 if (PyList_Append(chunks, remaining) < 0)
2149 goto error;
2150 Py_CLEAR(remaining);
2151 }
2152 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002153 if (line != NULL) {
2154 if (PyList_Append(chunks, line) < 0)
2155 goto error;
2156 Py_DECREF(line);
2157 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002158 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2159 if (line == NULL)
2160 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002161 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002162 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002163 if (line == NULL) {
2164 Py_INCREF(_PyIO_empty_str);
2165 line = _PyIO_empty_str;
2166 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167
2168 return line;
2169
2170 error:
2171 Py_XDECREF(chunks);
2172 Py_XDECREF(remaining);
2173 Py_XDECREF(line);
2174 return NULL;
2175}
2176
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002177/*[clinic input]
2178_io.TextIOWrapper.readline
2179 size: Py_ssize_t = -1
2180 /
2181[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002182
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002183static PyObject *
2184_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2185/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2186{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002187 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002188 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002189}
2190
2191/* Seek and Tell */
2192
2193typedef struct {
2194 Py_off_t start_pos;
2195 int dec_flags;
2196 int bytes_to_feed;
2197 int chars_to_skip;
2198 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002199} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002200
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the intermediary
   byte string the various cookie_type fields will be stored.
 */

/* Total size of the intermediary byte string: one slot per field. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2232
2233static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002234textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235{
2236 unsigned char buffer[COOKIE_BUF_LEN];
2237 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2238 if (cookieLong == NULL)
2239 return -1;
2240
2241 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002242 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002243 Py_DECREF(cookieLong);
2244 return -1;
2245 }
2246 Py_DECREF(cookieLong);
2247
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002248 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2249 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2250 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2251 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2252 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002253
2254 return 0;
2255}
2256
2257static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002258textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002259{
2260 unsigned char buffer[COOKIE_BUF_LEN];
2261
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002262 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2263 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2264 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2265 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2266 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002267
Christian Heimes743e0cd2012-10-17 23:52:17 +02002268 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2269 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002270}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002271
2272static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002273_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002274{
2275 PyObject *res;
2276 /* When seeking to the start of the stream, we call decoder.reset()
2277 rather than decoder.getstate().
2278 This is for a few decoders such as utf-16 for which the state value
2279 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2280 utf-16, that we are expecting a BOM).
2281 */
2282 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2283 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2284 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002285 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2286 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002287 if (res == NULL)
2288 return -1;
2289 Py_DECREF(res);
2290 return 0;
2291}
2292
Antoine Pitroue4501852009-05-14 18:55:55 +00002293static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002294_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002295{
2296 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002297 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002298 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2299 self->encoding_start_of_stream = 1;
2300 }
2301 else {
2302 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002303 _PyLong_Zero, NULL);
Antoine Pitroue4501852009-05-14 18:55:55 +00002304 self->encoding_start_of_stream = 0;
2305 }
2306 if (res == NULL)
2307 return -1;
2308 Py_DECREF(res);
2309 return 0;
2310}
2311
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002312static int
2313_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2314{
2315 /* Same as _textiowrapper_decoder_setstate() above. */
2316 return _textiowrapper_encoder_reset(
2317 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2318}
2319
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002320/*[clinic input]
2321_io.TextIOWrapper.seek
2322 cookie as cookieObj: object
2323 whence: int = 0
2324 /
2325[clinic start generated code]*/
2326
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002327static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002328_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2329/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002330{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002331 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002332 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002333 PyObject *res;
2334 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002335 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002336
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002337 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002338 CHECK_CLOSED(self);
2339
2340 Py_INCREF(cookieObj);
2341
2342 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002343 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002344 goto fail;
2345 }
2346
2347 if (whence == 1) {
2348 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002349 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002350 if (cmp < 0)
2351 goto fail;
2352
2353 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002354 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002355 goto fail;
2356 }
2357
2358 /* Seeking to the current position should attempt to
2359 * sync the underlying buffer with the current position.
2360 */
2361 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002362 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002363 if (cookieObj == NULL)
2364 goto fail;
2365 }
2366 else if (whence == 2) {
2367 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002368 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002369 if (cmp < 0)
2370 goto fail;
2371
2372 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002373 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002374 goto fail;
2375 }
2376
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002377 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002378 if (res == NULL)
2379 goto fail;
2380 Py_DECREF(res);
2381
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002382 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002383 Py_CLEAR(self->snapshot);
2384 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002385 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002386 if (res == NULL)
2387 goto fail;
2388 Py_DECREF(res);
2389 }
2390
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002391 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002392 Py_CLEAR(cookieObj);
2393 if (res == NULL)
2394 goto fail;
2395 if (self->encoder) {
2396 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002397 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002398 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2399 Py_DECREF(res);
2400 goto fail;
2401 }
2402 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002403 return res;
2404 }
2405 else if (whence != 0) {
2406 PyErr_Format(PyExc_ValueError,
2407 "invalid whence (%d, should be 0, 1 or 2)", whence);
2408 goto fail;
2409 }
2410
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002411 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002412 if (cmp < 0)
2413 goto fail;
2414
2415 if (cmp == 1) {
2416 PyErr_Format(PyExc_ValueError,
2417 "negative seek position %R", cookieObj);
2418 goto fail;
2419 }
2420
2421 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2422 if (res == NULL)
2423 goto fail;
2424 Py_DECREF(res);
2425
2426 /* The strategy of seek() is to go back to the safe start point
2427 * and replay the effect of read(chars_to_skip) from there.
2428 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002429 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002430 goto fail;
2431
2432 /* Seek back to the safe start point. */
2433 posobj = PyLong_FromOff_t(cookie.start_pos);
2434 if (posobj == NULL)
2435 goto fail;
2436 res = PyObject_CallMethodObjArgs(self->buffer,
2437 _PyIO_str_seek, posobj, NULL);
2438 Py_DECREF(posobj);
2439 if (res == NULL)
2440 goto fail;
2441 Py_DECREF(res);
2442
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002443 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002444 Py_CLEAR(self->snapshot);
2445
2446 /* Restore the decoder to its state from the safe start point. */
2447 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002448 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002449 goto fail;
2450 }
2451
2452 if (cookie.chars_to_skip) {
2453 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002454 PyObject *input_chunk = _PyObject_CallMethodId(
2455 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002456 PyObject *decoded;
2457
2458 if (input_chunk == NULL)
2459 goto fail;
2460
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002461 if (!PyBytes_Check(input_chunk)) {
2462 PyErr_Format(PyExc_TypeError,
2463 "underlying read() should have returned a bytes "
2464 "object, not '%.200s'",
2465 Py_TYPE(input_chunk)->tp_name);
2466 Py_DECREF(input_chunk);
2467 goto fail;
2468 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002469
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002470 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2471 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002472 goto fail;
2473 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002474 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002475
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002476 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2477 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002479 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480 goto fail;
2481
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002482 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002483
2484 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002485 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002486 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002487 goto fail;
2488 }
2489 self->decoded_chars_used = cookie.chars_to_skip;
2490 }
2491 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002492 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2493 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002495 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002496 }
2497
Antoine Pitroue4501852009-05-14 18:55:55 +00002498 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2499 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002500 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002501 goto fail;
2502 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503 return cookieObj;
2504 fail:
2505 Py_XDECREF(cookieObj);
2506 return NULL;
2507
2508}
2509
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002510/*[clinic input]
2511_io.TextIOWrapper.tell
2512[clinic start generated code]*/
2513
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002514static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002515_io_TextIOWrapper_tell_impl(textio *self)
2516/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002517{
2518 PyObject *res;
2519 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002520 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002521 PyObject *next_input;
2522 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002523 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002524 PyObject *saved_state = NULL;
2525 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002526 Py_ssize_t dec_buffer_len;
2527 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002529 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530 CHECK_CLOSED(self);
2531
2532 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002533 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534 goto fail;
2535 }
2536 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002537 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002538 "telling position disabled by next() call");
2539 goto fail;
2540 }
2541
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002542 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002543 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002544 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002545 if (res == NULL)
2546 goto fail;
2547 Py_DECREF(res);
2548
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002549 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002550 if (posobj == NULL)
2551 goto fail;
2552
2553 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002554 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002555 return posobj;
2556 }
2557
2558#if defined(HAVE_LARGEFILE_SUPPORT)
2559 cookie.start_pos = PyLong_AsLongLong(posobj);
2560#else
2561 cookie.start_pos = PyLong_AsLong(posobj);
2562#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002563 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002564 if (PyErr_Occurred())
2565 goto fail;
2566
2567 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002568 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002569 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002570 goto fail;
2571
2572 assert (PyBytes_Check(next_input));
2573
2574 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2575
2576 /* How many decoded characters have been used up since the snapshot? */
2577 if (self->decoded_chars_used == 0) {
2578 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002579 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002580 }
2581
2582 chars_to_skip = self->decoded_chars_used;
2583
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002584 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002585 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2586 _PyIO_str_getstate, NULL);
2587 if (saved_state == NULL)
2588 goto fail;
2589
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002590#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002591 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002592 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2593 _PyIO_str_getstate, NULL); \
2594 if (_state == NULL) \
2595 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002596 if (!PyTuple_Check(_state)) { \
2597 PyErr_SetString(PyExc_TypeError, \
2598 "illegal decoder state"); \
2599 Py_DECREF(_state); \
2600 goto fail; \
2601 } \
2602 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2603 &dec_buffer, &dec_flags)) \
2604 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002605 Py_DECREF(_state); \
2606 goto fail; \
2607 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002608 if (!PyBytes_Check(dec_buffer)) { \
2609 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002610 "illegal decoder state: the first item should be a " \
2611 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002612 Py_TYPE(dec_buffer)->tp_name); \
2613 Py_DECREF(_state); \
2614 goto fail; \
2615 } \
2616 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002617 Py_DECREF(_state); \
2618 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002620#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002621 PyObject *_decoded = _PyObject_CallMethodId( \
2622 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002623 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002624 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002625 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002626 Py_DECREF(_decoded); \
2627 } while (0)
2628
2629 /* Fast search for an acceptable start point, close to our
2630 current pos */
2631 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2632 skip_back = 1;
2633 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2634 input = PyBytes_AS_STRING(next_input);
2635 while (skip_bytes > 0) {
2636 /* Decode up to temptative start point */
2637 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2638 goto fail;
2639 DECODER_DECODE(input, skip_bytes, chars_decoded);
2640 if (chars_decoded <= chars_to_skip) {
2641 DECODER_GETSTATE();
2642 if (dec_buffer_len == 0) {
2643 /* Before pos and no bytes buffered in decoder => OK */
2644 cookie.dec_flags = dec_flags;
2645 chars_to_skip -= chars_decoded;
2646 break;
2647 }
2648 /* Skip back by buffered amount and reset heuristic */
2649 skip_bytes -= dec_buffer_len;
2650 skip_back = 1;
2651 }
2652 else {
2653 /* We're too far ahead, skip back a bit */
2654 skip_bytes -= skip_back;
2655 skip_back *= 2;
2656 }
2657 }
2658 if (skip_bytes <= 0) {
2659 skip_bytes = 0;
2660 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2661 goto fail;
2662 }
2663
2664 /* Note our initial start point. */
2665 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002666 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002667 if (chars_to_skip == 0)
2668 goto finally;
2669
2670 /* We should be close to the desired position. Now feed the decoder one
2671 * byte at a time until we reach the `chars_to_skip` target.
2672 * As we go, note the nearest "safe start point" before the current
2673 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002674 * can safely start from there and advance to this location).
2675 */
2676 chars_decoded = 0;
2677 input = PyBytes_AS_STRING(next_input);
2678 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002679 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002681 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002683 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002684 /* We got n chars for 1 byte */
2685 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002687 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002688
2689 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2690 /* Decoder buffer is empty, so this is a safe start point. */
2691 cookie.start_pos += cookie.bytes_to_feed;
2692 chars_to_skip -= chars_decoded;
2693 cookie.dec_flags = dec_flags;
2694 cookie.bytes_to_feed = 0;
2695 chars_decoded = 0;
2696 }
2697 if (chars_decoded >= chars_to_skip)
2698 break;
2699 input++;
2700 }
2701 if (input == input_end) {
2702 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002703 PyObject *decoded = _PyObject_CallMethodId(
2704 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002705 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002707 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002708 Py_DECREF(decoded);
2709 cookie.need_eof = 1;
2710
2711 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002712 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002713 "can't reconstruct logical file position");
2714 goto fail;
2715 }
2716 }
2717
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002718finally:
Victor Stinner7e425412016-12-09 00:36:19 +01002719 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002720 Py_DECREF(saved_state);
2721 if (res == NULL)
2722 return NULL;
2723 Py_DECREF(res);
2724
2725 /* The returned cookie corresponds to the last safe start point. */
2726 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002727 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002728
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002729fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002730 if (saved_state) {
2731 PyObject *type, *value, *traceback;
2732 PyErr_Fetch(&type, &value, &traceback);
Victor Stinner7e425412016-12-09 00:36:19 +01002733 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002734 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002735 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002736 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002737 }
2738 return NULL;
2739}
2740
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002741/*[clinic input]
2742_io.TextIOWrapper.truncate
2743 pos: object = None
2744 /
2745[clinic start generated code]*/
2746
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002747static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002748_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2749/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002750{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002751 PyObject *res;
2752
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002753 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002754
2755 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2756 if (res == NULL)
2757 return NULL;
2758 Py_DECREF(res);
2759
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002760 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002761}
2762
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002763static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002764textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002765{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002766 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002767 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002768
2769 CHECK_INITIALIZED(self);
2770
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002771 res = PyUnicode_FromString("<_io.TextIOWrapper");
2772 if (res == NULL)
2773 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002774
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002775 status = Py_ReprEnter((PyObject *)self);
2776 if (status != 0) {
2777 if (status > 0) {
2778 PyErr_Format(PyExc_RuntimeError,
2779 "reentrant call inside %s.__repr__",
2780 Py_TYPE(self)->tp_name);
2781 }
2782 goto error;
2783 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002784 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002785 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002786 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002787 PyErr_Clear();
2788 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002789 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002790 }
2791 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002792 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002793 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002794 if (s == NULL)
2795 goto error;
2796 PyUnicode_AppendAndDel(&res, s);
2797 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002798 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002799 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002800 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002801 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002802 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002803 PyErr_Clear();
2804 else
2805 goto error;
2806 }
2807 else {
2808 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2809 Py_DECREF(modeobj);
2810 if (s == NULL)
2811 goto error;
2812 PyUnicode_AppendAndDel(&res, s);
2813 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002814 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002815 }
2816 s = PyUnicode_FromFormat("%U encoding=%R>",
2817 res, self->encoding);
2818 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002819 if (status == 0) {
2820 Py_ReprLeave((PyObject *)self);
2821 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002822 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002823
2824 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002825 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002826 if (status == 0) {
2827 Py_ReprLeave((PyObject *)self);
2828 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002829 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002830}
2831
2832
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002833/* Inquiries */
2834
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002835/*[clinic input]
2836_io.TextIOWrapper.fileno
2837[clinic start generated code]*/
2838
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002839static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002840_io_TextIOWrapper_fileno_impl(textio *self)
2841/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002842{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002843 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002844 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002845}
2846
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002847/*[clinic input]
2848_io.TextIOWrapper.seekable
2849[clinic start generated code]*/
2850
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002851static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002852_io_TextIOWrapper_seekable_impl(textio *self)
2853/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002854{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002855 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002856 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002857}
2858
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002859/*[clinic input]
2860_io.TextIOWrapper.readable
2861[clinic start generated code]*/
2862
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002863static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002864_io_TextIOWrapper_readable_impl(textio *self)
2865/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002866{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002867 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002868 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002869}
2870
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002871/*[clinic input]
2872_io.TextIOWrapper.writable
2873[clinic start generated code]*/
2874
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002875static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002876_io_TextIOWrapper_writable_impl(textio *self)
2877/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002878{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002879 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002880 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002881}
2882
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002883/*[clinic input]
2884_io.TextIOWrapper.isatty
2885[clinic start generated code]*/
2886
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002887static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002888_io_TextIOWrapper_isatty_impl(textio *self)
2889/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002890{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002891 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002892 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002893}
2894
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002895/*[clinic input]
2896_io.TextIOWrapper.flush
2897[clinic start generated code]*/
2898
Antoine Pitrou243757e2010-11-05 21:15:39 +00002899static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002900_io_TextIOWrapper_flush_impl(textio *self)
2901/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002902{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002903 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002904 CHECK_CLOSED(self);
2905 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002906 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002907 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002908 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002909}
2910
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002911/*[clinic input]
2912_io.TextIOWrapper.close
2913[clinic start generated code]*/
2914
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002915static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002916_io_TextIOWrapper_close_impl(textio *self)
2917/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002918{
2919 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002920 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002921 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002922
Antoine Pitrou6be88762010-05-03 16:48:20 +00002923 res = textiowrapper_closed_get(self, NULL);
2924 if (res == NULL)
2925 return NULL;
2926 r = PyObject_IsTrue(res);
2927 Py_DECREF(res);
2928 if (r < 0)
2929 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002930
Antoine Pitrou6be88762010-05-03 16:48:20 +00002931 if (r > 0) {
2932 Py_RETURN_NONE; /* stream already closed */
2933 }
2934 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002935 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002936 if (self->finalizing) {
Victor Stinner61bdb0d2016-12-09 15:39:28 +01002937 res = _PyObject_CallMethodIdObjArgs(self->buffer,
2938 &PyId__dealloc_warn,
2939 self, NULL);
Antoine Pitroue033e062010-10-29 10:38:18 +00002940 if (res)
2941 Py_DECREF(res);
2942 else
2943 PyErr_Clear();
2944 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002945 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002946 if (res == NULL)
2947 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002948 else
2949 Py_DECREF(res);
2950
Benjamin Peterson68623612012-12-20 11:53:11 -06002951 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2952 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002953 _PyErr_ChainExceptions(exc, val, tb);
2954 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002955 }
2956 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002957 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002958}
2959
2960static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002961textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002962{
2963 PyObject *line;
2964
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002965 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002966
2967 self->telling = 0;
2968 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2969 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002970 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002971 }
2972 else {
2973 line = PyObject_CallMethodObjArgs((PyObject *)self,
2974 _PyIO_str_readline, NULL);
2975 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002976 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03002977 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002978 "not '%.200s'", Py_TYPE(line)->tp_name);
2979 Py_DECREF(line);
2980 return NULL;
2981 }
2982 }
2983
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002984 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002985 return NULL;
2986
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002987 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002988 /* Reached EOF or would have blocked */
2989 Py_DECREF(line);
2990 Py_CLEAR(self->snapshot);
2991 self->telling = self->seekable;
2992 return NULL;
2993 }
2994
2995 return line;
2996}
2997
2998static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002999textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003000{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003001 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003002 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003003}
3004
3005static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003006textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003007{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003008 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003009 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3010}
3011
3012static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003013textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003014{
3015 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003016 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003017 if (self->decoder == NULL ||
3018 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3019 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003020 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003021 }
3022 return res;
3023}
3024
3025static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003026textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003027{
3028 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003029 Py_INCREF(self->errors);
3030 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003031}
3032
3033static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003034textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003035{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003036 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003037 return PyLong_FromSsize_t(self->chunk_size);
3038}
3039
3040static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003041textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003042{
3043 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003044 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003045 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003046 if (n == -1 && PyErr_Occurred())
3047 return -1;
3048 if (n <= 0) {
3049 PyErr_SetString(PyExc_ValueError,
3050 "a strictly positive integer is required");
3051 return -1;
3052 }
3053 self->chunk_size = n;
3054 return 0;
3055}
3056
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003057#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003058
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003059static PyMethodDef incrementalnewlinedecoder_methods[] = {
3060 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3061 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3062 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3063 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3064 {NULL}
3065};
3066
3067static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3068 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3069 {NULL}
3070};
3071
3072PyTypeObject PyIncrementalNewlineDecoder_Type = {
3073 PyVarObject_HEAD_INIT(NULL, 0)
3074 "_io.IncrementalNewlineDecoder", /*tp_name*/
3075 sizeof(nldecoder_object), /*tp_basicsize*/
3076 0, /*tp_itemsize*/
3077 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3078 0, /*tp_print*/
3079 0, /*tp_getattr*/
3080 0, /*tp_setattr*/
3081 0, /*tp_compare */
3082 0, /*tp_repr*/
3083 0, /*tp_as_number*/
3084 0, /*tp_as_sequence*/
3085 0, /*tp_as_mapping*/
3086 0, /*tp_hash */
3087 0, /*tp_call*/
3088 0, /*tp_str*/
3089 0, /*tp_getattro*/
3090 0, /*tp_setattro*/
3091 0, /*tp_as_buffer*/
3092 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3093 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3094 0, /* tp_traverse */
3095 0, /* tp_clear */
3096 0, /* tp_richcompare */
3097 0, /*tp_weaklistoffset*/
3098 0, /* tp_iter */
3099 0, /* tp_iternext */
3100 incrementalnewlinedecoder_methods, /* tp_methods */
3101 0, /* tp_members */
3102 incrementalnewlinedecoder_getset, /* tp_getset */
3103 0, /* tp_base */
3104 0, /* tp_dict */
3105 0, /* tp_descr_get */
3106 0, /* tp_descr_set */
3107 0, /* tp_dictoffset */
3108 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3109 0, /* tp_alloc */
3110 PyType_GenericNew, /* tp_new */
3111};
3112
3113
3114static PyMethodDef textiowrapper_methods[] = {
3115 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003116 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003117 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3118 _IO_TEXTIOWRAPPER_READ_METHODDEF
3119 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3120 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3121 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3122
3123 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3124 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3125 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3126 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3127 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003128
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003129 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3130 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3131 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003132 {NULL, NULL}
3133};
3134
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003135static PyMemberDef textiowrapper_members[] = {
3136 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3137 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3138 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003139 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003140 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003141 {NULL}
3142};
3143
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003144static PyGetSetDef textiowrapper_getset[] = {
3145 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3146 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003147/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3148*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003149 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3150 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3151 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3152 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003154};
3155
3156PyTypeObject PyTextIOWrapper_Type = {
3157 PyVarObject_HEAD_INIT(NULL, 0)
3158 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003159 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003160 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003161 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003162 0, /*tp_print*/
3163 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003164 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003165 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003166 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003167 0, /*tp_as_number*/
3168 0, /*tp_as_sequence*/
3169 0, /*tp_as_mapping*/
3170 0, /*tp_hash */
3171 0, /*tp_call*/
3172 0, /*tp_str*/
3173 0, /*tp_getattro*/
3174 0, /*tp_setattro*/
3175 0, /*tp_as_buffer*/
3176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02003177 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003178 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003179 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3180 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003181 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003182 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003183 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003184 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3185 textiowrapper_methods, /* tp_methods */
3186 textiowrapper_members, /* tp_members */
3187 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003188 0, /* tp_base */
3189 0, /* tp_dict */
3190 0, /* tp_descr_get */
3191 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003192 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003193 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003194 0, /* tp_alloc */
3195 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003196 0, /* tp_free */
3197 0, /* tp_is_gc */
3198 0, /* tp_bases */
3199 0, /* tp_mro */
3200 0, /* tp_cache */
3201 0, /* tp_subclasses */
3202 0, /* tp_weaklist */
3203 0, /* tp_del */
3204 0, /* tp_version_tag */
3205 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003206};