blob: a466d3a03a5ba313f303e1d89649027721b1938e [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030014/*[clinic input]
15module _io
16class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18[clinic start generated code]*/
19/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20
/* Identifier objects for the attribute and method names used in this file.
   Each _Py_IDENTIFIER(x) makes a PyId_x object available; the code below
   passes them by address (for example &PyId_strict and &PyId_setstate). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000042/* TextIOBase */
43
/* Class docstring; installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
51
52static PyObject *
53_unsupported(const char *message)
54{
Antoine Pitrou712cb732013-12-21 15:51:54 +010055 _PyIO_State *state = IO_STATE();
56 if (state != NULL)
57 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 return NULL;
59}
60
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000061PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000062 "Separate the underlying buffer from the TextIOBase and return it.\n"
63 "\n"
64 "After the underlying buffer has been detached, the TextIO is in an\n"
65 "unusable state.\n"
66 );
67
68static PyObject *
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +053069textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
Benjamin Petersond2e0c792009-05-01 20:40:59 +000070{
71 return _unsupported("detach");
72}
73
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000074PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000075 "Read at most n characters from stream.\n"
76 "\n"
77 "Read from underlying buffer until we have n characters or we hit EOF.\n"
78 "If n is negative or omitted, read until EOF.\n"
79 );
80
81static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000082textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000083{
84 return _unsupported("read");
85}
86
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000087PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000088 "Read until newline or EOF.\n"
89 "\n"
90 "Returns an empty string if EOF is hit immediately.\n"
91 );
92
93static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000094textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000095{
96 return _unsupported("readline");
97}
98
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000099PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100 "Write string to stream.\n"
101 "Returns the number of characters written (which is always equal to\n"
102 "the length of the string).\n"
103 );
104
105static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000106textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000107{
108 return _unsupported("write");
109}
110
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000111PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000112 "Encoding of the text stream.\n"
113 "\n"
114 "Subclasses should override.\n"
115 );
116
117static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000118textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119{
120 Py_RETURN_NONE;
121}
122
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124 "Line endings translated so far.\n"
125 "\n"
126 "Only line endings translated during reading are considered.\n"
127 "\n"
128 "Subclasses should override.\n"
129 );
130
131static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000132textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133{
134 Py_RETURN_NONE;
135}
136
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000137PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000138 "The error setting of the decoder or encoder.\n"
139 "\n"
140 "Subclasses should override.\n"
141 );
142
143static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000144textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000145{
146 Py_RETURN_NONE;
147}
148
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000149
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000150static PyMethodDef textiobase_methods[] = {
Siddhesh Poyarekar55edd0c2018-04-30 00:29:33 +0530151 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000152 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
153 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
154 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000155 {NULL, NULL}
156};
157
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000158static PyGetSetDef textiobase_getset[] = {
159 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
160 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
161 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000162 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000163};
164
165PyTypeObject PyTextIOBase_Type = {
166 PyVarObject_HEAD_INIT(NULL, 0)
167 "_io._TextIOBase", /*tp_name*/
168 0, /*tp_basicsize*/
169 0, /*tp_itemsize*/
170 0, /*tp_dealloc*/
171 0, /*tp_print*/
172 0, /*tp_getattr*/
173 0, /*tp_setattr*/
174 0, /*tp_compare */
175 0, /*tp_repr*/
176 0, /*tp_as_number*/
177 0, /*tp_as_sequence*/
178 0, /*tp_as_mapping*/
179 0, /*tp_hash */
180 0, /*tp_call*/
181 0, /*tp_str*/
182 0, /*tp_getattro*/
183 0, /*tp_setattro*/
184 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200185 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
186 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 0, /* tp_traverse */
189 0, /* tp_clear */
190 0, /* tp_richcompare */
191 0, /* tp_weaklistoffset */
192 0, /* tp_iter */
193 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000194 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000195 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197 &PyIOBase_Type, /* tp_base */
198 0, /* tp_dict */
199 0, /* tp_descr_get */
200 0, /* tp_descr_set */
201 0, /* tp_dictoffset */
202 0, /* tp_init */
203 0, /* tp_alloc */
204 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200205 0, /* tp_free */
206 0, /* tp_is_gc */
207 0, /* tp_bases */
208 0, /* tp_mro */
209 0, /* tp_cache */
210 0, /* tp_subclasses */
211 0, /* tp_weaklist */
212 0, /* tp_del */
213 0, /* tp_version_tag */
214 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000215};
216
217
218/* IncrementalNewlineDecoder */
219
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder, or Py_None; NULL until
                                   __init__ has run */
    PyObject *errors;           /* error-handler name stored by __init__ */
    unsigned int pendingcr: 1;  /* set when decode() held back a trailing
                                   '\r' from its output */
    unsigned int translate: 1;  /* non-zero: decode() rewrites "\r\n" and
                                   "\r" as "\n" */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000228
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300229/*[clinic input]
230_io.IncrementalNewlineDecoder.__init__
231 decoder: object
232 translate: int
233 errors: object(c_default="NULL") = "strict"
234
235Codec used when reading a file in universal newlines mode.
236
237It wraps another incremental decoder, translating \r\n and \r into \n.
238It also records the types of newlines encountered. When used with
239translate=False, it ensures that the newline sequence is returned in
240one piece. When used with decoder=None, it expects unicode strings as
241decode input and translates newlines without first invoking an external
242decoder.
243[clinic start generated code]*/
244
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000245static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300246_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247 PyObject *decoder, int translate,
248 PyObject *errors)
249/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000250{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251 self->decoder = decoder;
252 Py_INCREF(decoder);
253
254 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900255 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000256 if (self->errors == NULL)
257 return -1;
258 }
259 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000260 self->errors = errors;
261 }
INADA Naoki507434f2017-12-21 09:59:53 +0900262 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000263
264 self->translate = translate;
265 self->seennl = 0;
266 self->pendingcr = 0;
267
268 return 0;
269}
270
271static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273{
274 Py_CLEAR(self->decoder);
275 Py_CLEAR(self->errors);
276 Py_TYPE(self)->tp_free((PyObject *)self);
277}
278
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200279static int
280check_decoded(PyObject *decoded)
281{
282 if (decoded == NULL)
283 return -1;
284 if (!PyUnicode_Check(decoded)) {
285 PyErr_Format(PyExc_TypeError,
286 "decoder should return a string result, not '%.200s'",
287 Py_TYPE(decoded)->tp_name);
288 Py_DECREF(decoded);
289 return -1;
290 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200291 if (PyUnicode_READY(decoded) < 0) {
292 Py_DECREF(decoded);
293 return -1;
294 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200295 return 0;
296}
297
/* Bit flags stored in nldecoder_object.seennl: one bit per kind of line
   ending met so far.  SEEN_ALL is the union of the three. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302
/* Decode `input` and apply the newline bookkeeping of
   IncrementalNewlineDecoder.

   myself: an nldecoder_object (cast without a type check).
   input:  passed to the wrapped decoder's decode(); when the wrapped
           decoder is None it is used directly and must be a str.
   final:  non-zero when this is the last chunk.

   Steps:
     1. obtain the decoded str;
     2. if a '\r' was held back by the previous call, put it back in front;
     3. unless `final`, hold back a trailing '\r' (sets pendingcr);
     4. scan for line endings, updating self->seennl and, when
        self->translate is set, rewriting "\r\n" and "\r" as "\n".

   Returns a new reference to a str, or NULL with an exception set. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    /* decoder is only assigned by __init__; NULL means it never ran */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* no wrapped decoder: the input itself is the text */
        output = input;
        Py_INCREF(output);
    }

    /* on failure check_decoded() has already released `output` */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* '\r' fits in any kind, so the max char value of `output` is
           enough for the new string */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` is used as the per-character byte width here */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;   /* working copy, merged back at the end */
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* nothing to scan; seennl cannot have changed */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           For kinds wider than one byte a hit may be part of some other
           character; the scanning branches below then compare whole
           characters.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* wide kind: the byte hit must be confirmed character
                       by character */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        /* NOTE(review): this loop does not compare i with
                           len; it appears to rely on the element after the
                           last character comparing <= '\n' -- confirm */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* record-only mode: classify line endings, leave text as is */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                /* NOTE(review): same unbounded read as above; relies on the
                   element after the last character being <= '\r' --
                   confirm */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* look one character ahead to tell "\r\n" from "\r" */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* translate mode: build a copy with every line ending as "\n" */
            void *translated;
            /* these two shadow the outer declarations and hold the same
               values */
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* the result is never longer than the input, so kind * len
               bytes are enough */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                /* NOTE(review): `in` is post-incremented before any bound
                   check; the loop appears to stop on the element after the
                   last character -- confirm */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        /* "\r\n": consume the '\n' too, emit one '\n' */
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` already points past the character just read, so
                   in > len means that character was beyond the end */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* the old output was released above, so this must not go
               through the error label */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
503
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300504/*[clinic input]
505_io.IncrementalNewlineDecoder.decode
506 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200507 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300508[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000509
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300510static PyObject *
511_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
512 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200513/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300514{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000515 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
516}
517
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300518/*[clinic input]
519_io.IncrementalNewlineDecoder.getstate
520[clinic start generated code]*/
521
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000522static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300523_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700527 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528
529 if (self->decoder != Py_None) {
530 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531 _PyIO_str_getstate, NULL);
532 if (state == NULL)
533 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300534 if (!PyTuple_Check(state)) {
535 PyErr_SetString(PyExc_TypeError,
536 "illegal decoder state");
537 Py_DECREF(state);
538 return NULL;
539 }
540 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541 &buffer, &flag))
542 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000543 Py_DECREF(state);
544 return NULL;
545 }
546 Py_INCREF(buffer);
547 Py_DECREF(state);
548 }
549 else {
550 buffer = PyBytes_FromString("");
551 flag = 0;
552 }
553 flag <<= 1;
554 if (self->pendingcr)
555 flag |= 1;
556 return Py_BuildValue("NK", buffer, flag);
557}
558
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300559/*[clinic input]
560_io.IncrementalNewlineDecoder.setstate
561 state: object
562 /
563[clinic start generated code]*/
564
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300566_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567 PyObject *state)
568/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000569{
570 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700571 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572
Oren Milman1d1d3e92017-08-20 18:35:36 +0300573 if (!PyTuple_Check(state)) {
574 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300576 }
577 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578 &buffer, &flag))
579 {
580 return NULL;
581 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000582
Victor Stinner7d7e7752014-06-17 23:31:25 +0200583 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584 flag >>= 1;
585
586 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200587 return _PyObject_CallMethodId(self->decoder,
588 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 else
590 Py_RETURN_NONE;
591}
592
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300593/*[clinic input]
594_io.IncrementalNewlineDecoder.reset
595[clinic start generated code]*/
596
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000597static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300598_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000600{
601 self->seennl = 0;
602 self->pendingcr = 0;
603 if (self->decoder != Py_None)
604 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605 else
606 Py_RETURN_NONE;
607}
608
609static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000610incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000611{
612 switch (self->seennl) {
613 case SEEN_CR:
614 return PyUnicode_FromString("\r");
615 case SEEN_LF:
616 return PyUnicode_FromString("\n");
617 case SEEN_CRLF:
618 return PyUnicode_FromString("\r\n");
619 case SEEN_CR | SEEN_LF:
620 return Py_BuildValue("ss", "\r", "\n");
621 case SEEN_CR | SEEN_CRLF:
622 return Py_BuildValue("ss", "\r", "\r\n");
623 case SEEN_LF | SEEN_CRLF:
624 return Py_BuildValue("ss", "\n", "\r\n");
625 case SEEN_CR | SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("sss", "\r", "\n", "\r\n");
627 default:
628 Py_RETURN_NONE;
629 }
630
631}
632
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000633/* TextIOWrapper */
634
/* Signature under which the specialized encoders are stored (see the
   encodefunc member of textio and of encodefuncentry).  Both parameters are
   declared as plain PyObject pointers here, whereas the *_encode helpers
   in this file take a `textio *` first. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
637
/* Instance layout of _io.TextIOWrapper (the clinic declaration at the top
   of the file binds that class to "textio *"). */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;       /* read through PyUnicode_AsUTF8() by the
                               specialized *_encode helpers */
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    /* (utf16_encode and utf32_encode only emit a BOM while this is set) */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696
/* Forward declaration, so that code placed before the definition can call
   this helper. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
699
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700/* A couple of specialized cases in order to bypass the slow incremental
701 encoding methods for the most popular encodings. */
702
703static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000704ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000705{
INADA Naoki507434f2017-12-21 09:59:53 +0900706 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707}
708
709static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000710utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000711{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100712 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900713 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714}
715
716static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000717utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100719 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900720 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721}
722
723static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000724utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725{
Antoine Pitroue4501852009-05-14 18:55:55 +0000726 if (!self->encoding_start_of_stream) {
727 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200728#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000731 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000732#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000733 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900735 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
Antoine Pitroue4501852009-05-14 18:55:55 +0000738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000740{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100741 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900742 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000743}
744
745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000746utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000747{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100748 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900749 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000750}
751
752static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000753utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000754{
755 if (!self->encoding_start_of_stream) {
756 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200757#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000758 return utf32be_encode(self, text);
759#else
760 return utf32le_encode(self, text);
761#endif
762 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900764 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769{
INADA Naoki507434f2017-12-21 09:59:53 +0900770 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771}
772
773static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000774latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000775{
INADA Naoki507434f2017-12-21 09:59:53 +0900776 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000777}
778
779/* Map normalized encoding names onto the specialized encoding funcs */
780
781typedef struct {
782 const char *name;
783 encodefunc_t encodefunc;
784} encodefuncentry;
785
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200786static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000787 {"ascii", (encodefunc_t) ascii_encode},
788 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000789 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000790 {"utf-16-be", (encodefunc_t) utf16be_encode},
791 {"utf-16-le", (encodefunc_t) utf16le_encode},
792 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000793 {"utf-32-be", (encodefunc_t) utf32be_encode},
794 {"utf-32-le", (encodefunc_t) utf32le_encode},
795 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000796 {NULL, NULL}
797};
798
INADA Naoki507434f2017-12-21 09:59:53 +0900799static int
800validate_newline(const char *newline)
801{
802 if (newline && newline[0] != '\0'
803 && !(newline[0] == '\n' && newline[1] == '\0')
804 && !(newline[0] == '\r' && newline[1] == '\0')
805 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
806 PyErr_Format(PyExc_ValueError,
807 "illegal newline value: %s", newline);
808 return -1;
809 }
810 return 0;
811}
812
813static int
814set_newline(textio *self, const char *newline)
815{
816 PyObject *old = self->readnl;
817 if (newline == NULL) {
818 self->readnl = NULL;
819 }
820 else {
821 self->readnl = PyUnicode_FromString(newline);
822 if (self->readnl == NULL) {
823 self->readnl = old;
824 return -1;
825 }
826 }
827 self->readuniversal = (newline == NULL || newline[0] == '\0');
828 self->readtranslate = (newline == NULL);
829 self->writetranslate = (newline == NULL || newline[0] != '\0');
830 if (!self->readuniversal && self->readnl != NULL) {
831 // validate_newline() accepts only ASCII newlines.
832 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
833 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
834 if (strcmp(self->writenl, "\n") == 0) {
835 self->writenl = NULL;
836 }
837 }
838 else {
839#ifdef MS_WINDOWS
840 self->writenl = "\r\n";
841#else
842 self->writenl = NULL;
843#endif
844 }
845 Py_XDECREF(old);
846 return 0;
847}
848
849static int
850_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
851 const char *errors)
852{
853 PyObject *res;
854 int r;
855
856 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
857 if (res == NULL)
858 return -1;
859
860 r = PyObject_IsTrue(res);
861 Py_DECREF(res);
862 if (r == -1)
863 return -1;
864
865 if (r != 1)
866 return 0;
867
868 Py_CLEAR(self->decoder);
869 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
870 if (self->decoder == NULL)
871 return -1;
872
873 if (self->readuniversal) {
874 PyObject *incrementalDecoder = PyObject_CallFunction(
875 (PyObject *)&PyIncrementalNewlineDecoder_Type,
876 "Oi", self->decoder, (int)self->readtranslate);
877 if (incrementalDecoder == NULL)
878 return -1;
879 Py_CLEAR(self->decoder);
880 self->decoder = incrementalDecoder;
881 }
882
883 return 0;
884}
885
886static PyObject*
887_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
888{
889 PyObject *chars;
890
891 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
892 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
893 else
894 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
895 eof ? Py_True : Py_False, NULL);
896
897 if (check_decoded(chars) < 0)
898 // check_decoded already decreases refcount
899 return NULL;
900
901 return chars;
902}
903
904static int
905_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
906 const char *errors)
907{
908 PyObject *res;
909 int r;
910
911 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
912 if (res == NULL)
913 return -1;
914
915 r = PyObject_IsTrue(res);
916 Py_DECREF(res);
917 if (r == -1)
918 return -1;
919
920 if (r != 1)
921 return 0;
922
923 Py_CLEAR(self->encoder);
924 self->encodefunc = NULL;
925 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
926 if (self->encoder == NULL)
927 return -1;
928
929 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200930 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
931 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900932 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200933 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900934 const encodefuncentry *e = encodefuncs;
935 while (e->name != NULL) {
936 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
937 self->encodefunc = e->encodefunc;
938 break;
939 }
940 e++;
941 }
942 }
943 Py_XDECREF(res);
944
945 return 0;
946}
947
948static int
949_textiowrapper_fix_encoder_state(textio *self)
950{
951 if (!self->seekable || !self->encoder) {
952 return 0;
953 }
954
955 self->encoding_start_of_stream = 1;
956
957 PyObject *cookieObj = PyObject_CallMethodObjArgs(
958 self->buffer, _PyIO_str_tell, NULL);
959 if (cookieObj == NULL) {
960 return -1;
961 }
962
963 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
964 Py_DECREF(cookieObj);
965 if (cmp < 0) {
966 return -1;
967 }
968
969 if (cmp == 0) {
970 self->encoding_start_of_stream = 0;
971 PyObject *res = PyObject_CallMethodObjArgs(
972 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
973 if (res == NULL) {
974 return -1;
975 }
976 Py_DECREF(res);
977 }
978
979 return 0;
980}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000981
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300982/*[clinic input]
983_io.TextIOWrapper.__init__
984 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700985 encoding: str(accept={str, NoneType}) = NULL
INADA Naoki507434f2017-12-21 09:59:53 +0900986 errors: object = None
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700987 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200988 line_buffering: bool(accept={int}) = False
989 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300991Character and line based layer over a BufferedIOBase object, buffer.
992
993encoding gives the name of the encoding that the stream will be
994decoded or encoded with. It defaults to locale.getpreferredencoding(False).
995
996errors determines the strictness of encoding and decoding (see
997help(codecs.Codec) or the documentation for codecs.register) and
998defaults to "strict".
999
1000newline controls how line endings are handled. It can be None, '',
1001'\n', '\r', and '\r\n'. It works as follows:
1002
1003* On input, if newline is None, universal newlines mode is
1004 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1005 these are translated into '\n' before being returned to the
1006 caller. If it is '', universal newline mode is enabled, but line
1007 endings are returned to the caller untranslated. If it has any of
1008 the other legal values, input lines are only terminated by the given
1009 string, and the line ending is returned to the caller untranslated.
1010
1011* On output, if newline is None, any '\n' characters written are
1012 translated to the system default line separator, os.linesep. If
1013 newline is '' or '\n', no translation takes place. If newline is any
1014 of the other legal values, any '\n' characters written are translated
1015 to the given string.
1016
1017If line_buffering is True, a call to flush is implied when a call to
1018write contains a newline character.
1019[clinic start generated code]*/
1020
1021static int
1022_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001023 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001024 const char *newline, int line_buffering,
1025 int write_through)
INADA Naoki507434f2017-12-21 09:59:53 +09001026/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001027{
1028 PyObject *raw, *codec_info = NULL;
1029 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001030 PyObject *res;
1031 int r;
1032
1033 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001034 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001035
INADA Naoki507434f2017-12-21 09:59:53 +09001036 if (errors == Py_None) {
1037 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001038 if (errors == NULL) {
1039 return -1;
1040 }
INADA Naoki507434f2017-12-21 09:59:53 +09001041 }
1042 else if (!PyUnicode_Check(errors)) {
1043 // Check 'errors' argument here because Argument Clinic doesn't support
1044 // 'str(accept={str, NoneType})' converter.
1045 PyErr_Format(
1046 PyExc_TypeError,
1047 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1048 errors->ob_type->tp_name);
1049 return -1;
1050 }
1051
1052 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001053 return -1;
1054 }
1055
1056 Py_CLEAR(self->buffer);
1057 Py_CLEAR(self->encoding);
1058 Py_CLEAR(self->encoder);
1059 Py_CLEAR(self->decoder);
1060 Py_CLEAR(self->readnl);
1061 Py_CLEAR(self->decoded_chars);
1062 Py_CLEAR(self->pending_bytes);
1063 Py_CLEAR(self->snapshot);
1064 Py_CLEAR(self->errors);
1065 Py_CLEAR(self->raw);
1066 self->decoded_chars_used = 0;
1067 self->pending_bytes_count = 0;
1068 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001069 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001070
1071 if (encoding == NULL) {
1072 /* Try os.device_encoding(fileno) */
1073 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001074 state = IO_STATE();
1075 if (state == NULL)
1076 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001077 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001078 /* Ignore only AttributeError and UnsupportedOperation */
1079 if (fileno == NULL) {
1080 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1081 PyErr_ExceptionMatches(state->unsupported_operation)) {
1082 PyErr_Clear();
1083 }
1084 else {
1085 goto error;
1086 }
1087 }
1088 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001089 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001090 Py_DECREF(fileno);
1091 if (fd == -1 && PyErr_Occurred()) {
1092 goto error;
1093 }
1094
1095 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001096 if (self->encoding == NULL)
1097 goto error;
1098 else if (!PyUnicode_Check(self->encoding))
1099 Py_CLEAR(self->encoding);
1100 }
1101 }
1102 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001103 PyObject *locale_module = _PyIO_get_locale_module(state);
1104 if (locale_module == NULL)
1105 goto catch_ImportError;
Victor Stinner61bdb0d2016-12-09 15:39:28 +01001106 self->encoding = _PyObject_CallMethodIdObjArgs(
1107 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001108 Py_DECREF(locale_module);
1109 if (self->encoding == NULL) {
1110 catch_ImportError:
1111 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001112 Importing locale can raise an ImportError because of
1113 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001114 ImportError if _locale is not available. These will happen
1115 during module building.
1116 */
1117 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1118 PyErr_Clear();
1119 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001120 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001121 else
1122 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001123 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001124 else if (!PyUnicode_Check(self->encoding))
1125 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001126 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001127 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001128 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001129 if (encoding == NULL)
1130 goto error;
1131 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001132 else if (encoding != NULL) {
1133 self->encoding = PyUnicode_FromString(encoding);
1134 if (self->encoding == NULL)
1135 goto error;
1136 }
1137 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001138 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001139 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001140 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001141 }
1142
Nick Coghlana9b15242014-02-04 22:11:18 +10001143 /* Check we have been asked for a real text encoding */
1144 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1145 if (codec_info == NULL) {
1146 Py_CLEAR(self->encoding);
1147 goto error;
1148 }
1149
1150 /* XXX: Failures beyond this point have the potential to leak elements
1151 * of the partially constructed object (like self->encoding)
1152 */
1153
INADA Naoki507434f2017-12-21 09:59:53 +09001154 Py_INCREF(errors);
1155 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001156 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001157 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001158 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001159 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001160 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001161 }
1162
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001163 self->buffer = buffer;
1164 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001165
INADA Naoki507434f2017-12-21 09:59:53 +09001166 /* Build the decoder object */
1167 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1168 goto error;
1169
1170 /* Build the encoder object */
1171 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1172 goto error;
1173
1174 /* Finished sorting out the codec details */
1175 Py_CLEAR(codec_info);
1176
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001177 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1178 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001179 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1180 {
1181 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1182 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001184 if (raw != NULL) {
1185 if (Py_TYPE(raw) == &PyFileIO_Type)
1186 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001187 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001188 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001189 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001190 }
1191
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001192 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001193 if (res == NULL)
1194 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001195 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001196 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001197 if (r < 0)
1198 goto error;
1199 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001200
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001201 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1202 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001203 goto error;
1204 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001205 Py_XDECREF(res);
1206 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001207
Antoine Pitroue4501852009-05-14 18:55:55 +00001208 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001209 if (_textiowrapper_fix_encoder_state(self) < 0) {
1210 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001211 }
1212
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001213 self->ok = 1;
1214 return 0;
1215
1216 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001217 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001218 return -1;
1219}
1220
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001221/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1222 * -1 on error.
1223 */
1224static int
1225convert_optional_bool(PyObject *obj, int default_value)
1226{
1227 long v;
1228 if (obj == Py_None) {
1229 v = default_value;
1230 }
1231 else {
1232 v = PyLong_AsLong(obj);
1233 if (v == -1 && PyErr_Occurred())
1234 return -1;
1235 }
1236 return v != 0;
1237}
1238
INADA Naoki507434f2017-12-21 09:59:53 +09001239static int
1240textiowrapper_change_encoding(textio *self, PyObject *encoding,
1241 PyObject *errors, int newline_changed)
1242{
1243 /* Use existing settings where new settings are not specified */
1244 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1245 return 0; // no change
1246 }
1247
1248 if (encoding == Py_None) {
1249 encoding = self->encoding;
1250 if (errors == Py_None) {
1251 errors = self->errors;
1252 }
1253 }
1254 else if (errors == Py_None) {
1255 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001256 if (errors == NULL) {
1257 return -1;
1258 }
INADA Naoki507434f2017-12-21 09:59:53 +09001259 }
1260
1261 const char *c_errors = PyUnicode_AsUTF8(errors);
1262 if (c_errors == NULL) {
1263 return -1;
1264 }
1265
1266 // Create new encoder & decoder
1267 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1268 PyUnicode_AsUTF8(encoding), "codecs.open()");
1269 if (codec_info == NULL) {
1270 return -1;
1271 }
1272 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1273 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1274 Py_DECREF(codec_info);
1275 return -1;
1276 }
1277 Py_DECREF(codec_info);
1278
1279 Py_INCREF(encoding);
1280 Py_INCREF(errors);
1281 Py_SETREF(self->encoding, encoding);
1282 Py_SETREF(self->errors, errors);
1283
1284 return _textiowrapper_fix_encoder_state(self);
1285}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001286
1287/*[clinic input]
1288_io.TextIOWrapper.reconfigure
1289 *
INADA Naoki507434f2017-12-21 09:59:53 +09001290 encoding: object = None
1291 errors: object = None
1292 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001293 line_buffering as line_buffering_obj: object = None
1294 write_through as write_through_obj: object = None
1295
1296Reconfigure the text stream with new parameters.
1297
1298This also does an implicit stream flush.
1299
1300[clinic start generated code]*/
1301
1302static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001303_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1304 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001305 PyObject *line_buffering_obj,
1306 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001307/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001308{
1309 int line_buffering;
1310 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001311 const char *newline = NULL;
1312
1313 /* Check if something is in the read buffer */
1314 if (self->decoded_chars != NULL) {
1315 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1316 _unsupported("It is not possible to set the encoding or newline"
1317 "of stream after the first read");
1318 return NULL;
1319 }
1320 }
1321
1322 if (newline_obj != NULL && newline_obj != Py_None) {
1323 newline = PyUnicode_AsUTF8(newline_obj);
1324 if (newline == NULL || validate_newline(newline) < 0) {
1325 return NULL;
1326 }
1327 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001328
1329 line_buffering = convert_optional_bool(line_buffering_obj,
1330 self->line_buffering);
1331 write_through = convert_optional_bool(write_through_obj,
1332 self->write_through);
1333 if (line_buffering < 0 || write_through < 0) {
1334 return NULL;
1335 }
INADA Naoki507434f2017-12-21 09:59:53 +09001336
1337 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001338 if (res == NULL) {
1339 return NULL;
1340 }
INADA Naoki507434f2017-12-21 09:59:53 +09001341 Py_DECREF(res);
1342 self->b2cratio = 0;
1343
1344 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1345 return NULL;
1346 }
1347
1348 if (textiowrapper_change_encoding(
1349 self, encoding, errors, newline_obj != NULL) < 0) {
1350 return NULL;
1351 }
1352
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001353 self->line_buffering = line_buffering;
1354 self->write_through = write_through;
1355 Py_RETURN_NONE;
1356}
1357
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001358static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001359textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001360{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001361 self->ok = 0;
1362 Py_CLEAR(self->buffer);
1363 Py_CLEAR(self->encoding);
1364 Py_CLEAR(self->encoder);
1365 Py_CLEAR(self->decoder);
1366 Py_CLEAR(self->readnl);
1367 Py_CLEAR(self->decoded_chars);
1368 Py_CLEAR(self->pending_bytes);
1369 Py_CLEAR(self->snapshot);
1370 Py_CLEAR(self->errors);
1371 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001372
1373 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001374 return 0;
1375}
1376
1377static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001378textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001379{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001380 self->finalizing = 1;
1381 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001382 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001383 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001384 _PyObject_GC_UNTRACK(self);
1385 if (self->weakreflist != NULL)
1386 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001387 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001388 Py_TYPE(self)->tp_free((PyObject *)self);
1389}
1390
1391static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001392textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001393{
1394 Py_VISIT(self->buffer);
1395 Py_VISIT(self->encoding);
1396 Py_VISIT(self->encoder);
1397 Py_VISIT(self->decoder);
1398 Py_VISIT(self->readnl);
1399 Py_VISIT(self->decoded_chars);
1400 Py_VISIT(self->pending_bytes);
1401 Py_VISIT(self->snapshot);
1402 Py_VISIT(self->errors);
1403 Py_VISIT(self->raw);
1404
1405 Py_VISIT(self->dict);
1406 return 0;
1407}
1408
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001409static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001410textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001411
1412/* This macro takes some shortcuts to make the common case faster. */
1413#define CHECK_CLOSED(self) \
1414 do { \
1415 int r; \
1416 PyObject *_res; \
1417 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1418 if (self->raw != NULL) \
1419 r = _PyFileIO_closed(self->raw); \
1420 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001421 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001422 if (_res == NULL) \
1423 return NULL; \
1424 r = PyObject_IsTrue(_res); \
1425 Py_DECREF(_res); \
1426 if (r < 0) \
1427 return NULL; \
1428 } \
1429 if (r > 0) { \
1430 PyErr_SetString(PyExc_ValueError, \
1431 "I/O operation on closed file."); \
1432 return NULL; \
1433 } \
1434 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001435 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001436 return NULL; \
1437 } while (0)
1438
/* The three macros below are wrapped in do { } while (0), like
   CHECK_CLOSED, so that each expands to exactly one statement: they can be
   used under an unbraced if/else without capturing a following 'else' or
   leaving part of the check unguarded.  Invoke them with a trailing
   semicolon. */

/* Make the enclosing function return NULL with ValueError set unless
   __init__ completed successfully (self->ok > 0). */
#define CHECK_INITIALIZED(self) \
    do { \
        if (self->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Like CHECK_INITIALIZED, and additionally fail when the underlying
   buffer has been detached. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)

/* Variant of CHECK_ATTACHED for functions that report errors by
   returning -1. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if (self->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1464
1465
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001466/*[clinic input]
1467_io.TextIOWrapper.detach
1468[clinic start generated code]*/
1469
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001470static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001471_io_TextIOWrapper_detach_impl(textio *self)
1472/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001473{
1474 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001475 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001476 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1477 if (res == NULL)
1478 return NULL;
1479 Py_DECREF(res);
1480 buffer = self->buffer;
1481 self->buffer = NULL;
1482 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001483 return buffer;
1484}
1485
Antoine Pitrou24f36292009-03-28 22:16:42 +00001486/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001487 underlying buffered object, though. */
1488static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001489_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001490{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001491 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001492
1493 if (self->pending_bytes == NULL)
1494 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001495
1496 pending = self->pending_bytes;
1497 Py_INCREF(pending);
1498 self->pending_bytes_count = 0;
1499 Py_CLEAR(self->pending_bytes);
1500
1501 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1502 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001503 if (b == NULL)
1504 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001505 ret = NULL;
1506 do {
1507 ret = PyObject_CallMethodObjArgs(self->buffer,
1508 _PyIO_str_write, b, NULL);
1509 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001510 Py_DECREF(b);
1511 if (ret == NULL)
1512 return -1;
1513 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001514 return 0;
1515}
1516
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001517/*[clinic input]
1518_io.TextIOWrapper.write
1519 text: unicode
1520 /
1521[clinic start generated code]*/
1522
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001523static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001524_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1525/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001526{
1527 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001528 PyObject *b;
1529 Py_ssize_t textlen;
1530 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001531 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001532
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001533 if (PyUnicode_READY(text) == -1)
1534 return NULL;
1535
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001536 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001537 CHECK_CLOSED(self);
1538
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001539 if (self->encoder == NULL)
1540 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001541
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001542 Py_INCREF(text);
1543
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001544 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001545
1546 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001547 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001548 haslf = 1;
1549
1550 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001551 PyObject *newtext = _PyObject_CallMethodId(
1552 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001553 Py_DECREF(text);
1554 if (newtext == NULL)
1555 return NULL;
1556 text = newtext;
1557 }
1558
Antoine Pitroue96ec682011-07-23 21:46:35 +02001559 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001560 text_needflush = 1;
1561 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001562 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001563 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 needflush = 1;
1565
1566 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001567 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001568 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001569 self->encoding_start_of_stream = 0;
1570 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001571 else
1572 b = PyObject_CallMethodObjArgs(self->encoder,
1573 _PyIO_str_encode, text, NULL);
1574 Py_DECREF(text);
1575 if (b == NULL)
1576 return NULL;
Oren Milmana5b4ea12017-08-25 21:14:54 +03001577 if (!PyBytes_Check(b)) {
1578 PyErr_Format(PyExc_TypeError,
1579 "encoder should return a bytes object, not '%.200s'",
1580 Py_TYPE(b)->tp_name);
1581 Py_DECREF(b);
1582 return NULL;
1583 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001584
1585 if (self->pending_bytes == NULL) {
1586 self->pending_bytes = PyList_New(0);
1587 if (self->pending_bytes == NULL) {
1588 Py_DECREF(b);
1589 return NULL;
1590 }
1591 self->pending_bytes_count = 0;
1592 }
1593 if (PyList_Append(self->pending_bytes, b) < 0) {
1594 Py_DECREF(b);
1595 return NULL;
1596 }
1597 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1598 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001599 if (self->pending_bytes_count > self->chunk_size || needflush ||
1600 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001601 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602 return NULL;
1603 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001604
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001605 if (needflush) {
1606 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1607 if (ret == NULL)
1608 return NULL;
1609 Py_DECREF(ret);
1610 }
1611
Zackery Spytz23db9352018-06-29 04:14:58 -06001612 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613 Py_CLEAR(self->snapshot);
1614
1615 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001616 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001617 if (ret == NULL)
1618 return NULL;
1619 Py_DECREF(ret);
1620 }
1621
1622 return PyLong_FromSsize_t(textlen);
1623}
1624
1625/* Steal a reference to chars and store it in the decoded_char buffer;
1626 */
1627static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001628textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001630 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001631 self->decoded_chars_used = 0;
1632}
1633
1634static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001635textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001636{
1637 PyObject *chars;
1638 Py_ssize_t avail;
1639
1640 if (self->decoded_chars == NULL)
1641 return PyUnicode_FromStringAndSize(NULL, 0);
1642
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001643 /* decoded_chars is guaranteed to be "ready". */
1644 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645 - self->decoded_chars_used);
1646
1647 assert(avail >= 0);
1648
1649 if (n < 0 || n > avail)
1650 n = avail;
1651
1652 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001653 chars = PyUnicode_Substring(self->decoded_chars,
1654 self->decoded_chars_used,
1655 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 if (chars == NULL)
1657 return NULL;
1658 }
1659 else {
1660 chars = self->decoded_chars;
1661 Py_INCREF(chars);
1662 }
1663
1664 self->decoded_chars_used += n;
1665 return chars;
1666}
1667
1668/* Read and decode the next chunk of data from the BufferedReader.
1669 */
1670static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001671textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672{
1673 PyObject *dec_buffer = NULL;
1674 PyObject *dec_flags = NULL;
1675 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001676 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001677 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001678 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001679 int eof;
1680
1681 /* The return value is True unless EOF was reached. The decoded string is
1682 * placed in self._decoded_chars (replacing its previous value). The
1683 * entire input chunk is sent to the decoder, though some of it may remain
1684 * buffered in the decoder, yet to be converted.
1685 */
1686
1687 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001688 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001689 return -1;
1690 }
1691
1692 if (self->telling) {
1693 /* To prepare for tell(), we need to snapshot a point in the file
1694 * where the decoder's input buffer is empty.
1695 */
1696
1697 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1698 _PyIO_str_getstate, NULL);
1699 if (state == NULL)
1700 return -1;
1701 /* Given this, we know there was a valid snapshot point
1702 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1703 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001704 if (!PyTuple_Check(state)) {
1705 PyErr_SetString(PyExc_TypeError,
1706 "illegal decoder state");
1707 Py_DECREF(state);
1708 return -1;
1709 }
1710 if (!PyArg_ParseTuple(state,
1711 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1712 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 Py_DECREF(state);
1714 return -1;
1715 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001716
1717 if (!PyBytes_Check(dec_buffer)) {
1718 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001719 "illegal decoder state: the first item should be a "
1720 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001721 Py_TYPE(dec_buffer)->tp_name);
1722 Py_DECREF(state);
1723 return -1;
1724 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001725 Py_INCREF(dec_buffer);
1726 Py_INCREF(dec_flags);
1727 Py_DECREF(state);
1728 }
1729
1730 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001731 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001732 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001733 }
1734 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 if (chunk_size == NULL)
1736 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001737
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001738 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001739 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1740 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001741 Py_DECREF(chunk_size);
1742 if (input_chunk == NULL)
1743 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001744
1745 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001746 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001747 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001748 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1749 Py_TYPE(input_chunk)->tp_name);
1750 goto fail;
1751 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752
Antoine Pitroub8503892014-04-29 10:14:02 +02001753 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001754 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001755
INADA Naoki507434f2017-12-21 09:59:53 +09001756 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1757 PyBuffer_Release(&input_chunk_buf);
1758 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001759 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001760
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001761 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001762 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001763 if (nchars > 0)
1764 self->b2cratio = (double) nbytes / nchars;
1765 else
1766 self->b2cratio = 0.0;
1767 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001768 eof = 0;
1769
1770 if (self->telling) {
1771 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1772 * next input to be decoded is dec_buffer + input_chunk.
1773 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001774 PyObject *next_input = dec_buffer;
1775 PyBytes_Concat(&next_input, input_chunk);
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001776 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Antoine Pitroub8503892014-04-29 10:14:02 +02001777 if (next_input == NULL) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001778 goto fail;
1779 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03001780 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1781 if (snapshot == NULL) {
1782 dec_flags = NULL;
1783 goto fail;
1784 }
1785 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001786 }
1787 Py_DECREF(input_chunk);
1788
1789 return (eof == 0);
1790
1791 fail:
1792 Py_XDECREF(dec_buffer);
1793 Py_XDECREF(dec_flags);
1794 Py_XDECREF(input_chunk);
1795 return -1;
1796}
1797
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001798/*[clinic input]
1799_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001800 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001801 /
1802[clinic start generated code]*/
1803
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001804static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001805_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001806/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001807{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001808 PyObject *result = NULL, *chunks = NULL;
1809
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001810 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001811 CHECK_CLOSED(self);
1812
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001813 if (self->decoder == NULL)
1814 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001815
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001816 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001817 return NULL;
1818
1819 if (n < 0) {
1820 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001821 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001822 PyObject *decoded;
1823 if (bytes == NULL)
1824 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001825
1826 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1827 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1828 bytes, 1);
1829 else
1830 decoded = PyObject_CallMethodObjArgs(
1831 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001832 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001833 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001834 goto fail;
1835
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001836 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001837
1838 if (result == NULL) {
1839 Py_DECREF(decoded);
1840 return NULL;
1841 }
1842
1843 PyUnicode_AppendAndDel(&result, decoded);
1844 if (result == NULL)
1845 goto fail;
1846
Zackery Spytz23db9352018-06-29 04:14:58 -06001847 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001848 Py_CLEAR(self->snapshot);
1849 return result;
1850 }
1851 else {
1852 int res = 1;
1853 Py_ssize_t remaining = n;
1854
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001855 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001856 if (result == NULL)
1857 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001858 if (PyUnicode_READY(result) == -1)
1859 goto fail;
1860 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001861
1862 /* Keep reading chunks until we have n characters to return */
1863 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001864 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001865 if (res < 0) {
1866 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1867 when EINTR occurs so we needn't do it ourselves. */
1868 if (_PyIO_trap_eintr()) {
1869 continue;
1870 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001871 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001872 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001873 if (res == 0) /* EOF */
1874 break;
1875 if (chunks == NULL) {
1876 chunks = PyList_New(0);
1877 if (chunks == NULL)
1878 goto fail;
1879 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001880 if (PyUnicode_GET_LENGTH(result) > 0 &&
1881 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001882 goto fail;
1883 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001884 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001885 if (result == NULL)
1886 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001887 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001888 }
1889 if (chunks != NULL) {
1890 if (result != NULL && PyList_Append(chunks, result) < 0)
1891 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03001892 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001893 if (result == NULL)
1894 goto fail;
1895 Py_CLEAR(chunks);
1896 }
1897 return result;
1898 }
1899 fail:
1900 Py_XDECREF(result);
1901 Py_XDECREF(chunks);
1902 return NULL;
1903}
1904
1905
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001906/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001907 that is to the NUL character. Otherwise the function will produce
1908 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001909static const char *
1910find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001912 if (kind == PyUnicode_1BYTE_KIND) {
1913 assert(ch < 256);
1914 return (char *) memchr((void *) s, (char) ch, end - s);
1915 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001916 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001917 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001918 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001919 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001920 return s;
1921 if (s == end)
1922 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001923 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001924 }
1925}
1926
1927Py_ssize_t
1928_PyIO_find_line_ending(
1929 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001930 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001931{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001932 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001933
1934 if (translated) {
1935 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001936 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001937 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001938 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001939 else {
1940 *consumed = len;
1941 return -1;
1942 }
1943 }
1944 else if (universal) {
1945 /* Universal newline search. Find any of \r, \r\n, \n
1946 * The decoder ensures that \r\n are not split in two pieces
1947 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001948 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001949 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001950 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001951 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001952 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001953 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001954 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001955 if (s >= end) {
1956 *consumed = len;
1957 return -1;
1958 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001959 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001960 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001961 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001962 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001964 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001965 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001966 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001967 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968 }
1969 }
1970 }
1971 else {
1972 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001973 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001974 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001975 /* Assume that readnl is an ASCII character. */
1976 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001978 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001979 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001980 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001981 *consumed = len;
1982 return -1;
1983 }
1984 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001985 const char *s = start;
1986 const char *e = end - (readnl_len - 1)*kind;
1987 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001988 if (e < s)
1989 e = s;
1990 while (s < e) {
1991 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001992 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001993 if (pos == NULL || pos >= e)
1994 break;
1995 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001996 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001997 break;
1998 }
1999 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002000 return (pos - start)/kind + readnl_len;
2001 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002003 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004 if (pos == NULL)
2005 *consumed = len;
2006 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002007 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008 return -1;
2009 }
2010 }
2011}
2012
2013static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002014_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002015{
2016 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2017 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2018 int res;
2019
2020 CHECK_CLOSED(self);
2021
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002022 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002023 return NULL;
2024
2025 chunked = 0;
2026
2027 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002028 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002029 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002030 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031 Py_ssize_t consumed = 0;
2032
2033 /* First, get some data if necessary */
2034 res = 1;
2035 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002036 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01002037 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07002038 if (res < 0) {
2039 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2040 when EINTR occurs so we needn't do it ourselves. */
2041 if (_PyIO_trap_eintr()) {
2042 continue;
2043 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07002045 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002046 if (res == 0)
2047 break;
2048 }
2049 if (res == 0) {
2050 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002051 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 Py_CLEAR(self->snapshot);
2053 start = endpos = offset_to_buffer = 0;
2054 break;
2055 }
2056
2057 if (remaining == NULL) {
2058 line = self->decoded_chars;
2059 start = self->decoded_chars_used;
2060 offset_to_buffer = 0;
2061 Py_INCREF(line);
2062 }
2063 else {
2064 assert(self->decoded_chars_used == 0);
2065 line = PyUnicode_Concat(remaining, self->decoded_chars);
2066 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002067 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068 Py_CLEAR(remaining);
2069 if (line == NULL)
2070 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002071 if (PyUnicode_READY(line) == -1)
2072 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002073 }
2074
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002075 ptr = PyUnicode_DATA(line);
2076 line_len = PyUnicode_GET_LENGTH(line);
2077 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002078
2079 endpos = _PyIO_find_line_ending(
2080 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002081 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02002082 ptr + kind * start,
2083 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002084 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002085 if (endpos >= 0) {
2086 endpos += start;
2087 if (limit >= 0 && (endpos - start) + chunked >= limit)
2088 endpos = start + limit - chunked;
2089 break;
2090 }
2091
2092 /* We can put aside up to `endpos` */
2093 endpos = consumed + start;
2094 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2095 /* Didn't find line ending, but reached length limit */
2096 endpos = start + limit - chunked;
2097 break;
2098 }
2099
2100 if (endpos > start) {
2101 /* No line ending seen yet - put aside current data */
2102 PyObject *s;
2103 if (chunks == NULL) {
2104 chunks = PyList_New(0);
2105 if (chunks == NULL)
2106 goto error;
2107 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002108 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002109 if (s == NULL)
2110 goto error;
2111 if (PyList_Append(chunks, s) < 0) {
2112 Py_DECREF(s);
2113 goto error;
2114 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002115 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002116 Py_DECREF(s);
2117 }
2118 /* There may be some remaining bytes we'll have to prepend to the
2119 next chunk of data */
2120 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002121 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002122 if (remaining == NULL)
2123 goto error;
2124 }
2125 Py_CLEAR(line);
2126 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002127 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002128 }
2129
2130 if (line != NULL) {
2131 /* Our line ends in the current buffer */
2132 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002133 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2134 PyObject *s = PyUnicode_Substring(line, start, endpos);
2135 Py_CLEAR(line);
2136 if (s == NULL)
2137 goto error;
2138 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002139 }
2140 }
2141 if (remaining != NULL) {
2142 if (chunks == NULL) {
2143 chunks = PyList_New(0);
2144 if (chunks == NULL)
2145 goto error;
2146 }
2147 if (PyList_Append(chunks, remaining) < 0)
2148 goto error;
2149 Py_CLEAR(remaining);
2150 }
2151 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002152 if (line != NULL) {
2153 if (PyList_Append(chunks, line) < 0)
2154 goto error;
2155 Py_DECREF(line);
2156 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157 line = PyUnicode_Join(_PyIO_empty_str, chunks);
2158 if (line == NULL)
2159 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002160 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002161 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002162 if (line == NULL) {
2163 Py_INCREF(_PyIO_empty_str);
2164 line = _PyIO_empty_str;
2165 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002166
2167 return line;
2168
2169 error:
2170 Py_XDECREF(chunks);
2171 Py_XDECREF(remaining);
2172 Py_XDECREF(line);
2173 return NULL;
2174}
2175
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002176/*[clinic input]
2177_io.TextIOWrapper.readline
2178 size: Py_ssize_t = -1
2179 /
2180[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002181
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002182static PyObject *
2183_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2184/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2185{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002186 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002187 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002188}
2189
2190/* Seek and Tell */
2191
2192typedef struct {
2193 Py_off_t start_pos;
2194 int dec_flags;
2195 int bytes_to_feed;
2196 int chars_to_skip;
2197 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002198} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002199
/*
   Cookies are packed and unpacked through an intermediary byte buffer that
   is converted with _PyLong_FromByteArray() / _PyLong_AsByteArray().
   The macros below give the size of that buffer and the offset at which
   each cookie_type field is stored in it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* The least significant byte of start_pos must also be the least
   significant byte of the cookie, so on big-endian machines the fields are
   laid out in reverse order, start_pos last. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: fields are laid out in declaration order, and the
   least significant byte of start_pos naturally ends up as the least
   significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2231
2232static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002233textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002234{
2235 unsigned char buffer[COOKIE_BUF_LEN];
2236 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2237 if (cookieLong == NULL)
2238 return -1;
2239
2240 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002241 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002242 Py_DECREF(cookieLong);
2243 return -1;
2244 }
2245 Py_DECREF(cookieLong);
2246
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002247 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2248 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2249 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2250 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2251 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252
2253 return 0;
2254}
2255
2256static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002257textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002258{
2259 unsigned char buffer[COOKIE_BUF_LEN];
2260
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002261 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2262 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2263 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2264 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2265 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002266
Christian Heimes743e0cd2012-10-17 23:52:17 +02002267 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2268 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002269}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002270
2271static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002272_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002273{
2274 PyObject *res;
2275 /* When seeking to the start of the stream, we call decoder.reset()
2276 rather than decoder.getstate().
2277 This is for a few decoders such as utf-16 for which the state value
2278 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2279 utf-16, that we are expecting a BOM).
2280 */
2281 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2282 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2283 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002284 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2285 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002286 if (res == NULL)
2287 return -1;
2288 Py_DECREF(res);
2289 return 0;
2290}
2291
Antoine Pitroue4501852009-05-14 18:55:55 +00002292static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002293_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002294{
2295 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002296 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002297 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2298 self->encoding_start_of_stream = 1;
2299 }
2300 else {
2301 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002302 _PyLong_Zero, NULL);
Antoine Pitroue4501852009-05-14 18:55:55 +00002303 self->encoding_start_of_stream = 0;
2304 }
2305 if (res == NULL)
2306 return -1;
2307 Py_DECREF(res);
2308 return 0;
2309}
2310
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002311static int
2312_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2313{
2314 /* Same as _textiowrapper_decoder_setstate() above. */
2315 return _textiowrapper_encoder_reset(
2316 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2317}
2318
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002319/*[clinic input]
2320_io.TextIOWrapper.seek
2321 cookie as cookieObj: object
2322 whence: int = 0
2323 /
2324[clinic start generated code]*/
2325
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002326static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002327_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2328/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002329{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002330 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002331 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002332 PyObject *res;
2333 int cmp;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002334 PyObject *snapshot;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002335
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002336 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002337 CHECK_CLOSED(self);
2338
2339 Py_INCREF(cookieObj);
2340
2341 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002342 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002343 goto fail;
2344 }
2345
2346 if (whence == 1) {
2347 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002348 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002349 if (cmp < 0)
2350 goto fail;
2351
2352 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002353 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002354 goto fail;
2355 }
2356
2357 /* Seeking to the current position should attempt to
2358 * sync the underlying buffer with the current position.
2359 */
2360 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002361 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002362 if (cookieObj == NULL)
2363 goto fail;
2364 }
2365 else if (whence == 2) {
2366 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002367 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002368 if (cmp < 0)
2369 goto fail;
2370
2371 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002372 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002373 goto fail;
2374 }
2375
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002376 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002377 if (res == NULL)
2378 goto fail;
2379 Py_DECREF(res);
2380
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002381 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002382 Py_CLEAR(self->snapshot);
2383 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002384 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002385 if (res == NULL)
2386 goto fail;
2387 Py_DECREF(res);
2388 }
2389
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002390 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002391 Py_CLEAR(cookieObj);
2392 if (res == NULL)
2393 goto fail;
2394 if (self->encoder) {
2395 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002396 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002397 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2398 Py_DECREF(res);
2399 goto fail;
2400 }
2401 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002402 return res;
2403 }
2404 else if (whence != 0) {
2405 PyErr_Format(PyExc_ValueError,
2406 "invalid whence (%d, should be 0, 1 or 2)", whence);
2407 goto fail;
2408 }
2409
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002410 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002411 if (cmp < 0)
2412 goto fail;
2413
2414 if (cmp == 1) {
2415 PyErr_Format(PyExc_ValueError,
2416 "negative seek position %R", cookieObj);
2417 goto fail;
2418 }
2419
2420 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2421 if (res == NULL)
2422 goto fail;
2423 Py_DECREF(res);
2424
2425 /* The strategy of seek() is to go back to the safe start point
2426 * and replay the effect of read(chars_to_skip) from there.
2427 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002428 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002429 goto fail;
2430
2431 /* Seek back to the safe start point. */
2432 posobj = PyLong_FromOff_t(cookie.start_pos);
2433 if (posobj == NULL)
2434 goto fail;
2435 res = PyObject_CallMethodObjArgs(self->buffer,
2436 _PyIO_str_seek, posobj, NULL);
2437 Py_DECREF(posobj);
2438 if (res == NULL)
2439 goto fail;
2440 Py_DECREF(res);
2441
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002442 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002443 Py_CLEAR(self->snapshot);
2444
2445 /* Restore the decoder to its state from the safe start point. */
2446 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002447 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002448 goto fail;
2449 }
2450
2451 if (cookie.chars_to_skip) {
2452 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002453 PyObject *input_chunk = _PyObject_CallMethodId(
2454 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002455 PyObject *decoded;
2456
2457 if (input_chunk == NULL)
2458 goto fail;
2459
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002460 if (!PyBytes_Check(input_chunk)) {
2461 PyErr_Format(PyExc_TypeError,
2462 "underlying read() should have returned a bytes "
2463 "object, not '%.200s'",
2464 Py_TYPE(input_chunk)->tp_name);
2465 Py_DECREF(input_chunk);
2466 goto fail;
2467 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002468
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002469 snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2470 if (snapshot == NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471 goto fail;
2472 }
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002473 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002474
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002475 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2476 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002477
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002478 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002479 goto fail;
2480
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002481 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002482
2483 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002484 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002485 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486 goto fail;
2487 }
2488 self->decoded_chars_used = cookie.chars_to_skip;
2489 }
2490 else {
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002491 snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2492 if (snapshot == NULL)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002493 goto fail;
Serhiy Storchakafdb5a502018-06-30 20:57:50 +03002494 Py_XSETREF(self->snapshot, snapshot);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002495 }
2496
Antoine Pitroue4501852009-05-14 18:55:55 +00002497 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2498 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002499 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002500 goto fail;
2501 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002502 return cookieObj;
2503 fail:
2504 Py_XDECREF(cookieObj);
2505 return NULL;
2506
2507}
2508
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002509/*[clinic input]
2510_io.TextIOWrapper.tell
2511[clinic start generated code]*/
2512
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002514_io_TextIOWrapper_tell_impl(textio *self)
2515/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002516{
2517 PyObject *res;
2518 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002519 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520 PyObject *next_input;
2521 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002522 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002523 PyObject *saved_state = NULL;
2524 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002525 Py_ssize_t dec_buffer_len;
2526 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002527
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002528 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002529 CHECK_CLOSED(self);
2530
2531 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002532 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002533 goto fail;
2534 }
2535 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002536 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537 "telling position disabled by next() call");
2538 goto fail;
2539 }
2540
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002541 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002543 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002544 if (res == NULL)
2545 goto fail;
2546 Py_DECREF(res);
2547
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002548 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549 if (posobj == NULL)
2550 goto fail;
2551
2552 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002553 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554 return posobj;
2555 }
2556
2557#if defined(HAVE_LARGEFILE_SUPPORT)
2558 cookie.start_pos = PyLong_AsLongLong(posobj);
2559#else
2560 cookie.start_pos = PyLong_AsLong(posobj);
2561#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002562 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563 if (PyErr_Occurred())
2564 goto fail;
2565
2566 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002567 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002568 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002569 goto fail;
2570
2571 assert (PyBytes_Check(next_input));
2572
2573 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2574
2575 /* How many decoded characters have been used up since the snapshot? */
2576 if (self->decoded_chars_used == 0) {
2577 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002578 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002579 }
2580
2581 chars_to_skip = self->decoded_chars_used;
2582
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002583 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002584 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2585 _PyIO_str_getstate, NULL);
2586 if (saved_state == NULL)
2587 goto fail;
2588
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002589#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002590 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002591 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2592 _PyIO_str_getstate, NULL); \
2593 if (_state == NULL) \
2594 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002595 if (!PyTuple_Check(_state)) { \
2596 PyErr_SetString(PyExc_TypeError, \
2597 "illegal decoder state"); \
2598 Py_DECREF(_state); \
2599 goto fail; \
2600 } \
2601 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2602 &dec_buffer, &dec_flags)) \
2603 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002604 Py_DECREF(_state); \
2605 goto fail; \
2606 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002607 if (!PyBytes_Check(dec_buffer)) { \
2608 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002609 "illegal decoder state: the first item should be a " \
2610 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002611 Py_TYPE(dec_buffer)->tp_name); \
2612 Py_DECREF(_state); \
2613 goto fail; \
2614 } \
2615 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002616 Py_DECREF(_state); \
2617 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002618
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002619#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002620 PyObject *_decoded = _PyObject_CallMethodId( \
2621 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002622 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002623 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002624 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002625 Py_DECREF(_decoded); \
2626 } while (0)
2627
2628 /* Fast search for an acceptable start point, close to our
2629 current pos */
2630 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2631 skip_back = 1;
2632 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2633 input = PyBytes_AS_STRING(next_input);
2634 while (skip_bytes > 0) {
2635 /* Decode up to temptative start point */
2636 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2637 goto fail;
2638 DECODER_DECODE(input, skip_bytes, chars_decoded);
2639 if (chars_decoded <= chars_to_skip) {
2640 DECODER_GETSTATE();
2641 if (dec_buffer_len == 0) {
2642 /* Before pos and no bytes buffered in decoder => OK */
2643 cookie.dec_flags = dec_flags;
2644 chars_to_skip -= chars_decoded;
2645 break;
2646 }
2647 /* Skip back by buffered amount and reset heuristic */
2648 skip_bytes -= dec_buffer_len;
2649 skip_back = 1;
2650 }
2651 else {
2652 /* We're too far ahead, skip back a bit */
2653 skip_bytes -= skip_back;
2654 skip_back *= 2;
2655 }
2656 }
2657 if (skip_bytes <= 0) {
2658 skip_bytes = 0;
2659 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2660 goto fail;
2661 }
2662
2663 /* Note our initial start point. */
2664 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002665 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002666 if (chars_to_skip == 0)
2667 goto finally;
2668
2669 /* We should be close to the desired position. Now feed the decoder one
2670 * byte at a time until we reach the `chars_to_skip` target.
2671 * As we go, note the nearest "safe start point" before the current
2672 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673 * can safely start from there and advance to this location).
2674 */
2675 chars_decoded = 0;
2676 input = PyBytes_AS_STRING(next_input);
2677 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002678 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002679 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002680 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002681
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002682 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002683 /* We got n chars for 1 byte */
2684 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002685 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002686 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002687
2688 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2689 /* Decoder buffer is empty, so this is a safe start point. */
2690 cookie.start_pos += cookie.bytes_to_feed;
2691 chars_to_skip -= chars_decoded;
2692 cookie.dec_flags = dec_flags;
2693 cookie.bytes_to_feed = 0;
2694 chars_decoded = 0;
2695 }
2696 if (chars_decoded >= chars_to_skip)
2697 break;
2698 input++;
2699 }
2700 if (input == input_end) {
2701 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002702 PyObject *decoded = _PyObject_CallMethodId(
2703 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002704 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002705 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002706 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 Py_DECREF(decoded);
2708 cookie.need_eof = 1;
2709
2710 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002711 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002712 "can't reconstruct logical file position");
2713 goto fail;
2714 }
2715 }
2716
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002717finally:
Victor Stinner7e425412016-12-09 00:36:19 +01002718 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719 Py_DECREF(saved_state);
2720 if (res == NULL)
2721 return NULL;
2722 Py_DECREF(res);
2723
2724 /* The returned cookie corresponds to the last safe start point. */
2725 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002726 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002727
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002728fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002729 if (saved_state) {
2730 PyObject *type, *value, *traceback;
2731 PyErr_Fetch(&type, &value, &traceback);
Victor Stinner7e425412016-12-09 00:36:19 +01002732 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002733 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002734 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002735 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002736 }
2737 return NULL;
2738}
2739
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002740/*[clinic input]
2741_io.TextIOWrapper.truncate
2742 pos: object = None
2743 /
2744[clinic start generated code]*/
2745
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002746static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002747_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2748/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002749{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002750 PyObject *res;
2751
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002752 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002753
2754 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2755 if (res == NULL)
2756 return NULL;
2757 Py_DECREF(res);
2758
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002759 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002760}
2761
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002763textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002764{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002765 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002766 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002767
2768 CHECK_INITIALIZED(self);
2769
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002770 res = PyUnicode_FromString("<_io.TextIOWrapper");
2771 if (res == NULL)
2772 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002773
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002774 status = Py_ReprEnter((PyObject *)self);
2775 if (status != 0) {
2776 if (status > 0) {
2777 PyErr_Format(PyExc_RuntimeError,
2778 "reentrant call inside %s.__repr__",
2779 Py_TYPE(self)->tp_name);
2780 }
2781 goto error;
2782 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002783 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002784 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002785 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002786 PyErr_Clear();
2787 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002788 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002789 }
2790 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002791 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002792 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002793 if (s == NULL)
2794 goto error;
2795 PyUnicode_AppendAndDel(&res, s);
2796 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002797 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002798 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002799 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002800 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002801 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002802 PyErr_Clear();
2803 else
2804 goto error;
2805 }
2806 else {
2807 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2808 Py_DECREF(modeobj);
2809 if (s == NULL)
2810 goto error;
2811 PyUnicode_AppendAndDel(&res, s);
2812 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002813 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002814 }
2815 s = PyUnicode_FromFormat("%U encoding=%R>",
2816 res, self->encoding);
2817 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002818 if (status == 0) {
2819 Py_ReprLeave((PyObject *)self);
2820 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002821 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002822
2823 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002824 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002825 if (status == 0) {
2826 Py_ReprLeave((PyObject *)self);
2827 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002828 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002829}
2830
2831
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002832/* Inquiries */
2833
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002834/*[clinic input]
2835_io.TextIOWrapper.fileno
2836[clinic start generated code]*/
2837
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002838static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002839_io_TextIOWrapper_fileno_impl(textio *self)
2840/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002841{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002842 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002843 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002844}
2845
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002846/*[clinic input]
2847_io.TextIOWrapper.seekable
2848[clinic start generated code]*/
2849
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002850static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002851_io_TextIOWrapper_seekable_impl(textio *self)
2852/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002853{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002854 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002855 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002856}
2857
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002858/*[clinic input]
2859_io.TextIOWrapper.readable
2860[clinic start generated code]*/
2861
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002862static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002863_io_TextIOWrapper_readable_impl(textio *self)
2864/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002865{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002866 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002867 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002868}
2869
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002870/*[clinic input]
2871_io.TextIOWrapper.writable
2872[clinic start generated code]*/
2873
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002874static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002875_io_TextIOWrapper_writable_impl(textio *self)
2876/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002877{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002878 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002879 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002880}
2881
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002882/*[clinic input]
2883_io.TextIOWrapper.isatty
2884[clinic start generated code]*/
2885
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002886static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002887_io_TextIOWrapper_isatty_impl(textio *self)
2888/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002889{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002890 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002891 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002892}
2893
2894static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002895textiowrapper_getstate(textio *self, PyObject *args)
2896{
2897 PyErr_Format(PyExc_TypeError,
2898 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2899 return NULL;
2900}
2901
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002902/*[clinic input]
2903_io.TextIOWrapper.flush
2904[clinic start generated code]*/
2905
Antoine Pitrou243757e2010-11-05 21:15:39 +00002906static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002907_io_TextIOWrapper_flush_impl(textio *self)
2908/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002909{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002910 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002911 CHECK_CLOSED(self);
2912 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002913 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002914 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002915 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002916}
2917
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002918/*[clinic input]
2919_io.TextIOWrapper.close
2920[clinic start generated code]*/
2921
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002922static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002923_io_TextIOWrapper_close_impl(textio *self)
2924/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002925{
2926 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002927 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002928 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002929
Antoine Pitrou6be88762010-05-03 16:48:20 +00002930 res = textiowrapper_closed_get(self, NULL);
2931 if (res == NULL)
2932 return NULL;
2933 r = PyObject_IsTrue(res);
2934 Py_DECREF(res);
2935 if (r < 0)
2936 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002937
Antoine Pitrou6be88762010-05-03 16:48:20 +00002938 if (r > 0) {
2939 Py_RETURN_NONE; /* stream already closed */
2940 }
2941 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002942 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002943 if (self->finalizing) {
Victor Stinner61bdb0d2016-12-09 15:39:28 +01002944 res = _PyObject_CallMethodIdObjArgs(self->buffer,
2945 &PyId__dealloc_warn,
2946 self, NULL);
Antoine Pitroue033e062010-10-29 10:38:18 +00002947 if (res)
2948 Py_DECREF(res);
2949 else
2950 PyErr_Clear();
2951 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002952 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002953 if (res == NULL)
2954 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002955 else
2956 Py_DECREF(res);
2957
Benjamin Peterson68623612012-12-20 11:53:11 -06002958 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2959 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002960 _PyErr_ChainExceptions(exc, val, tb);
2961 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002962 }
2963 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002964 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002965}
2966
2967static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002968textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002969{
2970 PyObject *line;
2971
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002972 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002973
2974 self->telling = 0;
2975 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2976 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002977 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002978 }
2979 else {
2980 line = PyObject_CallMethodObjArgs((PyObject *)self,
2981 _PyIO_str_readline, NULL);
2982 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002983 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03002984 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002985 "not '%.200s'", Py_TYPE(line)->tp_name);
2986 Py_DECREF(line);
2987 return NULL;
2988 }
2989 }
2990
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002991 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002992 return NULL;
2993
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002994 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002995 /* Reached EOF or would have blocked */
2996 Py_DECREF(line);
2997 Py_CLEAR(self->snapshot);
2998 self->telling = self->seekable;
2999 return NULL;
3000 }
3001
3002 return line;
3003}
3004
3005static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003006textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003007{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003008 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02003009 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003010}
3011
3012static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003013textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003014{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003015 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003016 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3017}
3018
3019static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003020textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003021{
3022 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003023 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003024 if (self->decoder == NULL ||
3025 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3026 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003027 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003028 }
3029 return res;
3030}
3031
3032static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003033textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003034{
3035 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003036 Py_INCREF(self->errors);
3037 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003038}
3039
3040static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003041textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003042{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003043 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003044 return PyLong_FromSsize_t(self->chunk_size);
3045}
3046
3047static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003048textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003049{
3050 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003051 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003052 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003053 if (n == -1 && PyErr_Occurred())
3054 return -1;
3055 if (n <= 0) {
3056 PyErr_SetString(PyExc_ValueError,
3057 "a strictly positive integer is required");
3058 return -1;
3059 }
3060 self->chunk_size = n;
3061 return 0;
3062}
3063
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003064#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003065
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003066static PyMethodDef incrementalnewlinedecoder_methods[] = {
3067 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3068 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3069 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3070 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3071 {NULL}
3072};
3073
3074static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3075 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3076 {NULL}
3077};
3078
3079PyTypeObject PyIncrementalNewlineDecoder_Type = {
3080 PyVarObject_HEAD_INIT(NULL, 0)
3081 "_io.IncrementalNewlineDecoder", /*tp_name*/
3082 sizeof(nldecoder_object), /*tp_basicsize*/
3083 0, /*tp_itemsize*/
3084 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3085 0, /*tp_print*/
3086 0, /*tp_getattr*/
3087 0, /*tp_setattr*/
3088 0, /*tp_compare */
3089 0, /*tp_repr*/
3090 0, /*tp_as_number*/
3091 0, /*tp_as_sequence*/
3092 0, /*tp_as_mapping*/
3093 0, /*tp_hash */
3094 0, /*tp_call*/
3095 0, /*tp_str*/
3096 0, /*tp_getattro*/
3097 0, /*tp_setattro*/
3098 0, /*tp_as_buffer*/
3099 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3100 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3101 0, /* tp_traverse */
3102 0, /* tp_clear */
3103 0, /* tp_richcompare */
3104 0, /*tp_weaklistoffset*/
3105 0, /* tp_iter */
3106 0, /* tp_iternext */
3107 incrementalnewlinedecoder_methods, /* tp_methods */
3108 0, /* tp_members */
3109 incrementalnewlinedecoder_getset, /* tp_getset */
3110 0, /* tp_base */
3111 0, /* tp_dict */
3112 0, /* tp_descr_get */
3113 0, /* tp_descr_set */
3114 0, /* tp_dictoffset */
3115 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3116 0, /* tp_alloc */
3117 PyType_GenericNew, /* tp_new */
3118};
3119
3120
3121static PyMethodDef textiowrapper_methods[] = {
3122 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003123 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003124 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3125 _IO_TEXTIOWRAPPER_READ_METHODDEF
3126 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3127 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3128 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3129
3130 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3131 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3132 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3133 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3134 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Antoine Pitrou243757e2010-11-05 21:15:39 +00003135 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003136
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003137 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3138 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3139 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003140 {NULL, NULL}
3141};
3142
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003143static PyMemberDef textiowrapper_members[] = {
3144 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3145 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3146 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003147 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003148 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003149 {NULL}
3150};
3151
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003152static PyGetSetDef textiowrapper_getset[] = {
3153 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3154 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003155/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3156*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003157 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3158 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3159 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3160 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003161 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003162};
3163
3164PyTypeObject PyTextIOWrapper_Type = {
3165 PyVarObject_HEAD_INIT(NULL, 0)
3166 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003167 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003168 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003169 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003170 0, /*tp_print*/
3171 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003172 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003173 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003174 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003175 0, /*tp_as_number*/
3176 0, /*tp_as_sequence*/
3177 0, /*tp_as_mapping*/
3178 0, /*tp_hash */
3179 0, /*tp_call*/
3180 0, /*tp_str*/
3181 0, /*tp_getattro*/
3182 0, /*tp_setattro*/
3183 0, /*tp_as_buffer*/
3184 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02003185 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003186 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003187 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3188 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003189 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003190 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003191 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003192 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3193 textiowrapper_methods, /* tp_methods */
3194 textiowrapper_members, /* tp_members */
3195 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003196 0, /* tp_base */
3197 0, /* tp_dict */
3198 0, /* tp_descr_get */
3199 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003200 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003201 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003202 0, /* tp_alloc */
3203 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003204 0, /* tp_free */
3205 0, /* tp_is_gc */
3206 0, /* tp_bases */
3207 0, /* tp_mro */
3208 0, /* tp_cache */
3209 0, /* tp_subclasses */
3210 0, /* tp_weaklist */
3211 0, /* tp_del */
3212 0, /* tp_version_tag */
3213 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003214};