blob: 6800d2dd253531fec66d52ec8d3e49ebee6c98c4 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030014/*[clinic input]
15module _io
16class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18[clinic start generated code]*/
19/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20
/* Static identifiers (PyId_<name>) for the attribute and method names this
   file looks up by name, e.g. PyId_strict and PyId_setstate. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000043/* TextIOBase */
44
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000045PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000046 "Base class for text I/O.\n"
47 "\n"
48 "This class provides a character and line based interface to stream\n"
49 "I/O. There is no readinto method because Python's character strings\n"
50 "are immutable. There is no public constructor.\n"
51 );
52
53static PyObject *
54_unsupported(const char *message)
55{
Antoine Pitrou712cb732013-12-21 15:51:54 +010056 _PyIO_State *state = IO_STATE();
57 if (state != NULL)
58 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000059 return NULL;
60}
61
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063 "Separate the underlying buffer from the TextIOBase and return it.\n"
64 "\n"
65 "After the underlying buffer has been detached, the TextIO is in an\n"
66 "unusable state.\n"
67 );
68
69static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000070textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000071{
72 return _unsupported("detach");
73}
74
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076 "Read at most n characters from stream.\n"
77 "\n"
78 "Read from underlying buffer until we have n characters or we hit EOF.\n"
79 "If n is negative or omitted, read until EOF.\n"
80 );
81
82static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000083textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000084{
85 return _unsupported("read");
86}
87
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089 "Read until newline or EOF.\n"
90 "\n"
91 "Returns an empty string if EOF is hit immediately.\n"
92 );
93
94static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000095textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000096{
97 return _unsupported("readline");
98}
99
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000100PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000101 "Write string to stream.\n"
102 "Returns the number of characters written (which is always equal to\n"
103 "the length of the string).\n"
104 );
105
106static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000108{
109 return _unsupported("write");
110}
111
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000112PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000113 "Encoding of the text stream.\n"
114 "\n"
115 "Subclasses should override.\n"
116 );
117
118static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000119textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120{
121 Py_RETURN_NONE;
122}
123
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 "Line endings translated so far.\n"
126 "\n"
127 "Only line endings translated during reading are considered.\n"
128 "\n"
129 "Subclasses should override.\n"
130 );
131
132static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000133textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000134{
135 Py_RETURN_NONE;
136}
137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000138PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000139 "The error setting of the decoder or encoder.\n"
140 "\n"
141 "Subclasses should override.\n"
142 );
143
144static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000145textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000146{
147 Py_RETURN_NONE;
148}
149
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyMethodDef textiobase_methods[] = {
152 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
153 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
154 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
155 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156 {NULL, NULL}
157};
158
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000159static PyGetSetDef textiobase_getset[] = {
160 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
161 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
162 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000163 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164};
165
166PyTypeObject PyTextIOBase_Type = {
167 PyVarObject_HEAD_INIT(NULL, 0)
168 "_io._TextIOBase", /*tp_name*/
169 0, /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 0, /*tp_dealloc*/
172 0, /*tp_print*/
173 0, /*tp_getattr*/
174 0, /*tp_setattr*/
175 0, /*tp_compare */
176 0, /*tp_repr*/
177 0, /*tp_as_number*/
178 0, /*tp_as_sequence*/
179 0, /*tp_as_mapping*/
180 0, /*tp_hash */
181 0, /*tp_call*/
182 0, /*tp_str*/
183 0, /*tp_getattro*/
184 0, /*tp_setattro*/
185 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200186 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
187 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000188 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000189 0, /* tp_traverse */
190 0, /* tp_clear */
191 0, /* tp_richcompare */
192 0, /* tp_weaklistoffset */
193 0, /* tp_iter */
194 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000195 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000196 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 &PyIOBase_Type, /* tp_base */
199 0, /* tp_dict */
200 0, /* tp_descr_get */
201 0, /* tp_descr_set */
202 0, /* tp_dictoffset */
203 0, /* tp_init */
204 0, /* tp_alloc */
205 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200206 0, /* tp_free */
207 0, /* tp_is_gc */
208 0, /* tp_bases */
209 0, /* tp_mro */
210 0, /* tp_cache */
211 0, /* tp_subclasses */
212 0, /* tp_weaklist */
213 0, /* tp_del */
214 0, /* tp_version_tag */
215 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000216};
217
218
219/* IncrementalNewlineDecoder */
220
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221typedef struct {
222 PyObject_HEAD
223 PyObject *decoder;
224 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200225 unsigned int pendingcr: 1;
226 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000227 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000228} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000229
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300230/*[clinic input]
231_io.IncrementalNewlineDecoder.__init__
232 decoder: object
233 translate: int
234 errors: object(c_default="NULL") = "strict"
235
236Codec used when reading a file in universal newlines mode.
237
238It wraps another incremental decoder, translating \r\n and \r into \n.
239It also records the types of newlines encountered. When used with
240translate=False, it ensures that the newline sequence is returned in
241one piece. When used with decoder=None, it expects unicode strings as
242decode input and translates newlines without first invoking an external
243decoder.
244[clinic start generated code]*/
245
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300247_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
248 PyObject *decoder, int translate,
249 PyObject *errors)
250/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252 self->decoder = decoder;
253 Py_INCREF(decoder);
254
255 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900256 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000257 if (self->errors == NULL)
258 return -1;
259 }
260 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000261 self->errors = errors;
262 }
INADA Naoki507434f2017-12-21 09:59:53 +0900263 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000264
265 self->translate = translate;
266 self->seennl = 0;
267 self->pendingcr = 0;
268
269 return 0;
270}
271
272static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000273incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000274{
275 Py_CLEAR(self->decoder);
276 Py_CLEAR(self->errors);
277 Py_TYPE(self)->tp_free((PyObject *)self);
278}
279
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200280static int
281check_decoded(PyObject *decoded)
282{
283 if (decoded == NULL)
284 return -1;
285 if (!PyUnicode_Check(decoded)) {
286 PyErr_Format(PyExc_TypeError,
287 "decoder should return a string result, not '%.200s'",
288 Py_TYPE(decoded)->tp_name);
289 Py_DECREF(decoded);
290 return -1;
291 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200292 if (PyUnicode_READY(decoded) < 0) {
293 Py_DECREF(decoded);
294 return -1;
295 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200296 return 0;
297}
298
/* Bit flags recording which newline conventions have been encountered. */
#define SEEN_CR   0x1
#define SEEN_LF   0x2
#define SEEN_CRLF 0x4
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
303
304PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200305_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000306 PyObject *input, int final)
307{
308 PyObject *output;
309 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200310 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311
312 if (self->decoder == NULL) {
313 PyErr_SetString(PyExc_ValueError,
314 "IncrementalNewlineDecoder.__init__ not called");
315 return NULL;
316 }
317
318 /* decode input (with the eventual \r from a previous pass) */
319 if (self->decoder != Py_None) {
320 output = PyObject_CallMethodObjArgs(self->decoder,
321 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
322 }
323 else {
324 output = input;
325 Py_INCREF(output);
326 }
327
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200328 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000329 return NULL;
330
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200331 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000332 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 /* Prefix output with CR */
334 int kind;
335 PyObject *modified;
336 char *out;
337
338 modified = PyUnicode_New(output_len + 1,
339 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000340 if (modified == NULL)
341 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 kind = PyUnicode_KIND(modified);
343 out = PyUnicode_DATA(modified);
344 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200345 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000348 self->pendingcr = 0;
349 output_len++;
350 }
351
352 /* retain last \r even when not translating data:
353 * then readline() is sure to get \r\n in one pass
354 */
355 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000356 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
358 {
359 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
360 if (modified == NULL)
361 goto error;
362 Py_DECREF(output);
363 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000364 self->pendingcr = 1;
365 }
366 }
367
368 /* Record which newlines are read and do newline translation if desired,
369 all in one pass. */
370 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200371 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000372 Py_ssize_t len;
373 int seennl = self->seennl;
374 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000376
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 in_str = PyUnicode_DATA(output);
378 len = PyUnicode_GET_LENGTH(output);
379 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000380
381 if (len == 0)
382 return output;
383
384 /* If, up to now, newlines are consistently \n, do a quick check
385 for the \r *byte* with the libc's optimized memchr.
386 */
387 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200388 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000389 }
390
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 if (only_lf) {
392 /* If not already seen, quick scan for a possible "\n" character.
393 (there's nothing else to be done, even when in translation mode)
394 */
395 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200396 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100397 if (kind == PyUnicode_1BYTE_KIND)
398 seennl |= SEEN_LF;
399 else {
400 Py_ssize_t i = 0;
401 for (;;) {
402 Py_UCS4 c;
403 /* Fast loop for non-control characters */
404 while (PyUnicode_READ(kind, in_str, i) > '\n')
405 i++;
406 c = PyUnicode_READ(kind, in_str, i++);
407 if (c == '\n') {
408 seennl |= SEEN_LF;
409 break;
410 }
411 if (i >= len)
412 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000413 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 }
415 }
416 /* Finished: we have scanned for newlines, and none of them
417 need translating */
418 }
419 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000421 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000422 if (seennl == SEEN_ALL)
423 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000424 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200425 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000426 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 while (PyUnicode_READ(kind, in_str, i) > '\r')
428 i++;
429 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 if (c == '\n')
431 seennl |= SEEN_LF;
432 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 }
437 else
438 seennl |= SEEN_CR;
439 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000441 break;
442 if (seennl == SEEN_ALL)
443 break;
444 }
445 endscan:
446 ;
447 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000448 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 void *translated;
450 int kind = PyUnicode_KIND(output);
451 void *in_str = PyUnicode_DATA(output);
452 Py_ssize_t in, out;
453 /* XXX: Previous in-place translation here is disabled as
454 resizing is not possible anymore */
455 /* We could try to optimize this so that we only do a copy
456 when there is something to translate. On the other hand,
457 we already know there is a \r byte, so chances are high
458 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200459 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200460 if (translated == NULL) {
461 PyErr_NoMemory();
462 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000467 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200468 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
469 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200471 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000472 seennl |= SEEN_LF;
473 continue;
474 }
475 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200476 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 in++;
478 seennl |= SEEN_CRLF;
479 }
480 else
481 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000483 continue;
484 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000486 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200487 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000488 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200489 Py_DECREF(output);
490 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100491 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200492 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200493 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000494 }
495 self->seennl |= seennl;
496 }
497
498 return output;
499
500 error:
501 Py_DECREF(output);
502 return NULL;
503}
504
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300505/*[clinic input]
506_io.IncrementalNewlineDecoder.decode
507 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200508 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300509[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000510
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511static PyObject *
512_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
513 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200514/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300515{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
517}
518
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300519/*[clinic input]
520_io.IncrementalNewlineDecoder.getstate
521[clinic start generated code]*/
522
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000523static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300524_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
525/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000526{
527 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700528 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529
530 if (self->decoder != Py_None) {
531 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
532 _PyIO_str_getstate, NULL);
533 if (state == NULL)
534 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300535 if (!PyTuple_Check(state)) {
536 PyErr_SetString(PyExc_TypeError,
537 "illegal decoder state");
538 Py_DECREF(state);
539 return NULL;
540 }
541 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
542 &buffer, &flag))
543 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000544 Py_DECREF(state);
545 return NULL;
546 }
547 Py_INCREF(buffer);
548 Py_DECREF(state);
549 }
550 else {
551 buffer = PyBytes_FromString("");
552 flag = 0;
553 }
554 flag <<= 1;
555 if (self->pendingcr)
556 flag |= 1;
557 return Py_BuildValue("NK", buffer, flag);
558}
559
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300560/*[clinic input]
561_io.IncrementalNewlineDecoder.setstate
562 state: object
563 /
564[clinic start generated code]*/
565
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000566static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300567_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
568 PyObject *state)
569/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570{
571 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700572 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000573
Oren Milman1d1d3e92017-08-20 18:35:36 +0300574 if (!PyTuple_Check(state)) {
575 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300577 }
578 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
579 &buffer, &flag))
580 {
581 return NULL;
582 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583
Victor Stinner7d7e7752014-06-17 23:31:25 +0200584 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585 flag >>= 1;
586
587 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200588 return _PyObject_CallMethodId(self->decoder,
589 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 else
591 Py_RETURN_NONE;
592}
593
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300594/*[clinic input]
595_io.IncrementalNewlineDecoder.reset
596[clinic start generated code]*/
597
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000598static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300599_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
600/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601{
602 self->seennl = 0;
603 self->pendingcr = 0;
604 if (self->decoder != Py_None)
605 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
606 else
607 Py_RETURN_NONE;
608}
609
610static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612{
613 switch (self->seennl) {
614 case SEEN_CR:
615 return PyUnicode_FromString("\r");
616 case SEEN_LF:
617 return PyUnicode_FromString("\n");
618 case SEEN_CRLF:
619 return PyUnicode_FromString("\r\n");
620 case SEEN_CR | SEEN_LF:
621 return Py_BuildValue("ss", "\r", "\n");
622 case SEEN_CR | SEEN_CRLF:
623 return Py_BuildValue("ss", "\r", "\r\n");
624 case SEEN_LF | SEEN_CRLF:
625 return Py_BuildValue("ss", "\n", "\r\n");
626 case SEEN_CR | SEEN_LF | SEEN_CRLF:
627 return Py_BuildValue("sss", "\r", "\n", "\r\n");
628 default:
629 Py_RETURN_NONE;
630 }
631
632}
633
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634/* TextIOWrapper */
635
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636typedef PyObject *
637 (*encodefunc_t)(PyObject *, PyObject *);
638
639typedef struct
640{
641 PyObject_HEAD
642 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000643 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 Py_ssize_t chunk_size;
645 PyObject *buffer;
646 PyObject *encoding;
647 PyObject *encoder;
648 PyObject *decoder;
649 PyObject *readnl;
650 PyObject *errors;
INADA Naoki507434f2017-12-21 09:59:53 +0900651 const char *writenl; /* ASCII-encoded; NULL stands for \n */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200653 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 char readuniversal;
655 char readtranslate;
656 char writetranslate;
657 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200658 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200660 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000665
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000668
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
674 */
675 PyObject *decoded_chars; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
677 PyObject *pending_bytes; /* list of bytes objects waiting to be
678 written, or NULL */
679 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000680
Oren Milman13614e32017-08-24 19:51:24 +0300681 /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
685 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000686 PyObject *snapshot;
687 /* Bytes-to-characters ratio for the current chunk. Serves as input for
688 the heuristic in tell(). */
689 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000690
691 /* Cache raw object if it's a FileIO object */
692 PyObject *raw;
693
694 PyObject *weakreflist;
695 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000696} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000697
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698/* A couple of specialized cases in order to bypass the slow incremental
699 encoding methods for the most popular encodings. */
700
701static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000702ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703{
INADA Naoki507434f2017-12-21 09:59:53 +0900704 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000705}
706
707static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000708utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000709{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100710 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900711 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712}
713
714static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000715utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100717 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900718 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719}
720
721static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000722utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723{
Antoine Pitroue4501852009-05-14 18:55:55 +0000724 if (!self->encoding_start_of_stream) {
725 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200726#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000727 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000731 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100732 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900733 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000734}
735
Antoine Pitroue4501852009-05-14 18:55:55 +0000736static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000737utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000738{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100739 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900740 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000741}
742
743static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000744utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000745{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100746 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900747 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000748}
749
750static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000751utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000752{
753 if (!self->encoding_start_of_stream) {
754 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200755#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000756 return utf32be_encode(self, text);
757#else
758 return utf32le_encode(self, text);
759#endif
760 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100761 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900762 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000763}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764
765static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000766utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767{
INADA Naoki507434f2017-12-21 09:59:53 +0900768 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769}
770
771static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000772latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000773{
INADA Naoki507434f2017-12-21 09:59:53 +0900774 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000775}
776
777/* Map normalized encoding names onto the specialized encoding funcs */
778
779typedef struct {
780 const char *name;
781 encodefunc_t encodefunc;
782} encodefuncentry;
783
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200784static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000785 {"ascii", (encodefunc_t) ascii_encode},
786 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000787 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788 {"utf-16-be", (encodefunc_t) utf16be_encode},
789 {"utf-16-le", (encodefunc_t) utf16le_encode},
790 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000791 {"utf-32-be", (encodefunc_t) utf32be_encode},
792 {"utf-32-le", (encodefunc_t) utf32le_encode},
793 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000794 {NULL, NULL}
795};
796
INADA Naoki507434f2017-12-21 09:59:53 +0900797static int
798validate_newline(const char *newline)
799{
800 if (newline && newline[0] != '\0'
801 && !(newline[0] == '\n' && newline[1] == '\0')
802 && !(newline[0] == '\r' && newline[1] == '\0')
803 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
804 PyErr_Format(PyExc_ValueError,
805 "illegal newline value: %s", newline);
806 return -1;
807 }
808 return 0;
809}
810
811static int
812set_newline(textio *self, const char *newline)
813{
814 PyObject *old = self->readnl;
815 if (newline == NULL) {
816 self->readnl = NULL;
817 }
818 else {
819 self->readnl = PyUnicode_FromString(newline);
820 if (self->readnl == NULL) {
821 self->readnl = old;
822 return -1;
823 }
824 }
825 self->readuniversal = (newline == NULL || newline[0] == '\0');
826 self->readtranslate = (newline == NULL);
827 self->writetranslate = (newline == NULL || newline[0] != '\0');
828 if (!self->readuniversal && self->readnl != NULL) {
829 // validate_newline() accepts only ASCII newlines.
830 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
831 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
832 if (strcmp(self->writenl, "\n") == 0) {
833 self->writenl = NULL;
834 }
835 }
836 else {
837#ifdef MS_WINDOWS
838 self->writenl = "\r\n";
839#else
840 self->writenl = NULL;
841#endif
842 }
843 Py_XDECREF(old);
844 return 0;
845}
846
847static int
848_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
849 const char *errors)
850{
851 PyObject *res;
852 int r;
853
854 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
855 if (res == NULL)
856 return -1;
857
858 r = PyObject_IsTrue(res);
859 Py_DECREF(res);
860 if (r == -1)
861 return -1;
862
863 if (r != 1)
864 return 0;
865
866 Py_CLEAR(self->decoder);
867 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
868 if (self->decoder == NULL)
869 return -1;
870
871 if (self->readuniversal) {
872 PyObject *incrementalDecoder = PyObject_CallFunction(
873 (PyObject *)&PyIncrementalNewlineDecoder_Type,
874 "Oi", self->decoder, (int)self->readtranslate);
875 if (incrementalDecoder == NULL)
876 return -1;
877 Py_CLEAR(self->decoder);
878 self->decoder = incrementalDecoder;
879 }
880
881 return 0;
882}
883
884static PyObject*
885_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
886{
887 PyObject *chars;
888
889 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
890 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
891 else
892 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
893 eof ? Py_True : Py_False, NULL);
894
895 if (check_decoded(chars) < 0)
896 // check_decoded already decreases refcount
897 return NULL;
898
899 return chars;
900}
901
902static int
903_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
904 const char *errors)
905{
906 PyObject *res;
907 int r;
908
909 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
910 if (res == NULL)
911 return -1;
912
913 r = PyObject_IsTrue(res);
914 Py_DECREF(res);
915 if (r == -1)
916 return -1;
917
918 if (r != 1)
919 return 0;
920
921 Py_CLEAR(self->encoder);
922 self->encodefunc = NULL;
923 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
924 if (self->encoder == NULL)
925 return -1;
926
927 /* Get the normalized named of the codec */
928 res = _PyObject_GetAttrId(codec_info, &PyId_name);
929 if (res == NULL) {
930 if (PyErr_ExceptionMatches(PyExc_AttributeError))
931 PyErr_Clear();
932 else
933 return -1;
934 }
935 else if (PyUnicode_Check(res)) {
936 const encodefuncentry *e = encodefuncs;
937 while (e->name != NULL) {
938 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
939 self->encodefunc = e->encodefunc;
940 break;
941 }
942 e++;
943 }
944 }
945 Py_XDECREF(res);
946
947 return 0;
948}
949
950static int
951_textiowrapper_fix_encoder_state(textio *self)
952{
953 if (!self->seekable || !self->encoder) {
954 return 0;
955 }
956
957 self->encoding_start_of_stream = 1;
958
959 PyObject *cookieObj = PyObject_CallMethodObjArgs(
960 self->buffer, _PyIO_str_tell, NULL);
961 if (cookieObj == NULL) {
962 return -1;
963 }
964
965 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
966 Py_DECREF(cookieObj);
967 if (cmp < 0) {
968 return -1;
969 }
970
971 if (cmp == 0) {
972 self->encoding_start_of_stream = 0;
973 PyObject *res = PyObject_CallMethodObjArgs(
974 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
975 if (res == NULL) {
976 return -1;
977 }
978 Py_DECREF(res);
979 }
980
981 return 0;
982}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000983
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300984/*[clinic input]
985_io.TextIOWrapper.__init__
986 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700987 encoding: str(accept={str, NoneType}) = NULL
INADA Naoki507434f2017-12-21 09:59:53 +0900988 errors: object = None
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700989 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200990 line_buffering: bool(accept={int}) = False
991 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000992
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300993Character and line based layer over a BufferedIOBase object, buffer.
994
995encoding gives the name of the encoding that the stream will be
996decoded or encoded with. It defaults to locale.getpreferredencoding(False).
997
998errors determines the strictness of encoding and decoding (see
999help(codecs.Codec) or the documentation for codecs.register) and
1000defaults to "strict".
1001
1002newline controls how line endings are handled. It can be None, '',
1003'\n', '\r', and '\r\n'. It works as follows:
1004
1005* On input, if newline is None, universal newlines mode is
1006 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1007 these are translated into '\n' before being returned to the
1008 caller. If it is '', universal newline mode is enabled, but line
1009 endings are returned to the caller untranslated. If it has any of
1010 the other legal values, input lines are only terminated by the given
1011 string, and the line ending is returned to the caller untranslated.
1012
1013* On output, if newline is None, any '\n' characters written are
1014 translated to the system default line separator, os.linesep. If
1015 newline is '' or '\n', no translation takes place. If newline is any
1016 of the other legal values, any '\n' characters written are translated
1017 to the given string.
1018
1019If line_buffering is True, a call to flush is implied when a call to
1020write contains a newline character.
1021[clinic start generated code]*/
1022
1023static int
1024_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001025 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001026 const char *newline, int line_buffering,
1027 int write_through)
INADA Naoki507434f2017-12-21 09:59:53 +09001028/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001029{
1030 PyObject *raw, *codec_info = NULL;
1031 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001032 PyObject *res;
1033 int r;
1034
1035 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001036 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001037
INADA Naoki507434f2017-12-21 09:59:53 +09001038 if (errors == Py_None) {
1039 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
1040 }
1041 else if (!PyUnicode_Check(errors)) {
1042 // Check 'errors' argument here because Argument Clinic doesn't support
1043 // 'str(accept={str, NoneType})' converter.
1044 PyErr_Format(
1045 PyExc_TypeError,
1046 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1047 errors->ob_type->tp_name);
1048 return -1;
1049 }
1050
1051 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001052 return -1;
1053 }
1054
1055 Py_CLEAR(self->buffer);
1056 Py_CLEAR(self->encoding);
1057 Py_CLEAR(self->encoder);
1058 Py_CLEAR(self->decoder);
1059 Py_CLEAR(self->readnl);
1060 Py_CLEAR(self->decoded_chars);
1061 Py_CLEAR(self->pending_bytes);
1062 Py_CLEAR(self->snapshot);
1063 Py_CLEAR(self->errors);
1064 Py_CLEAR(self->raw);
1065 self->decoded_chars_used = 0;
1066 self->pending_bytes_count = 0;
1067 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001068 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001069
1070 if (encoding == NULL) {
1071 /* Try os.device_encoding(fileno) */
1072 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001073 state = IO_STATE();
1074 if (state == NULL)
1075 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001076 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001077 /* Ignore only AttributeError and UnsupportedOperation */
1078 if (fileno == NULL) {
1079 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1080 PyErr_ExceptionMatches(state->unsupported_operation)) {
1081 PyErr_Clear();
1082 }
1083 else {
1084 goto error;
1085 }
1086 }
1087 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001088 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001089 Py_DECREF(fileno);
1090 if (fd == -1 && PyErr_Occurred()) {
1091 goto error;
1092 }
1093
1094 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001095 if (self->encoding == NULL)
1096 goto error;
1097 else if (!PyUnicode_Check(self->encoding))
1098 Py_CLEAR(self->encoding);
1099 }
1100 }
1101 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001102 PyObject *locale_module = _PyIO_get_locale_module(state);
1103 if (locale_module == NULL)
1104 goto catch_ImportError;
Victor Stinner61bdb0d2016-12-09 15:39:28 +01001105 self->encoding = _PyObject_CallMethodIdObjArgs(
1106 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001107 Py_DECREF(locale_module);
1108 if (self->encoding == NULL) {
1109 catch_ImportError:
1110 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001111 Importing locale can raise an ImportError because of
1112 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001113 ImportError if _locale is not available. These will happen
1114 during module building.
1115 */
1116 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1117 PyErr_Clear();
1118 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001119 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001120 else
1121 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001123 else if (!PyUnicode_Check(self->encoding))
1124 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001126 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001127 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001128 if (encoding == NULL)
1129 goto error;
1130 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001131 else if (encoding != NULL) {
1132 self->encoding = PyUnicode_FromString(encoding);
1133 if (self->encoding == NULL)
1134 goto error;
1135 }
1136 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001137 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001138 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001139 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001140 }
1141
Nick Coghlana9b15242014-02-04 22:11:18 +10001142 /* Check we have been asked for a real text encoding */
1143 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1144 if (codec_info == NULL) {
1145 Py_CLEAR(self->encoding);
1146 goto error;
1147 }
1148
1149 /* XXX: Failures beyond this point have the potential to leak elements
1150 * of the partially constructed object (like self->encoding)
1151 */
1152
INADA Naoki507434f2017-12-21 09:59:53 +09001153 Py_INCREF(errors);
1154 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001155 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001156 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001157 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001158 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001159 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001160 }
1161
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001162 self->buffer = buffer;
1163 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001164
INADA Naoki507434f2017-12-21 09:59:53 +09001165 /* Build the decoder object */
1166 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1167 goto error;
1168
1169 /* Build the encoder object */
1170 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1171 goto error;
1172
1173 /* Finished sorting out the codec details */
1174 Py_CLEAR(codec_info);
1175
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001176 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1177 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1178 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001179 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001180 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001181 if (raw == NULL) {
1182 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1183 PyErr_Clear();
1184 else
1185 goto error;
1186 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001187 else if (Py_TYPE(raw) == &PyFileIO_Type)
1188 self->raw = raw;
1189 else
1190 Py_DECREF(raw);
1191 }
1192
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001193 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001194 if (res == NULL)
1195 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001196 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001197 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001198 if (r < 0)
1199 goto error;
1200 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001201
Martin v. Löwis767046a2011-10-14 15:35:36 +02001202 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001203
Antoine Pitroue4501852009-05-14 18:55:55 +00001204 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001205 if (_textiowrapper_fix_encoder_state(self) < 0) {
1206 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001207 }
1208
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001209 self->ok = 1;
1210 return 0;
1211
1212 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001213 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001214 return -1;
1215}
1216
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001217/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1218 * -1 on error.
1219 */
1220static int
1221convert_optional_bool(PyObject *obj, int default_value)
1222{
1223 long v;
1224 if (obj == Py_None) {
1225 v = default_value;
1226 }
1227 else {
1228 v = PyLong_AsLong(obj);
1229 if (v == -1 && PyErr_Occurred())
1230 return -1;
1231 }
1232 return v != 0;
1233}
1234
INADA Naoki507434f2017-12-21 09:59:53 +09001235static int
1236textiowrapper_change_encoding(textio *self, PyObject *encoding,
1237 PyObject *errors, int newline_changed)
1238{
1239 /* Use existing settings where new settings are not specified */
1240 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1241 return 0; // no change
1242 }
1243
1244 if (encoding == Py_None) {
1245 encoding = self->encoding;
1246 if (errors == Py_None) {
1247 errors = self->errors;
1248 }
1249 }
1250 else if (errors == Py_None) {
1251 errors = _PyUnicode_FromId(&PyId_strict);
1252 }
1253
1254 const char *c_errors = PyUnicode_AsUTF8(errors);
1255 if (c_errors == NULL) {
1256 return -1;
1257 }
1258
1259 // Create new encoder & decoder
1260 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1261 PyUnicode_AsUTF8(encoding), "codecs.open()");
1262 if (codec_info == NULL) {
1263 return -1;
1264 }
1265 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1266 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1267 Py_DECREF(codec_info);
1268 return -1;
1269 }
1270 Py_DECREF(codec_info);
1271
1272 Py_INCREF(encoding);
1273 Py_INCREF(errors);
1274 Py_SETREF(self->encoding, encoding);
1275 Py_SETREF(self->errors, errors);
1276
1277 return _textiowrapper_fix_encoder_state(self);
1278}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001279
1280/*[clinic input]
1281_io.TextIOWrapper.reconfigure
1282 *
INADA Naoki507434f2017-12-21 09:59:53 +09001283 encoding: object = None
1284 errors: object = None
1285 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001286 line_buffering as line_buffering_obj: object = None
1287 write_through as write_through_obj: object = None
1288
1289Reconfigure the text stream with new parameters.
1290
1291This also does an implicit stream flush.
1292
1293[clinic start generated code]*/
1294
1295static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001296_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1297 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001298 PyObject *line_buffering_obj,
1299 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001300/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001301{
1302 int line_buffering;
1303 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001304 const char *newline = NULL;
1305
1306 /* Check if something is in the read buffer */
1307 if (self->decoded_chars != NULL) {
1308 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1309 _unsupported("It is not possible to set the encoding or newline"
1310 "of stream after the first read");
1311 return NULL;
1312 }
1313 }
1314
1315 if (newline_obj != NULL && newline_obj != Py_None) {
1316 newline = PyUnicode_AsUTF8(newline_obj);
1317 if (newline == NULL || validate_newline(newline) < 0) {
1318 return NULL;
1319 }
1320 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001321
1322 line_buffering = convert_optional_bool(line_buffering_obj,
1323 self->line_buffering);
1324 write_through = convert_optional_bool(write_through_obj,
1325 self->write_through);
1326 if (line_buffering < 0 || write_through < 0) {
1327 return NULL;
1328 }
INADA Naoki507434f2017-12-21 09:59:53 +09001329
1330 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001331 if (res == NULL) {
1332 return NULL;
1333 }
INADA Naoki507434f2017-12-21 09:59:53 +09001334 Py_DECREF(res);
1335 self->b2cratio = 0;
1336
1337 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1338 return NULL;
1339 }
1340
1341 if (textiowrapper_change_encoding(
1342 self, encoding, errors, newline_obj != NULL) < 0) {
1343 return NULL;
1344 }
1345
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001346 self->line_buffering = line_buffering;
1347 self->write_through = write_through;
1348 Py_RETURN_NONE;
1349}
1350
/* Release every object reference held by *self* and mark the wrapper as
   not initialized (ok = 0).  Also called from textiowrapper_dealloc().
   Always returns 0. */
static int
textiowrapper_clear(textio *self)
{
    self->ok = 0;
    Py_CLEAR(self->buffer);
    Py_CLEAR(self->encoding);
    Py_CLEAR(self->encoder);
    Py_CLEAR(self->decoder);
    Py_CLEAR(self->readnl);
    Py_CLEAR(self->decoded_chars);
    Py_CLEAR(self->pending_bytes);
    Py_CLEAR(self->snapshot);
    Py_CLEAR(self->errors);
    Py_CLEAR(self->raw);

    Py_CLEAR(self->dict);
    return 0;
}
1369
1370static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001371textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001372{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001373 self->finalizing = 1;
1374 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001375 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001376 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001377 _PyObject_GC_UNTRACK(self);
1378 if (self->weakreflist != NULL)
1379 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001380 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001381 Py_TYPE(self)->tp_free((PyObject *)self);
1382}
1383
/* GC traversal: report every object reference held by *self* to *visit*.
   The member list mirrors the one released in textiowrapper_clear().
   Py_VISIT returns early with the callback's result if it is non-zero;
   otherwise 0 is returned. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1401
/* Forward declaration: CHECK_CLOSED below calls this getter. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* This macro takes some shortcuts to make the common case faster. */
/* Makes the enclosing function `return NULL` (with an exception set) when
   the stream is closed or the check itself fails, so it is only usable in
   functions returning a pointer.
   - Exact TextIOWrapper with a cached raw object: ask the raw object
     directly via _PyFileIO_closed().
   - Exact TextIOWrapper without one: evaluate the `closed` getter.
   - Any other type: defer to _PyIOBase_check_closed().
   NOTE(review): `self` is expanded unparenthesized, so pass a plain
   identifier. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1431
/* Make the enclosing function `return NULL` with ValueError when the
   wrapper has not been successfully initialized (self->ok <= 0).
   NOTE(review): this expands to a bare `if` block, not do { } while (0);
   avoid using it as the body of an unbraced if/else. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }

/* CHECK_INITIALIZED plus a check that the underlying buffer has not been
   detached; returns NULL with ValueError in either case.
   NOTE(review): expands to two separate statements, so the same caveat
   about unbraced if/else applies. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return NULL; \
    }

/* Same checks as CHECK_ATTACHED, for functions that return int: the
   enclosing function returns -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
             "underlying buffer has been detached"); \
        return -1; \
    }
1457
1458
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001459/*[clinic input]
1460_io.TextIOWrapper.detach
1461[clinic start generated code]*/
1462
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001463static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001464_io_TextIOWrapper_detach_impl(textio *self)
1465/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001466{
1467 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001468 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001469 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1470 if (res == NULL)
1471 return NULL;
1472 Py_DECREF(res);
1473 buffer = self->buffer;
1474 self->buffer = NULL;
1475 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001476 return buffer;
1477}
1478
Antoine Pitrou24f36292009-03-28 22:16:42 +00001479/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001480 underlying buffered object, though. */
1481static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001482_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001483{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001484 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001485
1486 if (self->pending_bytes == NULL)
1487 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001488
1489 pending = self->pending_bytes;
1490 Py_INCREF(pending);
1491 self->pending_bytes_count = 0;
1492 Py_CLEAR(self->pending_bytes);
1493
1494 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1495 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001496 if (b == NULL)
1497 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001498 ret = NULL;
1499 do {
1500 ret = PyObject_CallMethodObjArgs(self->buffer,
1501 _PyIO_str_write, b, NULL);
1502 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001503 Py_DECREF(b);
1504 if (ret == NULL)
1505 return -1;
1506 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001507 return 0;
1508}
1509
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001510/*[clinic input]
1511_io.TextIOWrapper.write
1512 text: unicode
1513 /
1514[clinic start generated code]*/
1515
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001516static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001517_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1518/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001519{
1520 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001521 PyObject *b;
1522 Py_ssize_t textlen;
1523 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001524 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001525
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001526 if (PyUnicode_READY(text) == -1)
1527 return NULL;
1528
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001529 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001530 CHECK_CLOSED(self);
1531
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001532 if (self->encoder == NULL)
1533 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001534
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001535 Py_INCREF(text);
1536
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001537 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538
1539 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001540 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001541 haslf = 1;
1542
1543 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001544 PyObject *newtext = _PyObject_CallMethodId(
1545 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546 Py_DECREF(text);
1547 if (newtext == NULL)
1548 return NULL;
1549 text = newtext;
1550 }
1551
Antoine Pitroue96ec682011-07-23 21:46:35 +02001552 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001553 text_needflush = 1;
1554 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001555 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001556 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001557 needflush = 1;
1558
1559 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001560 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001562 self->encoding_start_of_stream = 0;
1563 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 else
1565 b = PyObject_CallMethodObjArgs(self->encoder,
1566 _PyIO_str_encode, text, NULL);
1567 Py_DECREF(text);
1568 if (b == NULL)
1569 return NULL;
Oren Milmana5b4ea12017-08-25 21:14:54 +03001570 if (!PyBytes_Check(b)) {
1571 PyErr_Format(PyExc_TypeError,
1572 "encoder should return a bytes object, not '%.200s'",
1573 Py_TYPE(b)->tp_name);
1574 Py_DECREF(b);
1575 return NULL;
1576 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577
1578 if (self->pending_bytes == NULL) {
1579 self->pending_bytes = PyList_New(0);
1580 if (self->pending_bytes == NULL) {
1581 Py_DECREF(b);
1582 return NULL;
1583 }
1584 self->pending_bytes_count = 0;
1585 }
1586 if (PyList_Append(self->pending_bytes, b) < 0) {
1587 Py_DECREF(b);
1588 return NULL;
1589 }
1590 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1591 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001592 if (self->pending_bytes_count > self->chunk_size || needflush ||
1593 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001594 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001595 return NULL;
1596 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001597
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001598 if (needflush) {
1599 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1600 if (ret == NULL)
1601 return NULL;
1602 Py_DECREF(ret);
1603 }
1604
1605 Py_CLEAR(self->snapshot);
1606
1607 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001608 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001609 if (ret == NULL)
1610 return NULL;
1611 Py_DECREF(ret);
1612 }
1613
1614 return PyLong_FromSsize_t(textlen);
1615}
1616
1617/* Steal a reference to chars and store it in the decoded_char buffer;
1618 */
1619static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001620textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001621{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001622 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623 self->decoded_chars_used = 0;
1624}
1625
1626static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001627textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001628{
1629 PyObject *chars;
1630 Py_ssize_t avail;
1631
1632 if (self->decoded_chars == NULL)
1633 return PyUnicode_FromStringAndSize(NULL, 0);
1634
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001635 /* decoded_chars is guaranteed to be "ready". */
1636 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001637 - self->decoded_chars_used);
1638
1639 assert(avail >= 0);
1640
1641 if (n < 0 || n > avail)
1642 n = avail;
1643
1644 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001645 chars = PyUnicode_Substring(self->decoded_chars,
1646 self->decoded_chars_used,
1647 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648 if (chars == NULL)
1649 return NULL;
1650 }
1651 else {
1652 chars = self->decoded_chars;
1653 Py_INCREF(chars);
1654 }
1655
1656 self->decoded_chars_used += n;
1657 return chars;
1658}
1659
1660/* Read and decode the next chunk of data from the BufferedReader.
1661 */
1662static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001663textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001664{
1665 PyObject *dec_buffer = NULL;
1666 PyObject *dec_flags = NULL;
1667 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001668 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001670 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671 int eof;
1672
1673 /* The return value is True unless EOF was reached. The decoded string is
1674 * placed in self._decoded_chars (replacing its previous value). The
1675 * entire input chunk is sent to the decoder, though some of it may remain
1676 * buffered in the decoder, yet to be converted.
1677 */
1678
1679 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001680 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001681 return -1;
1682 }
1683
1684 if (self->telling) {
1685 /* To prepare for tell(), we need to snapshot a point in the file
1686 * where the decoder's input buffer is empty.
1687 */
1688
1689 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1690 _PyIO_str_getstate, NULL);
1691 if (state == NULL)
1692 return -1;
1693 /* Given this, we know there was a valid snapshot point
1694 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1695 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001696 if (!PyTuple_Check(state)) {
1697 PyErr_SetString(PyExc_TypeError,
1698 "illegal decoder state");
1699 Py_DECREF(state);
1700 return -1;
1701 }
1702 if (!PyArg_ParseTuple(state,
1703 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1704 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705 Py_DECREF(state);
1706 return -1;
1707 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001708
1709 if (!PyBytes_Check(dec_buffer)) {
1710 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001711 "illegal decoder state: the first item should be a "
1712 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001713 Py_TYPE(dec_buffer)->tp_name);
1714 Py_DECREF(state);
1715 return -1;
1716 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 Py_INCREF(dec_buffer);
1718 Py_INCREF(dec_flags);
1719 Py_DECREF(state);
1720 }
1721
1722 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001723 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001724 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001725 }
1726 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001727 if (chunk_size == NULL)
1728 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001729
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001731 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1732 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001733 Py_DECREF(chunk_size);
1734 if (input_chunk == NULL)
1735 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001736
1737 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001738 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001739 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001740 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1741 Py_TYPE(input_chunk)->tp_name);
1742 goto fail;
1743 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001744
Antoine Pitroub8503892014-04-29 10:14:02 +02001745 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001746 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747
INADA Naoki507434f2017-12-21 09:59:53 +09001748 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1749 PyBuffer_Release(&input_chunk_buf);
1750 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001751 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001752
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001753 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001754 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001755 if (nchars > 0)
1756 self->b2cratio = (double) nbytes / nchars;
1757 else
1758 self->b2cratio = 0.0;
1759 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001760 eof = 0;
1761
1762 if (self->telling) {
1763 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1764 * next input to be decoded is dec_buffer + input_chunk.
1765 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001766 PyObject *next_input = dec_buffer;
1767 PyBytes_Concat(&next_input, input_chunk);
1768 if (next_input == NULL) {
1769 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001770 goto fail;
1771 }
Serhiy Storchaka48842712016-04-06 09:45:48 +03001772 Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001773 }
1774 Py_DECREF(input_chunk);
1775
1776 return (eof == 0);
1777
1778 fail:
1779 Py_XDECREF(dec_buffer);
1780 Py_XDECREF(dec_flags);
1781 Py_XDECREF(input_chunk);
1782 return -1;
1783}
1784
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001785/*[clinic input]
1786_io.TextIOWrapper.read
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001787 size as n: Py_ssize_t(accept={int, NoneType}) = -1
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001788 /
1789[clinic start generated code]*/
1790
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001791static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001792_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
Serhiy Storchaka762bf402017-03-30 09:15:31 +03001793/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001795 PyObject *result = NULL, *chunks = NULL;
1796
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001797 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001798 CHECK_CLOSED(self);
1799
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001800 if (self->decoder == NULL)
1801 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001802
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001803 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001804 return NULL;
1805
1806 if (n < 0) {
1807 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001808 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001809 PyObject *decoded;
1810 if (bytes == NULL)
1811 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001812
1813 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1814 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1815 bytes, 1);
1816 else
1817 decoded = PyObject_CallMethodObjArgs(
1818 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001819 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001820 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001821 goto fail;
1822
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001823 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001824
1825 if (result == NULL) {
1826 Py_DECREF(decoded);
1827 return NULL;
1828 }
1829
1830 PyUnicode_AppendAndDel(&result, decoded);
1831 if (result == NULL)
1832 goto fail;
1833
1834 Py_CLEAR(self->snapshot);
1835 return result;
1836 }
1837 else {
1838 int res = 1;
1839 Py_ssize_t remaining = n;
1840
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001841 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001842 if (result == NULL)
1843 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001844 if (PyUnicode_READY(result) == -1)
1845 goto fail;
1846 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001847
1848 /* Keep reading chunks until we have n characters to return */
1849 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001850 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001851 if (res < 0) {
1852 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1853 when EINTR occurs so we needn't do it ourselves. */
1854 if (_PyIO_trap_eintr()) {
1855 continue;
1856 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001857 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001858 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001859 if (res == 0) /* EOF */
1860 break;
1861 if (chunks == NULL) {
1862 chunks = PyList_New(0);
1863 if (chunks == NULL)
1864 goto fail;
1865 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001866 if (PyUnicode_GET_LENGTH(result) > 0 &&
1867 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001868 goto fail;
1869 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001870 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001871 if (result == NULL)
1872 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001873 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001874 }
1875 if (chunks != NULL) {
1876 if (result != NULL && PyList_Append(chunks, result) < 0)
1877 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03001878 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001879 if (result == NULL)
1880 goto fail;
1881 Py_CLEAR(chunks);
1882 }
1883 return result;
1884 }
1885 fail:
1886 Py_XDECREF(result);
1887 Py_XDECREF(chunks);
1888 return NULL;
1889}
1890
1891
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001892/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001893 that is to the NUL character. Otherwise the function will produce
1894 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001895static const char *
1896find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001897{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001898 if (kind == PyUnicode_1BYTE_KIND) {
1899 assert(ch < 256);
1900 return (char *) memchr((void *) s, (char) ch, end - s);
1901 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001902 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001903 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001904 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001905 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001906 return s;
1907 if (s == end)
1908 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001909 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001910 }
1911}
1912
1913Py_ssize_t
1914_PyIO_find_line_ending(
1915 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001916 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001917{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001918 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001919
1920 if (translated) {
1921 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001922 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001923 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001924 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925 else {
1926 *consumed = len;
1927 return -1;
1928 }
1929 }
1930 else if (universal) {
1931 /* Universal newline search. Find any of \r, \r\n, \n
1932 * The decoder ensures that \r\n are not split in two pieces
1933 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001934 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001935 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001936 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001937 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001938 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001939 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001940 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001941 if (s >= end) {
1942 *consumed = len;
1943 return -1;
1944 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001945 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001946 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001947 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001948 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001949 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001950 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001951 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001952 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001953 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001954 }
1955 }
1956 }
1957 else {
1958 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001959 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001960 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001961 /* Assume that readnl is an ASCII character. */
1962 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001964 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001965 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001966 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001967 *consumed = len;
1968 return -1;
1969 }
1970 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001971 const char *s = start;
1972 const char *e = end - (readnl_len - 1)*kind;
1973 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974 if (e < s)
1975 e = s;
1976 while (s < e) {
1977 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001978 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001979 if (pos == NULL || pos >= e)
1980 break;
1981 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001982 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001983 break;
1984 }
1985 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001986 return (pos - start)/kind + readnl_len;
1987 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001988 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001989 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001990 if (pos == NULL)
1991 *consumed = len;
1992 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001993 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 return -1;
1995 }
1996 }
1997}
1998
/* Read one line from the stream and return it as a new str reference.

   `limit` is the maximum number of characters to return; a negative value
   means no limit.  At end of file with nothing buffered an empty string is
   returned.  Returns NULL with an exception set on error.

   Locals worth knowing:
     line             - string currently being scanned (new reference)
     chunks           - list of pieces already put aside for this line
     remaining        - unscanned tail carried over to the next iteration
     chunked          - number of characters already stored in `chunks`
     offset_to_buffer - length of the `remaining` prefix that was prepended
                        to decoded_chars to form `line`
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    /* Pending writes go out before anything is read. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Nothing carried over: scan the decoded buffer in place,
               starting at the current read offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the carried-over tail to the freshly decoded chunk. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        /* The result is relative to `start`; `consumed` is only meaningful
           when no line ending was found. */
        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            /* Found a line ending; clip to `limit` if it would be exceeded. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining characters we'll have to prepend to
           the next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        /* endpos indexes `line`; subtract the prepended prefix to get the
           matching offset into decoded_chars. */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    if (remaining != NULL) {
        /* Reached when the loop exited at end of file with a tail still
           carried over: it is part of the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the final line from the pieces put aside plus the last
           piece, if any. */
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        /* Nothing was read at all: return the shared empty string. */
        Py_INCREF(_PyIO_empty_str);
        line = _PyIO_empty_str;
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2161
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002162/*[clinic input]
2163_io.TextIOWrapper.readline
2164 size: Py_ssize_t = -1
2165 /
2166[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002168static PyObject *
2169_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2170/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2171{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002172 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002173 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002174}
2175
2176/* Seek and Tell */
2177
/* Unpacked form of a position cookie.  textiowrapper_build_cookie() packs
   these fields into a single Python int and textiowrapper_parse_cookie()
   unpacks them again. */
typedef struct {
    Py_off_t start_pos;     /* 0 together with dec_flags == 0 is treated as
                               "start of stream" by the setstate helpers */
    int dec_flags;          /* passed to decoder.setstate() as the flags
                               half of the (buffer, flags) state */
    int bytes_to_feed;      /* NOTE(review): presumably bytes to feed the
                               decoder after seeking - confirm in seek() */
    int chars_to_skip;      /* NOTE(review): presumably decoded characters
                               to discard afterwards - confirm in seek() */
    char need_eof;          /* NOTE(review): presumably a flag asking for a
                               final (EOF) decode - confirm in seek() */
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002185
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the
   intermediary byte string the various cookie_type fields will be stored.
 */

/* Size of the intermediary byte string: the sizes of the five cookie_type
   fields added together, without any padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2217
2218static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002219textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002220{
2221 unsigned char buffer[COOKIE_BUF_LEN];
2222 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2223 if (cookieLong == NULL)
2224 return -1;
2225
2226 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002227 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002228 Py_DECREF(cookieLong);
2229 return -1;
2230 }
2231 Py_DECREF(cookieLong);
2232
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002233 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2234 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2235 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2236 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2237 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002238
2239 return 0;
2240}
2241
2242static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002243textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002244{
2245 unsigned char buffer[COOKIE_BUF_LEN];
2246
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002247 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2248 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2249 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2250 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2251 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252
Christian Heimes743e0cd2012-10-17 23:52:17 +02002253 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2254 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002255}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002256
2257static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002258_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002259{
2260 PyObject *res;
2261 /* When seeking to the start of the stream, we call decoder.reset()
2262 rather than decoder.getstate().
2263 This is for a few decoders such as utf-16 for which the state value
2264 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2265 utf-16, that we are expecting a BOM).
2266 */
2267 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2268 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2269 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002270 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2271 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002272 if (res == NULL)
2273 return -1;
2274 Py_DECREF(res);
2275 return 0;
2276}
2277
Antoine Pitroue4501852009-05-14 18:55:55 +00002278static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002279_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002280{
2281 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002282 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002283 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2284 self->encoding_start_of_stream = 1;
2285 }
2286 else {
2287 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002288 _PyLong_Zero, NULL);
Antoine Pitroue4501852009-05-14 18:55:55 +00002289 self->encoding_start_of_stream = 0;
2290 }
2291 if (res == NULL)
2292 return -1;
2293 Py_DECREF(res);
2294 return 0;
2295}
2296
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002297static int
2298_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2299{
2300 /* Same as _textiowrapper_decoder_setstate() above. */
2301 return _textiowrapper_encoder_reset(
2302 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2303}
2304
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002305/*[clinic input]
2306_io.TextIOWrapper.seek
2307 cookie as cookieObj: object
2308 whence: int = 0
2309 /
2310[clinic start generated code]*/
2311
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002312static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002313_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2314/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002315{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002316 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002317 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002318 PyObject *res;
2319 int cmp;
2320
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002321 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002322 CHECK_CLOSED(self);
2323
2324 Py_INCREF(cookieObj);
2325
2326 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002327 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002328 goto fail;
2329 }
2330
2331 if (whence == 1) {
2332 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002333 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002334 if (cmp < 0)
2335 goto fail;
2336
2337 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002338 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002339 goto fail;
2340 }
2341
2342 /* Seeking to the current position should attempt to
2343 * sync the underlying buffer with the current position.
2344 */
2345 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002346 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347 if (cookieObj == NULL)
2348 goto fail;
2349 }
2350 else if (whence == 2) {
2351 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002352 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002353 if (cmp < 0)
2354 goto fail;
2355
2356 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002357 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002358 goto fail;
2359 }
2360
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002361 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002362 if (res == NULL)
2363 goto fail;
2364 Py_DECREF(res);
2365
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002366 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002367 Py_CLEAR(self->snapshot);
2368 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002369 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002370 if (res == NULL)
2371 goto fail;
2372 Py_DECREF(res);
2373 }
2374
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002375 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002376 Py_CLEAR(cookieObj);
2377 if (res == NULL)
2378 goto fail;
2379 if (self->encoder) {
2380 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002381 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002382 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2383 Py_DECREF(res);
2384 goto fail;
2385 }
2386 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002387 return res;
2388 }
2389 else if (whence != 0) {
2390 PyErr_Format(PyExc_ValueError,
2391 "invalid whence (%d, should be 0, 1 or 2)", whence);
2392 goto fail;
2393 }
2394
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002395 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396 if (cmp < 0)
2397 goto fail;
2398
2399 if (cmp == 1) {
2400 PyErr_Format(PyExc_ValueError,
2401 "negative seek position %R", cookieObj);
2402 goto fail;
2403 }
2404
2405 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2406 if (res == NULL)
2407 goto fail;
2408 Py_DECREF(res);
2409
2410 /* The strategy of seek() is to go back to the safe start point
2411 * and replay the effect of read(chars_to_skip) from there.
2412 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002413 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002414 goto fail;
2415
2416 /* Seek back to the safe start point. */
2417 posobj = PyLong_FromOff_t(cookie.start_pos);
2418 if (posobj == NULL)
2419 goto fail;
2420 res = PyObject_CallMethodObjArgs(self->buffer,
2421 _PyIO_str_seek, posobj, NULL);
2422 Py_DECREF(posobj);
2423 if (res == NULL)
2424 goto fail;
2425 Py_DECREF(res);
2426
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002427 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002428 Py_CLEAR(self->snapshot);
2429
2430 /* Restore the decoder to its state from the safe start point. */
2431 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002432 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002433 goto fail;
2434 }
2435
2436 if (cookie.chars_to_skip) {
2437 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002438 PyObject *input_chunk = _PyObject_CallMethodId(
2439 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002440 PyObject *decoded;
2441
2442 if (input_chunk == NULL)
2443 goto fail;
2444
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002445 if (!PyBytes_Check(input_chunk)) {
2446 PyErr_Format(PyExc_TypeError,
2447 "underlying read() should have returned a bytes "
2448 "object, not '%.200s'",
2449 Py_TYPE(input_chunk)->tp_name);
2450 Py_DECREF(input_chunk);
2451 goto fail;
2452 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002453
2454 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2455 if (self->snapshot == NULL) {
2456 Py_DECREF(input_chunk);
2457 goto fail;
2458 }
2459
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002460 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2461 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002462
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002463 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464 goto fail;
2465
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002466 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002467
2468 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002469 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002470 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471 goto fail;
2472 }
2473 self->decoded_chars_used = cookie.chars_to_skip;
2474 }
2475 else {
2476 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2477 if (self->snapshot == NULL)
2478 goto fail;
2479 }
2480
Antoine Pitroue4501852009-05-14 18:55:55 +00002481 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2482 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002483 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002484 goto fail;
2485 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486 return cookieObj;
2487 fail:
2488 Py_XDECREF(cookieObj);
2489 return NULL;
2490
2491}
2492
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002493/*[clinic input]
2494_io.TextIOWrapper.tell
2495[clinic start generated code]*/
2496
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002498_io_TextIOWrapper_tell_impl(textio *self)
2499/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500{
2501 PyObject *res;
2502 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002503 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504 PyObject *next_input;
2505 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002506 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507 PyObject *saved_state = NULL;
2508 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002509 Py_ssize_t dec_buffer_len;
2510 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002512 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513 CHECK_CLOSED(self);
2514
2515 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002516 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002517 goto fail;
2518 }
2519 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002520 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002521 "telling position disabled by next() call");
2522 goto fail;
2523 }
2524
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002525 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002526 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002527 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528 if (res == NULL)
2529 goto fail;
2530 Py_DECREF(res);
2531
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002532 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002533 if (posobj == NULL)
2534 goto fail;
2535
2536 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002537 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002538 return posobj;
2539 }
2540
2541#if defined(HAVE_LARGEFILE_SUPPORT)
2542 cookie.start_pos = PyLong_AsLongLong(posobj);
2543#else
2544 cookie.start_pos = PyLong_AsLong(posobj);
2545#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002546 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002547 if (PyErr_Occurred())
2548 goto fail;
2549
2550 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002551 assert(PyTuple_Check(self->snapshot));
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002552 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002553 goto fail;
2554
2555 assert (PyBytes_Check(next_input));
2556
2557 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2558
2559 /* How many decoded characters have been used up since the snapshot? */
2560 if (self->decoded_chars_used == 0) {
2561 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002562 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563 }
2564
2565 chars_to_skip = self->decoded_chars_used;
2566
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002567 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002568 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2569 _PyIO_str_getstate, NULL);
2570 if (saved_state == NULL)
2571 goto fail;
2572
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002573#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002574 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002575 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2576 _PyIO_str_getstate, NULL); \
2577 if (_state == NULL) \
2578 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002579 if (!PyTuple_Check(_state)) { \
2580 PyErr_SetString(PyExc_TypeError, \
2581 "illegal decoder state"); \
2582 Py_DECREF(_state); \
2583 goto fail; \
2584 } \
2585 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2586 &dec_buffer, &dec_flags)) \
2587 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002588 Py_DECREF(_state); \
2589 goto fail; \
2590 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002591 if (!PyBytes_Check(dec_buffer)) { \
2592 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002593 "illegal decoder state: the first item should be a " \
2594 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002595 Py_TYPE(dec_buffer)->tp_name); \
2596 Py_DECREF(_state); \
2597 goto fail; \
2598 } \
2599 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002600 Py_DECREF(_state); \
2601 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002602
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002603#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002604 PyObject *_decoded = _PyObject_CallMethodId( \
2605 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002606 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002607 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002608 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002609 Py_DECREF(_decoded); \
2610 } while (0)
2611
2612 /* Fast search for an acceptable start point, close to our
2613 current pos */
2614 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2615 skip_back = 1;
2616 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2617 input = PyBytes_AS_STRING(next_input);
2618 while (skip_bytes > 0) {
2619 /* Decode up to temptative start point */
2620 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2621 goto fail;
2622 DECODER_DECODE(input, skip_bytes, chars_decoded);
2623 if (chars_decoded <= chars_to_skip) {
2624 DECODER_GETSTATE();
2625 if (dec_buffer_len == 0) {
2626 /* Before pos and no bytes buffered in decoder => OK */
2627 cookie.dec_flags = dec_flags;
2628 chars_to_skip -= chars_decoded;
2629 break;
2630 }
2631 /* Skip back by buffered amount and reset heuristic */
2632 skip_bytes -= dec_buffer_len;
2633 skip_back = 1;
2634 }
2635 else {
2636 /* We're too far ahead, skip back a bit */
2637 skip_bytes -= skip_back;
2638 skip_back *= 2;
2639 }
2640 }
2641 if (skip_bytes <= 0) {
2642 skip_bytes = 0;
2643 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2644 goto fail;
2645 }
2646
2647 /* Note our initial start point. */
2648 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002649 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002650 if (chars_to_skip == 0)
2651 goto finally;
2652
2653 /* We should be close to the desired position. Now feed the decoder one
2654 * byte at a time until we reach the `chars_to_skip` target.
2655 * As we go, note the nearest "safe start point" before the current
2656 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002657 * can safely start from there and advance to this location).
2658 */
2659 chars_decoded = 0;
2660 input = PyBytes_AS_STRING(next_input);
2661 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002662 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002663 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002664 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002665
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002666 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002667 /* We got n chars for 1 byte */
2668 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002669 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002670 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002671
2672 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2673 /* Decoder buffer is empty, so this is a safe start point. */
2674 cookie.start_pos += cookie.bytes_to_feed;
2675 chars_to_skip -= chars_decoded;
2676 cookie.dec_flags = dec_flags;
2677 cookie.bytes_to_feed = 0;
2678 chars_decoded = 0;
2679 }
2680 if (chars_decoded >= chars_to_skip)
2681 break;
2682 input++;
2683 }
2684 if (input == input_end) {
2685 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002686 PyObject *decoded = _PyObject_CallMethodId(
2687 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002688 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002689 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002690 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002691 Py_DECREF(decoded);
2692 cookie.need_eof = 1;
2693
2694 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002695 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696 "can't reconstruct logical file position");
2697 goto fail;
2698 }
2699 }
2700
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002701finally:
Victor Stinner7e425412016-12-09 00:36:19 +01002702 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002703 Py_DECREF(saved_state);
2704 if (res == NULL)
2705 return NULL;
2706 Py_DECREF(res);
2707
2708 /* The returned cookie corresponds to the last safe start point. */
2709 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002710 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002711
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002712fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002713 if (saved_state) {
2714 PyObject *type, *value, *traceback;
2715 PyErr_Fetch(&type, &value, &traceback);
Victor Stinner7e425412016-12-09 00:36:19 +01002716 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002717 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002718 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002719 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002720 }
2721 return NULL;
2722}
2723
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002724/*[clinic input]
2725_io.TextIOWrapper.truncate
2726 pos: object = None
2727 /
2728[clinic start generated code]*/
2729
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002730static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002731_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2732/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002733{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002734 PyObject *res;
2735
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002736 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002737
2738 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2739 if (res == NULL)
2740 return NULL;
2741 Py_DECREF(res);
2742
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002743 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002744}
2745
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002747textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002748{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002749 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002750 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002751
2752 CHECK_INITIALIZED(self);
2753
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002754 res = PyUnicode_FromString("<_io.TextIOWrapper");
2755 if (res == NULL)
2756 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002757
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002758 status = Py_ReprEnter((PyObject *)self);
2759 if (status != 0) {
2760 if (status > 0) {
2761 PyErr_Format(PyExc_RuntimeError,
2762 "reentrant call inside %s.__repr__",
2763 Py_TYPE(self)->tp_name);
2764 }
2765 goto error;
2766 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002767 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002768 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002769 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002770 PyErr_Clear();
2771 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002772 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002773 }
2774 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002775 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002776 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002777 if (s == NULL)
2778 goto error;
2779 PyUnicode_AppendAndDel(&res, s);
2780 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002781 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002782 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002783 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002784 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002785 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002786 PyErr_Clear();
2787 else
2788 goto error;
2789 }
2790 else {
2791 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2792 Py_DECREF(modeobj);
2793 if (s == NULL)
2794 goto error;
2795 PyUnicode_AppendAndDel(&res, s);
2796 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002797 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002798 }
2799 s = PyUnicode_FromFormat("%U encoding=%R>",
2800 res, self->encoding);
2801 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002802 if (status == 0) {
2803 Py_ReprLeave((PyObject *)self);
2804 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002805 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002806
2807 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002808 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002809 if (status == 0) {
2810 Py_ReprLeave((PyObject *)self);
2811 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002812 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002813}
2814
2815
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002816/* Inquiries */
2817
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002818/*[clinic input]
2819_io.TextIOWrapper.fileno
2820[clinic start generated code]*/
2821
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002822static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002823_io_TextIOWrapper_fileno_impl(textio *self)
2824/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002825{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002826 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002827 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002828}
2829
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002830/*[clinic input]
2831_io.TextIOWrapper.seekable
2832[clinic start generated code]*/
2833
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002834static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002835_io_TextIOWrapper_seekable_impl(textio *self)
2836/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002837{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002838 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002839 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002840}
2841
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002842/*[clinic input]
2843_io.TextIOWrapper.readable
2844[clinic start generated code]*/
2845
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002846static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002847_io_TextIOWrapper_readable_impl(textio *self)
2848/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002849{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002850 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002851 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002852}
2853
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002854/*[clinic input]
2855_io.TextIOWrapper.writable
2856[clinic start generated code]*/
2857
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002858static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002859_io_TextIOWrapper_writable_impl(textio *self)
2860/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002861{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002862 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002863 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002864}
2865
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002866/*[clinic input]
2867_io.TextIOWrapper.isatty
2868[clinic start generated code]*/
2869
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002870static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002871_io_TextIOWrapper_isatty_impl(textio *self)
2872/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002873{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002874 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002875 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002876}
2877
2878static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002879textiowrapper_getstate(textio *self, PyObject *args)
2880{
2881 PyErr_Format(PyExc_TypeError,
2882 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2883 return NULL;
2884}
2885
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002886/*[clinic input]
2887_io.TextIOWrapper.flush
2888[clinic start generated code]*/
2889
Antoine Pitrou243757e2010-11-05 21:15:39 +00002890static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002891_io_TextIOWrapper_flush_impl(textio *self)
2892/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002893{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002894 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002895 CHECK_CLOSED(self);
2896 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002897 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002898 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002899 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002900}
2901
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002902/*[clinic input]
2903_io.TextIOWrapper.close
2904[clinic start generated code]*/
2905
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002906static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002907_io_TextIOWrapper_close_impl(textio *self)
2908/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002909{
2910 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002911 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002912 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002913
Antoine Pitrou6be88762010-05-03 16:48:20 +00002914 res = textiowrapper_closed_get(self, NULL);
2915 if (res == NULL)
2916 return NULL;
2917 r = PyObject_IsTrue(res);
2918 Py_DECREF(res);
2919 if (r < 0)
2920 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002921
Antoine Pitrou6be88762010-05-03 16:48:20 +00002922 if (r > 0) {
2923 Py_RETURN_NONE; /* stream already closed */
2924 }
2925 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002926 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002927 if (self->finalizing) {
Victor Stinner61bdb0d2016-12-09 15:39:28 +01002928 res = _PyObject_CallMethodIdObjArgs(self->buffer,
2929 &PyId__dealloc_warn,
2930 self, NULL);
Antoine Pitroue033e062010-10-29 10:38:18 +00002931 if (res)
2932 Py_DECREF(res);
2933 else
2934 PyErr_Clear();
2935 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002936 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002937 if (res == NULL)
2938 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002939 else
2940 Py_DECREF(res);
2941
Benjamin Peterson68623612012-12-20 11:53:11 -06002942 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2943 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002944 _PyErr_ChainExceptions(exc, val, tb);
2945 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002946 }
2947 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002948 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002949}
2950
2951static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002952textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002953{
2954 PyObject *line;
2955
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002956 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002957
2958 self->telling = 0;
2959 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2960 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002961 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002962 }
2963 else {
2964 line = PyObject_CallMethodObjArgs((PyObject *)self,
2965 _PyIO_str_readline, NULL);
2966 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002967 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03002968 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002969 "not '%.200s'", Py_TYPE(line)->tp_name);
2970 Py_DECREF(line);
2971 return NULL;
2972 }
2973 }
2974
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002975 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002976 return NULL;
2977
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002978 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002979 /* Reached EOF or would have blocked */
2980 Py_DECREF(line);
2981 Py_CLEAR(self->snapshot);
2982 self->telling = self->seekable;
2983 return NULL;
2984 }
2985
2986 return line;
2987}
2988
2989static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002990textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002991{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002992 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002993 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002994}
2995
2996static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002997textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002998{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002999 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003000 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3001}
3002
3003static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003004textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003005{
3006 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003007 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003008 if (self->decoder == NULL)
3009 Py_RETURN_NONE;
3010 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
3011 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00003012 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
3013 PyErr_Clear();
3014 Py_RETURN_NONE;
3015 }
3016 else {
3017 return NULL;
3018 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003019 }
3020 return res;
3021}
3022
3023static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003024textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003025{
3026 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003027 Py_INCREF(self->errors);
3028 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003029}
3030
3031static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003032textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003033{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003034 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003035 return PyLong_FromSsize_t(self->chunk_size);
3036}
3037
3038static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003039textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003040{
3041 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003042 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003043 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003044 if (n == -1 && PyErr_Occurred())
3045 return -1;
3046 if (n <= 0) {
3047 PyErr_SetString(PyExc_ValueError,
3048 "a strictly positive integer is required");
3049 return -1;
3050 }
3051 self->chunk_size = n;
3052 return 0;
3053}
3054
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003055#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003056
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003057static PyMethodDef incrementalnewlinedecoder_methods[] = {
3058 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3059 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3060 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3061 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3062 {NULL}
3063};
3064
3065static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3066 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3067 {NULL}
3068};
3069
3070PyTypeObject PyIncrementalNewlineDecoder_Type = {
3071 PyVarObject_HEAD_INIT(NULL, 0)
3072 "_io.IncrementalNewlineDecoder", /*tp_name*/
3073 sizeof(nldecoder_object), /*tp_basicsize*/
3074 0, /*tp_itemsize*/
3075 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3076 0, /*tp_print*/
3077 0, /*tp_getattr*/
3078 0, /*tp_setattr*/
3079 0, /*tp_compare */
3080 0, /*tp_repr*/
3081 0, /*tp_as_number*/
3082 0, /*tp_as_sequence*/
3083 0, /*tp_as_mapping*/
3084 0, /*tp_hash */
3085 0, /*tp_call*/
3086 0, /*tp_str*/
3087 0, /*tp_getattro*/
3088 0, /*tp_setattro*/
3089 0, /*tp_as_buffer*/
3090 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3091 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3092 0, /* tp_traverse */
3093 0, /* tp_clear */
3094 0, /* tp_richcompare */
3095 0, /*tp_weaklistoffset*/
3096 0, /* tp_iter */
3097 0, /* tp_iternext */
3098 incrementalnewlinedecoder_methods, /* tp_methods */
3099 0, /* tp_members */
3100 incrementalnewlinedecoder_getset, /* tp_getset */
3101 0, /* tp_base */
3102 0, /* tp_dict */
3103 0, /* tp_descr_get */
3104 0, /* tp_descr_set */
3105 0, /* tp_dictoffset */
3106 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3107 0, /* tp_alloc */
3108 PyType_GenericNew, /* tp_new */
3109};
3110
3111
3112static PyMethodDef textiowrapper_methods[] = {
3113 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003114 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003115 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3116 _IO_TEXTIOWRAPPER_READ_METHODDEF
3117 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3118 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3119 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3120
3121 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3122 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3123 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3124 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3125 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Antoine Pitrou243757e2010-11-05 21:15:39 +00003126 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003127
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003128 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3129 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3130 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003131 {NULL, NULL}
3132};
3133
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003134static PyMemberDef textiowrapper_members[] = {
3135 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3136 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3137 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003138 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003139 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003140 {NULL}
3141};
3142
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003143static PyGetSetDef textiowrapper_getset[] = {
3144 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3145 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003146/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3147*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003148 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3149 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3150 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3151 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003152 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003153};
3154
3155PyTypeObject PyTextIOWrapper_Type = {
3156 PyVarObject_HEAD_INIT(NULL, 0)
3157 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003158 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003159 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003160 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003161 0, /*tp_print*/
3162 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003163 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003164 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003165 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003166 0, /*tp_as_number*/
3167 0, /*tp_as_sequence*/
3168 0, /*tp_as_mapping*/
3169 0, /*tp_hash */
3170 0, /*tp_call*/
3171 0, /*tp_str*/
3172 0, /*tp_getattro*/
3173 0, /*tp_setattro*/
3174 0, /*tp_as_buffer*/
3175 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02003176 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003177 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003178 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3179 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003180 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003181 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003182 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003183 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3184 textiowrapper_methods, /* tp_methods */
3185 textiowrapper_members, /* tp_members */
3186 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003187 0, /* tp_base */
3188 0, /* tp_dict */
3189 0, /* tp_descr_get */
3190 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003191 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003192 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003193 0, /* tp_alloc */
3194 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003195 0, /* tp_free */
3196 0, /* tp_is_gc */
3197 0, /* tp_bases */
3198 0, /* tp_mro */
3199 0, /* tp_cache */
3200 0, /* tp_subclasses */
3201 0, /* tp_weaklist */
3202 0, /* tp_del */
3203 0, /* tp_version_tag */
3204 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003205};