blob: 717b56ab319b618a460d720d3150c2a0922e588d [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030014/*[clinic input]
15module _io
16class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18[clinic start generated code]*/
19/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20
/* Static identifier handles for the attribute and method names this file
   looks up.  Each _Py_IDENTIFIER(x) is referred to elsewhere as PyId_x
   (for example &PyId_strict and &PyId_setstate). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(strict);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000042/* TextIOBase */
43
/* Docstring of the _io._TextIOBase type (installed as its tp_doc). */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
51
52static PyObject *
53_unsupported(const char *message)
54{
Antoine Pitrou712cb732013-12-21 15:51:54 +010055 _PyIO_State *state = IO_STATE();
56 if (state != NULL)
57 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 return NULL;
59}
60
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000061PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000062 "Separate the underlying buffer from the TextIOBase and return it.\n"
63 "\n"
64 "After the underlying buffer has been detached, the TextIO is in an\n"
65 "unusable state.\n"
66 );
67
68static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000070{
71 return _unsupported("detach");
72}
73
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000074PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000075 "Read at most n characters from stream.\n"
76 "\n"
77 "Read from underlying buffer until we have n characters or we hit EOF.\n"
78 "If n is negative or omitted, read until EOF.\n"
79 );
80
81static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000082textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000083{
84 return _unsupported("read");
85}
86
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000087PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000088 "Read until newline or EOF.\n"
89 "\n"
90 "Returns an empty string if EOF is hit immediately.\n"
91 );
92
93static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000094textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000095{
96 return _unsupported("readline");
97}
98
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000099PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100 "Write string to stream.\n"
101 "Returns the number of characters written (which is always equal to\n"
102 "the length of the string).\n"
103 );
104
105static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000106textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000107{
108 return _unsupported("write");
109}
110
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000111PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000112 "Encoding of the text stream.\n"
113 "\n"
114 "Subclasses should override.\n"
115 );
116
117static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000118textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119{
120 Py_RETURN_NONE;
121}
122
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124 "Line endings translated so far.\n"
125 "\n"
126 "Only line endings translated during reading are considered.\n"
127 "\n"
128 "Subclasses should override.\n"
129 );
130
131static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000132textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133{
134 Py_RETURN_NONE;
135}
136
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000137PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000138 "The error setting of the decoder or encoder.\n"
139 "\n"
140 "Subclasses should override.\n"
141 );
142
143static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000144textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000145{
146 Py_RETURN_NONE;
147}
148
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000149
/* Method table of _io._TextIOBase.  Every entry points at a stub that
   reports the operation through _unsupported(); the table ends with a
   NULL sentinel entry. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}
};
157
/* Attribute table of _io._TextIOBase.  All three attributes have a getter
   only (the setter slot is NULL) and each getter returns None; the table
   ends with a NULL sentinel entry. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}
};
164
/* Type object for _io._TextIOBase.

   The type derives from PyIOBase_Type (tp_base), allows subclassing
   (Py_TPFLAGS_BASETYPE) and only fills in the docstring, the method table
   and the getset table.  All other slots, including tp_basicsize, are left
   at 0 (presumably to be inherited from the base type when the type is
   readied -- confirm where the type is initialised). */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
        | Py_TPFLAGS_HAVE_FINALIZE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
    0,                          /* tp_free */
    0,                          /* tp_is_gc */
    0,                          /* tp_bases */
    0,                          /* tp_mro */
    0,                          /* tp_cache */
    0,                          /* tp_subclasses */
    0,                          /* tp_weaklist */
    0,                          /* tp_del */
    0,                          /* tp_version_tag */
    0,                          /* tp_finalize */
};
216
217
218/* IncrementalNewlineDecoder */
219
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped decoder.  Py_None means the input to decode() is used as the
       decoded text directly; NULL means __init__ has not been called. */
    PyObject *decoder;
    /* Error handler given to __init__ (the string "strict" when omitted). */
    PyObject *errors;
    /* Set when decode() held back a trailing '\r' from its result; the
       '\r' is put in front of the output of a later decode() call. */
    unsigned int pendingcr: 1;
    /* Non-zero: decode() rewrites "\r\n" and "\r" as "\n". */
    unsigned int translate: 1;
    /* Bit mask of SEEN_CR / SEEN_LF / SEEN_CRLF recorded by decode(). */
    unsigned int seennl: 3;
} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000228
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300229/*[clinic input]
230_io.IncrementalNewlineDecoder.__init__
231 decoder: object
232 translate: int
233 errors: object(c_default="NULL") = "strict"
234
235Codec used when reading a file in universal newlines mode.
236
237It wraps another incremental decoder, translating \r\n and \r into \n.
238It also records the types of newlines encountered. When used with
239translate=False, it ensures that the newline sequence is returned in
240one piece. When used with decoder=None, it expects unicode strings as
241decode input and translates newlines without first invoking an external
242decoder.
243[clinic start generated code]*/
244
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000245static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300246_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
247 PyObject *decoder, int translate,
248 PyObject *errors)
249/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000250{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251 self->decoder = decoder;
252 Py_INCREF(decoder);
253
254 if (errors == NULL) {
INADA Naoki507434f2017-12-21 09:59:53 +0900255 self->errors = _PyUnicode_FromId(&PyId_strict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000256 if (self->errors == NULL)
257 return -1;
258 }
259 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000260 self->errors = errors;
261 }
INADA Naoki507434f2017-12-21 09:59:53 +0900262 Py_INCREF(self->errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000263
264 self->translate = translate;
265 self->seennl = 0;
266 self->pendingcr = 0;
267
268 return 0;
269}
270
271static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273{
274 Py_CLEAR(self->decoder);
275 Py_CLEAR(self->errors);
276 Py_TYPE(self)->tp_free((PyObject *)self);
277}
278
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200279static int
280check_decoded(PyObject *decoded)
281{
282 if (decoded == NULL)
283 return -1;
284 if (!PyUnicode_Check(decoded)) {
285 PyErr_Format(PyExc_TypeError,
286 "decoder should return a string result, not '%.200s'",
287 Py_TYPE(decoded)->tp_name);
288 Py_DECREF(decoded);
289 return -1;
290 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200291 if (PyUnicode_READY(decoded) < 0) {
292 Py_DECREF(decoded);
293 return -1;
294 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200295 return 0;
296}
297
/* Bit flags stored in nldecoder_object.seennl, one per kind of line ending
   that decode() has encountered; SEEN_ALL is the union of the three. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
302
/* Decode `input` and apply the newline handling of an
 * IncrementalNewlineDecoder.
 *
 * myself: the nldecoder_object, passed as a PyObject *.
 * input:  handed to the wrapped decoder's decode() method; when the
 *         wrapped decoder is None, `input` itself is used as the decoded
 *         text (and must therefore be a str).
 * final:  non-zero when no more input follows.  When zero, a trailing
 *         '\r' is held back (pendingcr) instead of being returned.
 *
 * Steps: decode, prepend a '\r' held back by an earlier call, hold back a
 * new trailing '\r' unless `final`, then record which line endings occur
 * (self->seennl) and, if self->translate is set, rewrite "\r\n" and "\r"
 * as "\n".
 *
 * Returns a str object owned by the caller, or NULL with an exception set
 * (ValueError if __init__ was never called).
 */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) myself;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output` (or it was
       NULL), so there is nothing left to clean up here. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        /* Same maximum character as `output`, one character longer. */
        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` is used as the per-character byte width: skip the first
           character and copy the old contents after it. */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        /* Work on a local copy; it is merged into self->seennl at the end
           of this block. */
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        if (len == 0)
            return output;

        /* NOTE(review): the scanning loops below read in_str at indices up
           to and including `len`; this presumably relies on str data
           being followed by a NUL character -- confirm. */

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
         */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    /* For wider kinds memchr() only found a byte, so the
                       characters themselves are checked here. */
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* Look one character ahead to tell "\r\n" from "\r". */
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            void *translated;
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            /* Translation never lengthens the text, so `len` characters
               are enough for the scratch buffer. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character was at index len, i.e.
                   beyond the end of the text. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old output has already been released above, so return
               directly rather than through the error label. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
503
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300504/*[clinic input]
505_io.IncrementalNewlineDecoder.decode
506 input: object
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200507 final: bool(accept={int}) = False
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300508[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000509
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300510static PyObject *
511_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
512 PyObject *input, int final)
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200513/*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300514{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000515 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
516}
517
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300518/*[clinic input]
519_io.IncrementalNewlineDecoder.getstate
520[clinic start generated code]*/
521
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000522static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300523_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
524/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700527 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528
529 if (self->decoder != Py_None) {
530 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
531 _PyIO_str_getstate, NULL);
532 if (state == NULL)
533 return NULL;
Oren Milman13614e32017-08-24 19:51:24 +0300534 if (!PyTuple_Check(state)) {
535 PyErr_SetString(PyExc_TypeError,
536 "illegal decoder state");
537 Py_DECREF(state);
538 return NULL;
539 }
540 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
541 &buffer, &flag))
542 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000543 Py_DECREF(state);
544 return NULL;
545 }
546 Py_INCREF(buffer);
547 Py_DECREF(state);
548 }
549 else {
550 buffer = PyBytes_FromString("");
551 flag = 0;
552 }
553 flag <<= 1;
554 if (self->pendingcr)
555 flag |= 1;
556 return Py_BuildValue("NK", buffer, flag);
557}
558
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300559/*[clinic input]
560_io.IncrementalNewlineDecoder.setstate
561 state: object
562 /
563[clinic start generated code]*/
564
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300566_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
567 PyObject *state)
568/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000569{
570 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700571 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572
Oren Milman1d1d3e92017-08-20 18:35:36 +0300573 if (!PyTuple_Check(state)) {
574 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575 return NULL;
Oren Milman1d1d3e92017-08-20 18:35:36 +0300576 }
577 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
578 &buffer, &flag))
579 {
580 return NULL;
581 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000582
Victor Stinner7d7e7752014-06-17 23:31:25 +0200583 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584 flag >>= 1;
585
586 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200587 return _PyObject_CallMethodId(self->decoder,
588 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 else
590 Py_RETURN_NONE;
591}
592
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300593/*[clinic input]
594_io.IncrementalNewlineDecoder.reset
595[clinic start generated code]*/
596
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000597static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300598_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
599/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000600{
601 self->seennl = 0;
602 self->pendingcr = 0;
603 if (self->decoder != Py_None)
604 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
605 else
606 Py_RETURN_NONE;
607}
608
609static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000610incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000611{
612 switch (self->seennl) {
613 case SEEN_CR:
614 return PyUnicode_FromString("\r");
615 case SEEN_LF:
616 return PyUnicode_FromString("\n");
617 case SEEN_CRLF:
618 return PyUnicode_FromString("\r\n");
619 case SEEN_CR | SEEN_LF:
620 return Py_BuildValue("ss", "\r", "\n");
621 case SEEN_CR | SEEN_CRLF:
622 return Py_BuildValue("ss", "\r", "\r\n");
623 case SEEN_LF | SEEN_CRLF:
624 return Py_BuildValue("ss", "\n", "\r\n");
625 case SEEN_CR | SEEN_LF | SEEN_CRLF:
626 return Py_BuildValue("sss", "\r", "\n", "\r\n");
627 default:
628 Py_RETURN_NONE;
629 }
630
631}
632
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000633/* TextIOWrapper */
634
/* Signature of a specialized encoding function: takes two objects and
   returns an object.  The encoders defined in this file take the textio
   instance and the text to encode, and are cast to this type when placed
   in the encodefuncs table. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
637
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    /* Converted with PyUnicode_AsUTF8() and passed as the `errors`
       argument by the specialized encoding functions. */
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000697/* A couple of specialized cases in order to bypass the slow incremental
698 encoding methods for the most popular encodings. */
699
700static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000701ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000702{
INADA Naoki507434f2017-12-21 09:59:53 +0900703 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000704}
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100709 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900710 PyUnicode_AsUTF8(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000711}
712
713static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000714utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100716 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900717 PyUnicode_AsUTF8(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718}
719
720static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000721utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722{
Antoine Pitroue4501852009-05-14 18:55:55 +0000723 if (!self->encoding_start_of_stream) {
724 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200725#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000726 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000728 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100731 return _PyUnicode_EncodeUTF16(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900732 PyUnicode_AsUTF8(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733}
734
Antoine Pitroue4501852009-05-14 18:55:55 +0000735static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000736utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000737{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100738 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900739 PyUnicode_AsUTF8(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000740}
741
742static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000743utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000744{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100745 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900746 PyUnicode_AsUTF8(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000747}
748
749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000750utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000751{
752 if (!self->encoding_start_of_stream) {
753 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200754#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000755 return utf32be_encode(self, text);
756#else
757 return utf32le_encode(self, text);
758#endif
759 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100760 return _PyUnicode_EncodeUTF32(text,
INADA Naoki507434f2017-12-21 09:59:53 +0900761 PyUnicode_AsUTF8(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000762}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000763
764static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000765utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766{
INADA Naoki507434f2017-12-21 09:59:53 +0900767 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000768}
769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
INADA Naoki507434f2017-12-21 09:59:53 +0900773 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000774}
775
776/* Map normalized encoding names onto the specialized encoding funcs */
777
778typedef struct {
779 const char *name;
780 encodefunc_t encodefunc;
781} encodefuncentry;
782
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200783static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784 {"ascii", (encodefunc_t) ascii_encode},
785 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000786 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000787 {"utf-16-be", (encodefunc_t) utf16be_encode},
788 {"utf-16-le", (encodefunc_t) utf16le_encode},
789 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000790 {"utf-32-be", (encodefunc_t) utf32be_encode},
791 {"utf-32-le", (encodefunc_t) utf32le_encode},
792 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793 {NULL, NULL}
794};
795
INADA Naoki507434f2017-12-21 09:59:53 +0900796static int
797validate_newline(const char *newline)
798{
799 if (newline && newline[0] != '\0'
800 && !(newline[0] == '\n' && newline[1] == '\0')
801 && !(newline[0] == '\r' && newline[1] == '\0')
802 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
803 PyErr_Format(PyExc_ValueError,
804 "illegal newline value: %s", newline);
805 return -1;
806 }
807 return 0;
808}
809
810static int
811set_newline(textio *self, const char *newline)
812{
813 PyObject *old = self->readnl;
814 if (newline == NULL) {
815 self->readnl = NULL;
816 }
817 else {
818 self->readnl = PyUnicode_FromString(newline);
819 if (self->readnl == NULL) {
820 self->readnl = old;
821 return -1;
822 }
823 }
824 self->readuniversal = (newline == NULL || newline[0] == '\0');
825 self->readtranslate = (newline == NULL);
826 self->writetranslate = (newline == NULL || newline[0] != '\0');
827 if (!self->readuniversal && self->readnl != NULL) {
828 // validate_newline() accepts only ASCII newlines.
829 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
830 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
831 if (strcmp(self->writenl, "\n") == 0) {
832 self->writenl = NULL;
833 }
834 }
835 else {
836#ifdef MS_WINDOWS
837 self->writenl = "\r\n";
838#else
839 self->writenl = NULL;
840#endif
841 }
842 Py_XDECREF(old);
843 return 0;
844}
845
846static int
847_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
848 const char *errors)
849{
850 PyObject *res;
851 int r;
852
853 res = _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
854 if (res == NULL)
855 return -1;
856
857 r = PyObject_IsTrue(res);
858 Py_DECREF(res);
859 if (r == -1)
860 return -1;
861
862 if (r != 1)
863 return 0;
864
865 Py_CLEAR(self->decoder);
866 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
867 if (self->decoder == NULL)
868 return -1;
869
870 if (self->readuniversal) {
871 PyObject *incrementalDecoder = PyObject_CallFunction(
872 (PyObject *)&PyIncrementalNewlineDecoder_Type,
873 "Oi", self->decoder, (int)self->readtranslate);
874 if (incrementalDecoder == NULL)
875 return -1;
876 Py_CLEAR(self->decoder);
877 self->decoder = incrementalDecoder;
878 }
879
880 return 0;
881}
882
883static PyObject*
884_textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
885{
886 PyObject *chars;
887
888 if (Py_TYPE(decoder) == &PyIncrementalNewlineDecoder_Type)
889 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
890 else
891 chars = PyObject_CallMethodObjArgs(decoder, _PyIO_str_decode, bytes,
892 eof ? Py_True : Py_False, NULL);
893
894 if (check_decoded(chars) < 0)
895 // check_decoded already decreases refcount
896 return NULL;
897
898 return chars;
899}
900
901static int
902_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
903 const char *errors)
904{
905 PyObject *res;
906 int r;
907
908 res = _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
909 if (res == NULL)
910 return -1;
911
912 r = PyObject_IsTrue(res);
913 Py_DECREF(res);
914 if (r == -1)
915 return -1;
916
917 if (r != 1)
918 return 0;
919
920 Py_CLEAR(self->encoder);
921 self->encodefunc = NULL;
922 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
923 if (self->encoder == NULL)
924 return -1;
925
926 /* Get the normalized named of the codec */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200927 if (_PyObject_LookupAttrId(codec_info, &PyId_name, &res) < 0) {
928 return -1;
INADA Naoki507434f2017-12-21 09:59:53 +0900929 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200930 if (res != NULL && PyUnicode_Check(res)) {
INADA Naoki507434f2017-12-21 09:59:53 +0900931 const encodefuncentry *e = encodefuncs;
932 while (e->name != NULL) {
933 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
934 self->encodefunc = e->encodefunc;
935 break;
936 }
937 e++;
938 }
939 }
940 Py_XDECREF(res);
941
942 return 0;
943}
944
945static int
946_textiowrapper_fix_encoder_state(textio *self)
947{
948 if (!self->seekable || !self->encoder) {
949 return 0;
950 }
951
952 self->encoding_start_of_stream = 1;
953
954 PyObject *cookieObj = PyObject_CallMethodObjArgs(
955 self->buffer, _PyIO_str_tell, NULL);
956 if (cookieObj == NULL) {
957 return -1;
958 }
959
960 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
961 Py_DECREF(cookieObj);
962 if (cmp < 0) {
963 return -1;
964 }
965
966 if (cmp == 0) {
967 self->encoding_start_of_stream = 0;
968 PyObject *res = PyObject_CallMethodObjArgs(
969 self->encoder, _PyIO_str_setstate, _PyLong_Zero, NULL);
970 if (res == NULL) {
971 return -1;
972 }
973 Py_DECREF(res);
974 }
975
976 return 0;
977}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000978
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300979/*[clinic input]
980_io.TextIOWrapper.__init__
981 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700982 encoding: str(accept={str, NoneType}) = NULL
INADA Naoki507434f2017-12-21 09:59:53 +0900983 errors: object = None
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700984 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchaka202fda52017-03-12 10:10:47 +0200985 line_buffering: bool(accept={int}) = False
986 write_through: bool(accept={int}) = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000987
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300988Character and line based layer over a BufferedIOBase object, buffer.
989
990encoding gives the name of the encoding that the stream will be
991decoded or encoded with. It defaults to locale.getpreferredencoding(False).
992
993errors determines the strictness of encoding and decoding (see
994help(codecs.Codec) or the documentation for codecs.register) and
995defaults to "strict".
996
997newline controls how line endings are handled. It can be None, '',
998'\n', '\r', and '\r\n'. It works as follows:
999
1000* On input, if newline is None, universal newlines mode is
1001 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1002 these are translated into '\n' before being returned to the
1003 caller. If it is '', universal newline mode is enabled, but line
1004 endings are returned to the caller untranslated. If it has any of
1005 the other legal values, input lines are only terminated by the given
1006 string, and the line ending is returned to the caller untranslated.
1007
1008* On output, if newline is None, any '\n' characters written are
1009 translated to the system default line separator, os.linesep. If
1010 newline is '' or '\n', no translation takes place. If newline is any
1011 of the other legal values, any '\n' characters written are translated
1012 to the given string.
1013
1014If line_buffering is True, a call to flush is implied when a call to
1015write contains a newline character.
1016[clinic start generated code]*/
1017
1018static int
1019_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
INADA Naoki507434f2017-12-21 09:59:53 +09001020 const char *encoding, PyObject *errors,
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001021 const char *newline, int line_buffering,
1022 int write_through)
INADA Naoki507434f2017-12-21 09:59:53 +09001023/*[clinic end generated code: output=72267c0c01032ed2 input=1c5dd5d78bfcc675]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001024{
1025 PyObject *raw, *codec_info = NULL;
1026 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001027 PyObject *res;
1028 int r;
1029
1030 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001031 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001032
INADA Naoki507434f2017-12-21 09:59:53 +09001033 if (errors == Py_None) {
1034 errors = _PyUnicode_FromId(&PyId_strict); /* borrowed */
INADA Naoki4856b0f2017-12-24 10:29:19 +09001035 if (errors == NULL) {
1036 return -1;
1037 }
INADA Naoki507434f2017-12-21 09:59:53 +09001038 }
1039 else if (!PyUnicode_Check(errors)) {
1040 // Check 'errors' argument here because Argument Clinic doesn't support
1041 // 'str(accept={str, NoneType})' converter.
1042 PyErr_Format(
1043 PyExc_TypeError,
1044 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1045 errors->ob_type->tp_name);
1046 return -1;
1047 }
1048
1049 if (validate_newline(newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 return -1;
1051 }
1052
1053 Py_CLEAR(self->buffer);
1054 Py_CLEAR(self->encoding);
1055 Py_CLEAR(self->encoder);
1056 Py_CLEAR(self->decoder);
1057 Py_CLEAR(self->readnl);
1058 Py_CLEAR(self->decoded_chars);
1059 Py_CLEAR(self->pending_bytes);
1060 Py_CLEAR(self->snapshot);
1061 Py_CLEAR(self->errors);
1062 Py_CLEAR(self->raw);
1063 self->decoded_chars_used = 0;
1064 self->pending_bytes_count = 0;
1065 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001066 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001067
1068 if (encoding == NULL) {
1069 /* Try os.device_encoding(fileno) */
1070 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +01001071 state = IO_STATE();
1072 if (state == NULL)
1073 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001074 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001075 /* Ignore only AttributeError and UnsupportedOperation */
1076 if (fileno == NULL) {
1077 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
1078 PyErr_ExceptionMatches(state->unsupported_operation)) {
1079 PyErr_Clear();
1080 }
1081 else {
1082 goto error;
1083 }
1084 }
1085 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +02001086 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -05001087 Py_DECREF(fileno);
1088 if (fd == -1 && PyErr_Occurred()) {
1089 goto error;
1090 }
1091
1092 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001093 if (self->encoding == NULL)
1094 goto error;
1095 else if (!PyUnicode_Check(self->encoding))
1096 Py_CLEAR(self->encoding);
1097 }
1098 }
1099 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +02001100 PyObject *locale_module = _PyIO_get_locale_module(state);
1101 if (locale_module == NULL)
1102 goto catch_ImportError;
Victor Stinner61bdb0d2016-12-09 15:39:28 +01001103 self->encoding = _PyObject_CallMethodIdObjArgs(
1104 locale_module, &PyId_getpreferredencoding, Py_False, NULL);
Antoine Pitrou932ff832013-08-01 21:04:50 +02001105 Py_DECREF(locale_module);
1106 if (self->encoding == NULL) {
1107 catch_ImportError:
1108 /*
Martin Panter7462b6492015-11-02 03:37:02 +00001109 Importing locale can raise an ImportError because of
1110 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +02001111 ImportError if _locale is not available. These will happen
1112 during module building.
1113 */
1114 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
1115 PyErr_Clear();
1116 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001117 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001118 else
1119 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001120 }
Antoine Pitrou932ff832013-08-01 21:04:50 +02001121 else if (!PyUnicode_Check(self->encoding))
1122 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001123 }
Victor Stinnerf6c57832010-05-19 01:17:01 +00001124 if (self->encoding != NULL) {
Serhiy Storchaka06515832016-11-20 09:13:07 +02001125 encoding = PyUnicode_AsUTF8(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +00001126 if (encoding == NULL)
1127 goto error;
1128 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001129 else if (encoding != NULL) {
1130 self->encoding = PyUnicode_FromString(encoding);
1131 if (self->encoding == NULL)
1132 goto error;
1133 }
1134 else {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03001135 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001136 "could not determine default encoding");
Serhiy Storchakad6238a72017-09-24 02:49:58 +03001137 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001138 }
1139
Nick Coghlana9b15242014-02-04 22:11:18 +10001140 /* Check we have been asked for a real text encoding */
1141 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1142 if (codec_info == NULL) {
1143 Py_CLEAR(self->encoding);
1144 goto error;
1145 }
1146
1147 /* XXX: Failures beyond this point have the potential to leak elements
1148 * of the partially constructed object (like self->encoding)
1149 */
1150
INADA Naoki507434f2017-12-21 09:59:53 +09001151 Py_INCREF(errors);
1152 self->errors = errors;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001153 self->chunk_size = 8192;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001154 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001155 self->write_through = write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001156 if (set_newline(self, newline) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001157 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001158 }
1159
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001160 self->buffer = buffer;
1161 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001162
INADA Naoki507434f2017-12-21 09:59:53 +09001163 /* Build the decoder object */
1164 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1165 goto error;
1166
1167 /* Build the encoder object */
1168 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1169 goto error;
1170
1171 /* Finished sorting out the codec details */
1172 Py_CLEAR(codec_info);
1173
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001174 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1175 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001176 Py_TYPE(buffer) == &PyBufferedRandom_Type)
1177 {
1178 if (_PyObject_LookupAttrId(buffer, &PyId_raw, &raw) < 0)
1179 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001180 /* Cache the raw FileIO object to speed up 'closed' checks */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001181 if (raw != NULL) {
1182 if (Py_TYPE(raw) == &PyFileIO_Type)
1183 self->raw = raw;
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001184 else
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001185 Py_DECREF(raw);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001186 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001187 }
1188
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001189 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001190 if (res == NULL)
1191 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001192 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001193 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001194 if (r < 0)
1195 goto error;
1196 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001197
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001198 r = _PyObject_LookupAttr(buffer, _PyIO_str_read1, &res);
1199 if (r < 0) {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02001200 goto error;
1201 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001202 Py_XDECREF(res);
1203 self->has_read1 = r;
Antoine Pitroue96ec682011-07-23 21:46:35 +02001204
Antoine Pitroue4501852009-05-14 18:55:55 +00001205 self->encoding_start_of_stream = 0;
INADA Naoki507434f2017-12-21 09:59:53 +09001206 if (_textiowrapper_fix_encoder_state(self) < 0) {
1207 goto error;
Antoine Pitroue4501852009-05-14 18:55:55 +00001208 }
1209
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001210 self->ok = 1;
1211 return 0;
1212
1213 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001214 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001215 return -1;
1216}
1217
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001218/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1219 * -1 on error.
1220 */
1221static int
1222convert_optional_bool(PyObject *obj, int default_value)
1223{
1224 long v;
1225 if (obj == Py_None) {
1226 v = default_value;
1227 }
1228 else {
1229 v = PyLong_AsLong(obj);
1230 if (v == -1 && PyErr_Occurred())
1231 return -1;
1232 }
1233 return v != 0;
1234}
1235
INADA Naoki507434f2017-12-21 09:59:53 +09001236static int
1237textiowrapper_change_encoding(textio *self, PyObject *encoding,
1238 PyObject *errors, int newline_changed)
1239{
1240 /* Use existing settings where new settings are not specified */
1241 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1242 return 0; // no change
1243 }
1244
1245 if (encoding == Py_None) {
1246 encoding = self->encoding;
1247 if (errors == Py_None) {
1248 errors = self->errors;
1249 }
1250 }
1251 else if (errors == Py_None) {
1252 errors = _PyUnicode_FromId(&PyId_strict);
INADA Naoki4856b0f2017-12-24 10:29:19 +09001253 if (errors == NULL) {
1254 return -1;
1255 }
INADA Naoki507434f2017-12-21 09:59:53 +09001256 }
1257
1258 const char *c_errors = PyUnicode_AsUTF8(errors);
1259 if (c_errors == NULL) {
1260 return -1;
1261 }
1262
1263 // Create new encoder & decoder
1264 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1265 PyUnicode_AsUTF8(encoding), "codecs.open()");
1266 if (codec_info == NULL) {
1267 return -1;
1268 }
1269 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1270 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1271 Py_DECREF(codec_info);
1272 return -1;
1273 }
1274 Py_DECREF(codec_info);
1275
1276 Py_INCREF(encoding);
1277 Py_INCREF(errors);
1278 Py_SETREF(self->encoding, encoding);
1279 Py_SETREF(self->errors, errors);
1280
1281 return _textiowrapper_fix_encoder_state(self);
1282}
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001283
1284/*[clinic input]
1285_io.TextIOWrapper.reconfigure
1286 *
INADA Naoki507434f2017-12-21 09:59:53 +09001287 encoding: object = None
1288 errors: object = None
1289 newline as newline_obj: object(c_default="NULL") = None
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001290 line_buffering as line_buffering_obj: object = None
1291 write_through as write_through_obj: object = None
1292
1293Reconfigure the text stream with new parameters.
1294
1295This also does an implicit stream flush.
1296
1297[clinic start generated code]*/
1298
1299static PyObject *
INADA Naoki507434f2017-12-21 09:59:53 +09001300_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1301 PyObject *errors, PyObject *newline_obj,
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001302 PyObject *line_buffering_obj,
1303 PyObject *write_through_obj)
INADA Naoki507434f2017-12-21 09:59:53 +09001304/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001305{
1306 int line_buffering;
1307 int write_through;
INADA Naoki507434f2017-12-21 09:59:53 +09001308 const char *newline = NULL;
1309
1310 /* Check if something is in the read buffer */
1311 if (self->decoded_chars != NULL) {
1312 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1313 _unsupported("It is not possible to set the encoding or newline"
1314 "of stream after the first read");
1315 return NULL;
1316 }
1317 }
1318
1319 if (newline_obj != NULL && newline_obj != Py_None) {
1320 newline = PyUnicode_AsUTF8(newline_obj);
1321 if (newline == NULL || validate_newline(newline) < 0) {
1322 return NULL;
1323 }
1324 }
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001325
1326 line_buffering = convert_optional_bool(line_buffering_obj,
1327 self->line_buffering);
1328 write_through = convert_optional_bool(write_through_obj,
1329 self->write_through);
1330 if (line_buffering < 0 || write_through < 0) {
1331 return NULL;
1332 }
INADA Naoki507434f2017-12-21 09:59:53 +09001333
1334 PyObject *res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001335 if (res == NULL) {
1336 return NULL;
1337 }
INADA Naoki507434f2017-12-21 09:59:53 +09001338 Py_DECREF(res);
1339 self->b2cratio = 0;
1340
1341 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1342 return NULL;
1343 }
1344
1345 if (textiowrapper_change_encoding(
1346 self, encoding, errors, newline_obj != NULL) < 0) {
1347 return NULL;
1348 }
1349
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02001350 self->line_buffering = line_buffering;
1351 self->write_through = write_through;
1352 Py_RETURN_NONE;
1353}
1354
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001355static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001356textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001357{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001358 self->ok = 0;
1359 Py_CLEAR(self->buffer);
1360 Py_CLEAR(self->encoding);
1361 Py_CLEAR(self->encoder);
1362 Py_CLEAR(self->decoder);
1363 Py_CLEAR(self->readnl);
1364 Py_CLEAR(self->decoded_chars);
1365 Py_CLEAR(self->pending_bytes);
1366 Py_CLEAR(self->snapshot);
1367 Py_CLEAR(self->errors);
1368 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001369
1370 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001371 return 0;
1372}
1373
1374static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001375textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001376{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001377 self->finalizing = 1;
1378 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001379 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001380 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001381 _PyObject_GC_UNTRACK(self);
1382 if (self->weakreflist != NULL)
1383 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001384 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001385 Py_TYPE(self)->tp_free((PyObject *)self);
1386}
1387
1388static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001389textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001390{
1391 Py_VISIT(self->buffer);
1392 Py_VISIT(self->encoding);
1393 Py_VISIT(self->encoder);
1394 Py_VISIT(self->decoder);
1395 Py_VISIT(self->readnl);
1396 Py_VISIT(self->decoded_chars);
1397 Py_VISIT(self->pending_bytes);
1398 Py_VISIT(self->snapshot);
1399 Py_VISIT(self->errors);
1400 Py_VISIT(self->raw);
1401
1402 Py_VISIT(self->dict);
1403 return 0;
1404}
1405
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001406static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001407textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001408
/* Make the enclosing function return NULL if the stream is closed.
   Exact TextIOWrapper instances take a shortcut: the cached raw FileIO
   object is queried directly when there is one, otherwise the `closed`
   getter is used.  Subclasses go through _PyIOBase_check_closed(). */
#define CHECK_CLOSED(self) \
    do { \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _closed; \
            if (self->raw != NULL) { \
                _closed = _PyFileIO_closed(self->raw); \
            } \
            else { \
                PyObject *_res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                _closed = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (_closed < 0) \
                    return NULL; \
            } \
            if (_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1435
/* Make the enclosing function raise ValueError and return NULL when
   __init__ has not completed successfully (self->ok <= 0).
   Expands to a bare `if` statement: use it as a full statement. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }
1442
/* CHECK_INITIALIZED plus a check that the underlying buffer has not been
   detached; on failure raises ValueError and returns NULL from the
   enclosing function.  Expands to two statements: use it as a full
   statement. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }
1450
/* Same checks as CHECK_ATTACHED, for functions that signal errors by
   returning -1 rather than NULL. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1461
1462
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001463/*[clinic input]
1464_io.TextIOWrapper.detach
1465[clinic start generated code]*/
1466
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001467static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001468_io_TextIOWrapper_detach_impl(textio *self)
1469/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001470{
1471 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001472 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001473 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1474 if (res == NULL)
1475 return NULL;
1476 Py_DECREF(res);
1477 buffer = self->buffer;
1478 self->buffer = NULL;
1479 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001480 return buffer;
1481}
1482
Antoine Pitrou24f36292009-03-28 22:16:42 +00001483/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001484 underlying buffered object, though. */
1485static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001486_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001487{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001488 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001489
1490 if (self->pending_bytes == NULL)
1491 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001492
1493 pending = self->pending_bytes;
1494 Py_INCREF(pending);
1495 self->pending_bytes_count = 0;
1496 Py_CLEAR(self->pending_bytes);
1497
1498 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1499 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001500 if (b == NULL)
1501 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001502 ret = NULL;
1503 do {
1504 ret = PyObject_CallMethodObjArgs(self->buffer,
1505 _PyIO_str_write, b, NULL);
1506 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001507 Py_DECREF(b);
1508 if (ret == NULL)
1509 return -1;
1510 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511 return 0;
1512}
1513
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001514/*[clinic input]
1515_io.TextIOWrapper.write
1516 text: unicode
1517 /
1518[clinic start generated code]*/
1519
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001520static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001521_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1522/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001523{
1524 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001525 PyObject *b;
1526 Py_ssize_t textlen;
1527 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001528 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001529
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001530 if (PyUnicode_READY(text) == -1)
1531 return NULL;
1532
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001533 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534 CHECK_CLOSED(self);
1535
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001536 if (self->encoder == NULL)
1537 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001538
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001539 Py_INCREF(text);
1540
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001541 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001542
1543 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001544 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001545 haslf = 1;
1546
1547 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001548 PyObject *newtext = _PyObject_CallMethodId(
1549 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550 Py_DECREF(text);
1551 if (newtext == NULL)
1552 return NULL;
1553 text = newtext;
1554 }
1555
Antoine Pitroue96ec682011-07-23 21:46:35 +02001556 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001557 text_needflush = 1;
1558 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001560 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561 needflush = 1;
1562
1563 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001564 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001565 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001566 self->encoding_start_of_stream = 0;
1567 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001568 else
1569 b = PyObject_CallMethodObjArgs(self->encoder,
1570 _PyIO_str_encode, text, NULL);
1571 Py_DECREF(text);
1572 if (b == NULL)
1573 return NULL;
Oren Milmana5b4ea12017-08-25 21:14:54 +03001574 if (!PyBytes_Check(b)) {
1575 PyErr_Format(PyExc_TypeError,
1576 "encoder should return a bytes object, not '%.200s'",
1577 Py_TYPE(b)->tp_name);
1578 Py_DECREF(b);
1579 return NULL;
1580 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001581
1582 if (self->pending_bytes == NULL) {
1583 self->pending_bytes = PyList_New(0);
1584 if (self->pending_bytes == NULL) {
1585 Py_DECREF(b);
1586 return NULL;
1587 }
1588 self->pending_bytes_count = 0;
1589 }
1590 if (PyList_Append(self->pending_bytes, b) < 0) {
1591 Py_DECREF(b);
1592 return NULL;
1593 }
1594 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1595 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001596 if (self->pending_bytes_count > self->chunk_size || needflush ||
1597 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001598 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001599 return NULL;
1600 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001601
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602 if (needflush) {
1603 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1604 if (ret == NULL)
1605 return NULL;
1606 Py_DECREF(ret);
1607 }
1608
1609 Py_CLEAR(self->snapshot);
1610
1611 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001612 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613 if (ret == NULL)
1614 return NULL;
1615 Py_DECREF(ret);
1616 }
1617
1618 return PyLong_FromSsize_t(textlen);
1619}
1620
1621/* Steal a reference to chars and store it in the decoded_char buffer;
1622 */
1623static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001624textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001625{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001626 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001627 self->decoded_chars_used = 0;
1628}
1629
1630static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001631textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632{
1633 PyObject *chars;
1634 Py_ssize_t avail;
1635
1636 if (self->decoded_chars == NULL)
1637 return PyUnicode_FromStringAndSize(NULL, 0);
1638
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001639 /* decoded_chars is guaranteed to be "ready". */
1640 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 - self->decoded_chars_used);
1642
1643 assert(avail >= 0);
1644
1645 if (n < 0 || n > avail)
1646 n = avail;
1647
1648 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001649 chars = PyUnicode_Substring(self->decoded_chars,
1650 self->decoded_chars_used,
1651 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001652 if (chars == NULL)
1653 return NULL;
1654 }
1655 else {
1656 chars = self->decoded_chars;
1657 Py_INCREF(chars);
1658 }
1659
1660 self->decoded_chars_used += n;
1661 return chars;
1662}
1663
1664/* Read and decode the next chunk of data from the BufferedReader.
1665 */
1666static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001667textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001668{
1669 PyObject *dec_buffer = NULL;
1670 PyObject *dec_flags = NULL;
1671 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001672 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001674 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675 int eof;
1676
1677 /* The return value is True unless EOF was reached. The decoded string is
1678 * placed in self._decoded_chars (replacing its previous value). The
1679 * entire input chunk is sent to the decoder, though some of it may remain
1680 * buffered in the decoder, yet to be converted.
1681 */
1682
1683 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001684 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001685 return -1;
1686 }
1687
1688 if (self->telling) {
1689 /* To prepare for tell(), we need to snapshot a point in the file
1690 * where the decoder's input buffer is empty.
1691 */
1692
1693 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1694 _PyIO_str_getstate, NULL);
1695 if (state == NULL)
1696 return -1;
1697 /* Given this, we know there was a valid snapshot point
1698 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1699 */
Oren Milmanba7d7362017-08-29 11:58:27 +03001700 if (!PyTuple_Check(state)) {
1701 PyErr_SetString(PyExc_TypeError,
1702 "illegal decoder state");
1703 Py_DECREF(state);
1704 return -1;
1705 }
1706 if (!PyArg_ParseTuple(state,
1707 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1708 {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 Py_DECREF(state);
1710 return -1;
1711 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001712
1713 if (!PyBytes_Check(dec_buffer)) {
1714 PyErr_Format(PyExc_TypeError,
Oren Milmanba7d7362017-08-29 11:58:27 +03001715 "illegal decoder state: the first item should be a "
1716 "bytes object, not '%.200s'",
Antoine Pitroub8503892014-04-29 10:14:02 +02001717 Py_TYPE(dec_buffer)->tp_name);
1718 Py_DECREF(state);
1719 return -1;
1720 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 Py_INCREF(dec_buffer);
1722 Py_INCREF(dec_flags);
1723 Py_DECREF(state);
1724 }
1725
1726 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001727 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001728 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001729 }
1730 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001731 if (chunk_size == NULL)
1732 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001733
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001734 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001735 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1736 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001737 Py_DECREF(chunk_size);
1738 if (input_chunk == NULL)
1739 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001740
1741 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001742 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001743 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001744 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1745 Py_TYPE(input_chunk)->tp_name);
1746 goto fail;
1747 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001748
Antoine Pitroub8503892014-04-29 10:14:02 +02001749 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001750 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001751
INADA Naoki507434f2017-12-21 09:59:53 +09001752 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1753 PyBuffer_Release(&input_chunk_buf);
1754 if (decoded_chars == NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001755 goto fail;
INADA Naoki507434f2017-12-21 09:59:53 +09001756
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001757 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001758 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001759 if (nchars > 0)
1760 self->b2cratio = (double) nbytes / nchars;
1761 else
1762 self->b2cratio = 0.0;
1763 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 eof = 0;
1765
1766 if (self->telling) {
1767 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1768 * next input to be decoded is dec_buffer + input_chunk.
1769 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001770 PyObject *next_input = dec_buffer;
1771 PyBytes_Concat(&next_input, input_chunk);
1772 if (next_input == NULL) {
1773 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001774 goto fail;
1775 }
Serhiy Storchaka48842712016-04-06 09:45:48 +03001776 Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777 }
1778 Py_DECREF(input_chunk);
1779
1780 return (eof == 0);
1781
1782 fail:
1783 Py_XDECREF(dec_buffer);
1784 Py_XDECREF(dec_flags);
1785 Py_XDECREF(input_chunk);
1786 return -1;
1787}
1788
/* TextIOWrapper.read(size=-1).

   With n < 0, reads everything from the underlying buffer, decodes it as
   the final chunk and returns it appended to whatever was still pending in
   the decoded-characters buffer.  With n >= 0, returns at most n
   characters, reading further chunks until n characters are gathered or EOF
   is reached.

   Returns a new reference to a str, or NULL with an exception set.
 */
/*[clinic input]
_io.TextIOWrapper.read
    size as n: Py_ssize_t(accept={int, NoneType}) = -1
    /
[clinic start generated code]*/

static PyObject *
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
{
    PyObject *result = NULL, *chunks = NULL;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (self->decoder == NULL)
        return _unsupported("not readable");

    /* Push out any pending writes before reading. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    if (n < 0) {
        /* Read everything */
        PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
        PyObject *decoded;
        if (bytes == NULL)
            goto fail;

        /* Call the newline decoder's C entry point directly when the
           decoder is exactly that type; the third argument (1 / Py_True)
           marks this as the final chunk. */
        if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
            decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
                                                          bytes, 1);
        else
            decoded = PyObject_CallMethodObjArgs(
                self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
        Py_DECREF(bytes);
        /* NOTE(review): `decoded` is not released on this failure path, so
           check_decoded() presumably drops the reference itself when it
           fails -- confirm against its definition. */
        if (check_decoded(decoded) < 0)
            goto fail;

        /* Characters decoded earlier but not yet handed out come first. */
        result = textiowrapper_get_decoded_chars(self, -1);

        if (result == NULL) {
            Py_DECREF(decoded);
            return NULL;
        }

        /* Consumes `decoded`; `result` becomes NULL on failure. */
        PyUnicode_AppendAndDel(&result, decoded);
        if (result == NULL)
            goto fail;

        Py_CLEAR(self->snapshot);
        return result;
    }
    else {
        int res = 1;
        Py_ssize_t remaining = n;

        /* Start with what is already decoded. */
        result = textiowrapper_get_decoded_chars(self, n);
        if (result == NULL)
            goto fail;
        if (PyUnicode_READY(result) == -1)
            goto fail;
        remaining -= PyUnicode_GET_LENGTH(result);

        /* Keep reading chunks until we have n characters to return */
        while (remaining > 0) {
            res = textiowrapper_read_chunk(self, remaining);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto fail;
            }
            if (res == 0)  /* EOF */
                break;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto fail;
            }
            /* Stash the previous piece (skipping empty ones) before
               fetching the next one from the freshly decoded chunk. */
            if (PyUnicode_GET_LENGTH(result) > 0 &&
                PyList_Append(chunks, result) < 0)
                goto fail;
            Py_DECREF(result);
            result = textiowrapper_get_decoded_chars(self, remaining);
            if (result == NULL)
                goto fail;
            remaining -= PyUnicode_GET_LENGTH(result);
        }
        if (chunks != NULL) {
            /* Several pieces were gathered: add the last one and join. */
            if (result != NULL && PyList_Append(chunks, result) < 0)
                goto fail;
            Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
            if (result == NULL)
                goto fail;
            Py_CLEAR(chunks);
        }
        return result;
    }
  fail:
    Py_XDECREF(result);
    Py_XDECREF(chunks);
    return NULL;
}
1894
1895
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001896/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001897 that is to the NUL character. Otherwise the function will produce
1898 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001899static const char *
1900find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001901{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001902 if (kind == PyUnicode_1BYTE_KIND) {
1903 assert(ch < 256);
1904 return (char *) memchr((void *) s, (char) ch, end - s);
1905 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001906 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001907 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001908 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001909 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001910 return s;
1911 if (s == end)
1912 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001913 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001914 }
1915}
1916
/* Search the characters in [start, end) for the first line ending.

   `kind` is the width in bytes of one character; `start` and `end` are
   byte pointers into the character storage.  The scans rely on the storage
   being NUL-terminated at `end`.

   The search mode is chosen in this order:
     - translated: only '\n' terminates a line;
     - universal:  any of '\r', '\r\n' or '\n' terminates a line;
     - otherwise:  the 1-byte-kind string `readnl` terminates a line.

   Returns the number of characters from `start` up to and including the
   line ending.  If no line ending is found, returns -1 and stores in
   *consumed how many characters from `start` can be put aside (everything,
   except in the multi-character `readnl` case where a tail that may be the
   beginning of a line ending is held back).  *consumed is not written when
   a line ending is found.
 */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    int kind, const char *start, const char *end, Py_ssize_t *consumed)
{
    /* Length of the searched range, in characters. */
    Py_ssize_t len = ((char*)end - (char*)start)/kind;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        const char *pos = find_control_char(kind, start, end, '\n');
        if (pos != NULL)
            return (pos - start)/kind + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        const char *s = start;
        for (;;) {
            Py_UCS4 ch;
            /* Fast path for non-control chars. The loop always ends
               since the Unicode string is NUL-terminated. */
            while (PyUnicode_READ(kind, s, 0) > '\r')
                s += kind;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            ch = PyUnicode_READ(kind, s, 0);
            s += kind;
            /* `s` now points just past `ch`. */
            if (ch == '\n')
                return (s - start)/kind;
            if (ch == '\r') {
                /* Treat "\r\n" as a single line ending. */
                if (PyUnicode_READ(kind, s, 0) == '\n')
                    return (s - start)/kind + 1;
                else
                    return (s - start)/kind;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
        Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
        /* Assume that readnl is an ASCII character. */
        assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
        if (readnl_len == 1) {
            const char *pos = find_control_char(kind, start, end, nl[0]);
            if (pos != NULL)
                return (pos - start)/kind + 1;
            *consumed = len;
            return -1;
        }
        else {
            const char *s = start;
            /* Last position at which a complete `readnl` could start. */
            const char *e = end - (readnl_len - 1)*kind;
            const char *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                /* Note: this declaration shadows the outer `pos`. */
                const char *pos = find_control_char(kind, s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                /* First character matched; compare the rest of readnl. */
                for (i = 1; i < readnl_len; i++) {
                    if (PyUnicode_READ(kind, pos, i) != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return (pos - start)/kind + readnl_len;
                s = pos + kind;
            }
            /* No complete match.  Hold back the tail starting at the first
               nl[0] found at or after `e`, since it may be the beginning of
               a line ending that continues in the next chunk. */
            pos = find_control_char(kind, e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = (pos - start)/kind;
            return -1;
        }
    }
}
2002
/* Read and return one line from the stream.

   `limit` is the maximum number of characters to return; a negative value
   means no limit.  Pending writes are flushed first.  Chunks are read and
   decoded until a line ending is found, the limit is reached, or EOF is
   hit.

   Returns a new reference to a str (the empty string when nothing could be
   read), or NULL with an exception set.
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    /* line:      the string currently being searched
       chunks:    list of pieces already put aside for the result
       remaining: unconsumed tail of the previous chunk, to be prepended to
                  the next one */
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    /* start/endpos are character offsets into `line`; chunked counts the
       characters already stored in `chunks`; offset_to_buffer is how many
       leading characters of `line` come from `remaining` rather than from
       self->decoded_chars. */
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        char *ptr;
        Py_ssize_t line_len;
        int kind;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_LENGTH(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self, 0);
            if (res < 0) {
                /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
                   when EINTR occurs so we needn't do it ourselves. */
                if (_PyIO_trap_eintr()) {
                    continue;
                }
                goto error;
            }
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search directly in the decoded buffer, from the read offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover of the previous chunk to the new one. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
            if (PyUnicode_READY(line) == -1)
                goto error;
        }

        ptr = PyUnicode_DATA(line);
        line_len = PyUnicode_GET_LENGTH(line);
        kind = PyUnicode_KIND(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            kind,
            ptr + kind * start,
            ptr + kind * line_len,
            &consumed);
        if (endpos >= 0) {
            /* Line ending found; convert to an offset into `line` and
               truncate to the limit if necessary. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_Substring(line, start, endpos);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_LENGTH(s);
            Py_DECREF(s);
        }
        /* There may be some remaining characters we'll have to prepend to
           the next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_Substring(line, endpos, line_len);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        /* Translate the offset in `line` back to an offset in
           self->decoded_chars. */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
            PyObject *s = PyUnicode_Substring(line, start, endpos);
            Py_CLEAR(line);
            if (s == NULL)
                goto error;
            line = s;
        }
    }
    if (remaining != NULL) {
        /* The loop ended with a leftover still pending: it belongs to the
           result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside plus the final piece. */
        if (line != NULL) {
            if (PyList_Append(chunks, line) < 0)
                goto error;
            Py_DECREF(line);
        }
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_CLEAR(chunks);
    }
    if (line == NULL) {
        /* Nothing was gathered at all: return the empty string. */
        Py_INCREF(_PyIO_empty_str);
        line = _PyIO_empty_str;
    }

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
2165
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002166/*[clinic input]
2167_io.TextIOWrapper.readline
2168 size: Py_ssize_t = -1
2169 /
2170[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002171
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002172static PyObject *
2173_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2174/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2175{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002176 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002177 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002178}
2179
2180/* Seek and Tell */
2181
/* Unpacked form of the integer cookie converted by
   textiowrapper_parse_cookie() and textiowrapper_build_cookie(). */
typedef struct {
    /* NOTE(review): presumably a byte position in the underlying buffer;
       a value of 0 together with dec_flags == 0 is treated as the start of
       the stream -- confirm against seek()/tell(). */
    Py_off_t start_pos;
    /* Passed as the flags item of the state given to decoder.setstate(). */
    int dec_flags;
    /* NOTE(review): the three fields below are not used in the code next to
       this definition; their names suggest replay information for seek()
       -- confirm against seek()/tell(). */
    int bytes_to_feed;
    int chars_to_skip;
    char need_eof;
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002189
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   buffer the various cookie_type fields will be stored.
 */

/* Total size of the packed cookie: one Py_off_t, three ints and one char. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2221
2222static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002223textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002224{
2225 unsigned char buffer[COOKIE_BUF_LEN];
2226 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2227 if (cookieLong == NULL)
2228 return -1;
2229
2230 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02002231 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 Py_DECREF(cookieLong);
2233 return -1;
2234 }
2235 Py_DECREF(cookieLong);
2236
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002237 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2238 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2239 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2240 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2241 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002242
2243 return 0;
2244}
2245
2246static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002247textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002248{
2249 unsigned char buffer[COOKIE_BUF_LEN];
2250
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002251 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2252 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2253 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2254 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2255 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002256
Christian Heimes743e0cd2012-10-17 23:52:17 +02002257 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2258 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002259}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002260
2261static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002262_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002263{
2264 PyObject *res;
2265 /* When seeking to the start of the stream, we call decoder.reset()
2266 rather than decoder.getstate().
2267 This is for a few decoders such as utf-16 for which the state value
2268 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2269 utf-16, that we are expecting a BOM).
2270 */
2271 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2272 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2273 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002274 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2275 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002276 if (res == NULL)
2277 return -1;
2278 Py_DECREF(res);
2279 return 0;
2280}
2281
Antoine Pitroue4501852009-05-14 18:55:55 +00002282static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002283_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002284{
2285 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002286 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002287 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2288 self->encoding_start_of_stream = 1;
2289 }
2290 else {
2291 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002292 _PyLong_Zero, NULL);
Antoine Pitroue4501852009-05-14 18:55:55 +00002293 self->encoding_start_of_stream = 0;
2294 }
2295 if (res == NULL)
2296 return -1;
2297 Py_DECREF(res);
2298 return 0;
2299}
2300
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002301static int
2302_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2303{
2304 /* Same as _textiowrapper_decoder_setstate() above. */
2305 return _textiowrapper_encoder_reset(
2306 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2307}
2308
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002309/*[clinic input]
2310_io.TextIOWrapper.seek
2311 cookie as cookieObj: object
2312 whence: int = 0
2313 /
2314[clinic start generated code]*/
2315
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002316static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002317_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2318/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002319{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002320 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002321 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002322 PyObject *res;
2323 int cmp;
2324
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002325 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002326 CHECK_CLOSED(self);
2327
2328 Py_INCREF(cookieObj);
2329
2330 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002331 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002332 goto fail;
2333 }
2334
2335 if (whence == 1) {
2336 /* seek relative to current position */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002337 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002338 if (cmp < 0)
2339 goto fail;
2340
2341 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002342 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002343 goto fail;
2344 }
2345
2346 /* Seeking to the current position should attempt to
2347 * sync the underlying buffer with the current position.
2348 */
2349 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002350 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002351 if (cookieObj == NULL)
2352 goto fail;
2353 }
2354 else if (whence == 2) {
2355 /* seek relative to end of file */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002356 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002357 if (cmp < 0)
2358 goto fail;
2359
2360 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002361 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002362 goto fail;
2363 }
2364
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002365 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002366 if (res == NULL)
2367 goto fail;
2368 Py_DECREF(res);
2369
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002370 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371 Py_CLEAR(self->snapshot);
2372 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002373 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002374 if (res == NULL)
2375 goto fail;
2376 Py_DECREF(res);
2377 }
2378
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002379 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002380 Py_CLEAR(cookieObj);
2381 if (res == NULL)
2382 goto fail;
2383 if (self->encoder) {
2384 /* If seek() == 0, we are at the start of stream, otherwise not */
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002385 cmp = PyObject_RichCompareBool(res, _PyLong_Zero, Py_EQ);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002386 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2387 Py_DECREF(res);
2388 goto fail;
2389 }
2390 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391 return res;
2392 }
2393 else if (whence != 0) {
2394 PyErr_Format(PyExc_ValueError,
2395 "invalid whence (%d, should be 0, 1 or 2)", whence);
2396 goto fail;
2397 }
2398
Serhiy Storchakaba85d692017-03-30 09:09:41 +03002399 cmp = PyObject_RichCompareBool(cookieObj, _PyLong_Zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002400 if (cmp < 0)
2401 goto fail;
2402
2403 if (cmp == 1) {
2404 PyErr_Format(PyExc_ValueError,
2405 "negative seek position %R", cookieObj);
2406 goto fail;
2407 }
2408
2409 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2410 if (res == NULL)
2411 goto fail;
2412 Py_DECREF(res);
2413
2414 /* The strategy of seek() is to go back to the safe start point
2415 * and replay the effect of read(chars_to_skip) from there.
2416 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002417 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418 goto fail;
2419
2420 /* Seek back to the safe start point. */
2421 posobj = PyLong_FromOff_t(cookie.start_pos);
2422 if (posobj == NULL)
2423 goto fail;
2424 res = PyObject_CallMethodObjArgs(self->buffer,
2425 _PyIO_str_seek, posobj, NULL);
2426 Py_DECREF(posobj);
2427 if (res == NULL)
2428 goto fail;
2429 Py_DECREF(res);
2430
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002431 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002432 Py_CLEAR(self->snapshot);
2433
2434 /* Restore the decoder to its state from the safe start point. */
2435 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002436 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002437 goto fail;
2438 }
2439
2440 if (cookie.chars_to_skip) {
2441 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002442 PyObject *input_chunk = _PyObject_CallMethodId(
2443 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002444 PyObject *decoded;
2445
2446 if (input_chunk == NULL)
2447 goto fail;
2448
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002449 if (!PyBytes_Check(input_chunk)) {
2450 PyErr_Format(PyExc_TypeError,
2451 "underlying read() should have returned a bytes "
2452 "object, not '%.200s'",
2453 Py_TYPE(input_chunk)->tp_name);
2454 Py_DECREF(input_chunk);
2455 goto fail;
2456 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002457
2458 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2459 if (self->snapshot == NULL) {
2460 Py_DECREF(input_chunk);
2461 goto fail;
2462 }
2463
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002464 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2465 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002466
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002467 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002468 goto fail;
2469
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002470 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471
2472 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002473 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002474 PyErr_SetString(PyExc_OSError, "can't restore logical file position");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002475 goto fail;
2476 }
2477 self->decoded_chars_used = cookie.chars_to_skip;
2478 }
2479 else {
2480 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2481 if (self->snapshot == NULL)
2482 goto fail;
2483 }
2484
Antoine Pitroue4501852009-05-14 18:55:55 +00002485 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2486 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002487 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002488 goto fail;
2489 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002490 return cookieObj;
2491 fail:
2492 Py_XDECREF(cookieObj);
2493 return NULL;
2494
2495}
2496
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002497/*[clinic input]
2498_io.TextIOWrapper.tell
2499[clinic start generated code]*/
2500
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002501static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002502_io_TextIOWrapper_tell_impl(textio *self)
2503/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504{
/* Return an integer describing the current logical stream position.
 *
 * When there is no decoder or no snapshot, the result of the underlying
 * buffer's tell() is returned unchanged.  Otherwise the position is packed
 * into a cookie by textiowrapper_build_cookie(); the cookie fields filled
 * in here are start_pos, dec_flags, bytes_to_feed, chars_to_skip and
 * need_eof.
 *
 * The decoder is driven with setstate()/decode() calls while a safe start
 * point is searched for.  Its state is captured in `saved_state` before
 * that and handed back to setstate() on both the `finally` and the `fail`
 * path.
 *
 * Returns a new reference, or NULL with an exception set.
 */
2505 PyObject *res;
2506 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002507 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002508 PyObject *next_input;
2509 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002510 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511 PyObject *saved_state = NULL;
2512 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002513 Py_ssize_t dec_buffer_len;
2514 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002515
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002516 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002517 CHECK_CLOSED(self);
2518
2519 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002520 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002521 goto fail;
2522 }
2523 if (!self->telling) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002524 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002525 "telling position disabled by next() call");
2526 goto fail;
2527 }
2528
/* Flush first, then ask the underlying buffer for its position. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002529 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002531 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532 if (res == NULL)
2533 goto fail;
2534 Py_DECREF(res);
2535
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002536 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537 if (posobj == NULL)
2538 goto fail;
2539
/* Without a decoder or a snapshot there is nothing to reconstruct: the
   buffer position itself is the answer. */
2540 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002541 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542 return posobj;
2543 }
2544
2545#if defined(HAVE_LARGEFILE_SUPPORT)
2546 cookie.start_pos = PyLong_AsLongLong(posobj);
2547#else
2548 cookie.start_pos = PyLong_AsLong(posobj);
2549#endif
/* posobj is not needed past this point; a failed conversion above is
   detected through PyErr_Occurred(). */
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002550 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551 if (PyErr_Occurred())
2552 goto fail;
2553
2554 /* Skip backward to the snapshot point (see _read_chunk). */
Oren Milman13614e32017-08-24 19:51:24 +03002555 assert(PyTuple_Check(self->snapshot));
/* next_input is parsed with "O", i.e. it is borrowed from the snapshot
   tuple and is never released in this function. */
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002556 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002557 goto fail;
2558
2559 assert (PyBytes_Check(next_input));
2560
2561 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2562
2563 /* How many decoded characters have been used up since the snapshot? */
2564 if (self->decoded_chars_used == 0) {
2565 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002566 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567 }
2568
2569 chars_to_skip = self->decoded_chars_used;
2570
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002571 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002572 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2573 _PyIO_str_getstate, NULL);
2574 if (saved_state == NULL)
2575 goto fail;
2576
/* DECODER_GETSTATE(): call decoder.getstate(), check that the result is a
   tuple whose first item is a bytes object, then store the length of that
   bytes object in dec_buffer_len and the second item in dec_flags.
   Jumps to `fail` with an exception set on any error. */
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002577#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002578 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002579 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2580 _PyIO_str_getstate, NULL); \
2581 if (_state == NULL) \
2582 goto fail; \
Oren Milman13614e32017-08-24 19:51:24 +03002583 if (!PyTuple_Check(_state)) { \
2584 PyErr_SetString(PyExc_TypeError, \
2585 "illegal decoder state"); \
2586 Py_DECREF(_state); \
2587 goto fail; \
2588 } \
2589 if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2590 &dec_buffer, &dec_flags)) \
2591 { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002592 Py_DECREF(_state); \
2593 goto fail; \
2594 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002595 if (!PyBytes_Check(dec_buffer)) { \
2596 PyErr_Format(PyExc_TypeError, \
Oren Milmanba7d7362017-08-29 11:58:27 +03002597 "illegal decoder state: the first item should be a " \
2598 "bytes object, not '%.200s'", \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002599 Py_TYPE(dec_buffer)->tp_name); \
2600 Py_DECREF(_state); \
2601 goto fail; \
2602 } \
2603 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002604 Py_DECREF(_state); \
2605 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606
/* DECODER_DECODE(start, len, res): call decoder.decode() on the `len`
   bytes at `start` and store the length of the decoded string in `res`.
   Jumps to `fail` when check_decoded() rejects the result. */
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002607#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002608 PyObject *_decoded = _PyObject_CallMethodId( \
2609 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002610 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002611 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002612 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002613 Py_DECREF(_decoded); \
2614 } while (0)
2615
2616 /* Fast search for an acceptable start point, close to our
2617 current pos */
/* The first guess scales chars_to_skip by self->b2cratio; each overshoot
   moves the guess back by skip_back bytes and doubles skip_back. */
2618 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2619 skip_back = 1;
2620 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2621 input = PyBytes_AS_STRING(next_input);
2622 while (skip_bytes > 0) {
2623 /* Decode up to tentative start point */
2624 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2625 goto fail;
2626 DECODER_DECODE(input, skip_bytes, chars_decoded);
2627 if (chars_decoded <= chars_to_skip) {
2628 DECODER_GETSTATE();
2629 if (dec_buffer_len == 0) {
2630 /* Before pos and no bytes buffered in decoder => OK */
2631 cookie.dec_flags = dec_flags;
2632 chars_to_skip -= chars_decoded;
2633 break;
2634 }
2635 /* Skip back by buffered amount and reset heuristic */
2636 skip_bytes -= dec_buffer_len;
2637 skip_back = 1;
2638 }
2639 else {
2640 /* We're too far ahead, skip back a bit */
2641 skip_bytes -= skip_back;
2642 skip_back *= 2;
2643 }
2644 }
/* The fast search ran out of candidates: fall back to the snapshot point
   itself and put the decoder back into the cookie's state. */
2645 if (skip_bytes <= 0) {
2646 skip_bytes = 0;
2647 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2648 goto fail;
2649 }
2650
2651 /* Note our initial start point. */
2652 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002653 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002654 if (chars_to_skip == 0)
2655 goto finally;
2656
2657 /* We should be close to the desired position. Now feed the decoder one
2658 * byte at a time until we reach the `chars_to_skip` target.
2659 * As we go, note the nearest "safe start point" before the current
2660 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661 * can safely start from there and advance to this location).
2662 */
2663 chars_decoded = 0;
2664 input = PyBytes_AS_STRING(next_input);
2665 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002666 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002667 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002668 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002669
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002670 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002671 /* We got n chars for 1 byte */
2672 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002674 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002675
2676 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2677 /* Decoder buffer is empty, so this is a safe start point. */
2678 cookie.start_pos += cookie.bytes_to_feed;
2679 chars_to_skip -= chars_decoded;
2680 cookie.dec_flags = dec_flags;
2681 cookie.bytes_to_feed = 0;
2682 chars_decoded = 0;
2683 }
2684 if (chars_decoded >= chars_to_skip)
2685 break;
2686 input++;
2687 }
/* The `break` above leaves `input` short of input_end, so this branch is
   taken only when every byte was fed without reaching the target. */
2688 if (input == input_end) {
2689 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002690 PyObject *decoded = _PyObject_CallMethodId(
2691 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002692 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002694 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002695 Py_DECREF(decoded);
2696 cookie.need_eof = 1;
2697
2698 if (chars_decoded < chars_to_skip) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002699 PyErr_SetString(PyExc_OSError,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002700 "can't reconstruct logical file position");
2701 goto fail;
2702 }
2703 }
2704
/* Success path: hand the saved state back to the decoder, then build the
   cookie. */
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002705finally:
Victor Stinner7e425412016-12-09 00:36:19 +01002706 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 Py_DECREF(saved_state);
2708 if (res == NULL)
2709 return NULL;
2710 Py_DECREF(res);
2711
2712 /* The returned cookie corresponds to the last safe start point. */
2713 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002714 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002715
/* Error path: the pending exception is fetched before setstate() is called
   with the saved state, and passed to _PyErr_ChainExceptions() afterwards.
   saved_state is NULL when the failure happened before it was captured. */
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002716fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002717 if (saved_state) {
2718 PyObject *type, *value, *traceback;
2719 PyErr_Fetch(&type, &value, &traceback);
Victor Stinner7e425412016-12-09 00:36:19 +01002720 res = _PyObject_CallMethodIdObjArgs(self->decoder, &PyId_setstate, saved_state, NULL);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002721 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002722 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002723 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002724 }
2725 return NULL;
2726}
2727
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002728/*[clinic input]
2729_io.TextIOWrapper.truncate
2730 pos: object = None
2731 /
2732[clinic start generated code]*/
2733
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002734static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002735_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2736/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002737{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002738 PyObject *res;
2739
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002740 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002741
2742 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2743 if (res == NULL)
2744 return NULL;
2745 Py_DECREF(res);
2746
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002747 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002748}
2749
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002750static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002751textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002752{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002753 PyObject *nameobj, *modeobj, *res, *s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002754 int status;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002755
2756 CHECK_INITIALIZED(self);
2757
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002758 res = PyUnicode_FromString("<_io.TextIOWrapper");
2759 if (res == NULL)
2760 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002761
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002762 status = Py_ReprEnter((PyObject *)self);
2763 if (status != 0) {
2764 if (status > 0) {
2765 PyErr_Format(PyExc_RuntimeError,
2766 "reentrant call inside %s.__repr__",
2767 Py_TYPE(self)->tp_name);
2768 }
2769 goto error;
2770 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002771 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002772 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002773 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002774 PyErr_Clear();
2775 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002776 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002777 }
2778 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002779 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002780 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002781 if (s == NULL)
2782 goto error;
2783 PyUnicode_AppendAndDel(&res, s);
2784 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002785 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002786 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002787 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002788 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002789 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002790 PyErr_Clear();
2791 else
2792 goto error;
2793 }
2794 else {
2795 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2796 Py_DECREF(modeobj);
2797 if (s == NULL)
2798 goto error;
2799 PyUnicode_AppendAndDel(&res, s);
2800 if (res == NULL)
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002801 goto error;
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002802 }
2803 s = PyUnicode_FromFormat("%U encoding=%R>",
2804 res, self->encoding);
2805 Py_DECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002806 if (status == 0) {
2807 Py_ReprLeave((PyObject *)self);
2808 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002809 return s;
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002810
2811 error:
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002812 Py_XDECREF(res);
Serhiy Storchakaa5af6e12017-03-19 19:25:29 +02002813 if (status == 0) {
2814 Py_ReprLeave((PyObject *)self);
2815 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002816 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002817}
2818
2819
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002820/* Inquiries */
2821
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002822/*[clinic input]
2823_io.TextIOWrapper.fileno
2824[clinic start generated code]*/
2825
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002826static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002827_io_TextIOWrapper_fileno_impl(textio *self)
2828/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002829{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002830 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002831 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002832}
2833
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002834/*[clinic input]
2835_io.TextIOWrapper.seekable
2836[clinic start generated code]*/
2837
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002838static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002839_io_TextIOWrapper_seekable_impl(textio *self)
2840/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002841{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002842 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002843 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002844}
2845
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002846/*[clinic input]
2847_io.TextIOWrapper.readable
2848[clinic start generated code]*/
2849
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002850static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002851_io_TextIOWrapper_readable_impl(textio *self)
2852/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002853{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002854 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002855 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002856}
2857
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002858/*[clinic input]
2859_io.TextIOWrapper.writable
2860[clinic start generated code]*/
2861
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002862static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002863_io_TextIOWrapper_writable_impl(textio *self)
2864/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002865{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002866 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002867 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002868}
2869
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002870/*[clinic input]
2871_io.TextIOWrapper.isatty
2872[clinic start generated code]*/
2873
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002874static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002875_io_TextIOWrapper_isatty_impl(textio *self)
2876/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002877{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002878 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002879 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002880}
2881
2882static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002883textiowrapper_getstate(textio *self, PyObject *args)
2884{
2885 PyErr_Format(PyExc_TypeError,
2886 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2887 return NULL;
2888}
2889
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002890/*[clinic input]
2891_io.TextIOWrapper.flush
2892[clinic start generated code]*/
2893
Antoine Pitrou243757e2010-11-05 21:15:39 +00002894static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002895_io_TextIOWrapper_flush_impl(textio *self)
2896/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002897{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002898 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002899 CHECK_CLOSED(self);
2900 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002901 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002902 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002903 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002904}
2905
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002906/*[clinic input]
2907_io.TextIOWrapper.close
2908[clinic start generated code]*/
2909
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002910static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002911_io_TextIOWrapper_close_impl(textio *self)
2912/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002913{
2914 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002915 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002916 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002917
Antoine Pitrou6be88762010-05-03 16:48:20 +00002918 res = textiowrapper_closed_get(self, NULL);
2919 if (res == NULL)
2920 return NULL;
2921 r = PyObject_IsTrue(res);
2922 Py_DECREF(res);
2923 if (r < 0)
2924 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002925
Antoine Pitrou6be88762010-05-03 16:48:20 +00002926 if (r > 0) {
2927 Py_RETURN_NONE; /* stream already closed */
2928 }
2929 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002930 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002931 if (self->finalizing) {
Victor Stinner61bdb0d2016-12-09 15:39:28 +01002932 res = _PyObject_CallMethodIdObjArgs(self->buffer,
2933 &PyId__dealloc_warn,
2934 self, NULL);
Antoine Pitroue033e062010-10-29 10:38:18 +00002935 if (res)
2936 Py_DECREF(res);
2937 else
2938 PyErr_Clear();
2939 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002940 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002941 if (res == NULL)
2942 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002943 else
2944 Py_DECREF(res);
2945
Benjamin Peterson68623612012-12-20 11:53:11 -06002946 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2947 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002948 _PyErr_ChainExceptions(exc, val, tb);
2949 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002950 }
2951 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002952 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002953}
2954
2955static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002956textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002957{
2958 PyObject *line;
2959
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002960 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002961
2962 self->telling = 0;
2963 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2964 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002965 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002966 }
2967 else {
2968 line = PyObject_CallMethodObjArgs((PyObject *)self,
2969 _PyIO_str_readline, NULL);
2970 if (line && !PyUnicode_Check(line)) {
Serhiy Storchaka55fe1ae2017-04-16 10:46:38 +03002971 PyErr_Format(PyExc_OSError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03002972 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002973 "not '%.200s'", Py_TYPE(line)->tp_name);
2974 Py_DECREF(line);
2975 return NULL;
2976 }
2977 }
2978
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002979 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002980 return NULL;
2981
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002982 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002983 /* Reached EOF or would have blocked */
2984 Py_DECREF(line);
2985 Py_CLEAR(self->snapshot);
2986 self->telling = self->seekable;
2987 return NULL;
2988 }
2989
2990 return line;
2991}
2992
2993static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002994textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002995{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002996 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002997 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002998}
2999
3000static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003001textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003002{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003003 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003004 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
3005}
3006
3007static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003008textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003009{
3010 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003011 CHECK_ATTACHED(self);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003012 if (self->decoder == NULL ||
3013 _PyObject_LookupAttr(self->decoder, _PyIO_str_newlines, &res) == 0)
3014 {
Serhiy Storchaka4d9aec02018-01-16 18:34:21 +02003015 Py_RETURN_NONE;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003016 }
3017 return res;
3018}
3019
3020static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003021textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003022{
3023 CHECK_INITIALIZED(self);
INADA Naoki507434f2017-12-21 09:59:53 +09003024 Py_INCREF(self->errors);
3025 return self->errors;
Benjamin Peterson0926ad12009-06-06 18:02:12 +00003026}
3027
3028static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003029textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003030{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003031 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003032 return PyLong_FromSsize_t(self->chunk_size);
3033}
3034
3035static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003036textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003037{
3038 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06003039 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02003040 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003041 if (n == -1 && PyErr_Occurred())
3042 return -1;
3043 if (n <= 0) {
3044 PyErr_SetString(PyExc_ValueError,
3045 "a strictly positive integer is required");
3046 return -1;
3047 }
3048 self->chunk_size = n;
3049 return 0;
3050}
3051
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003052#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003053
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003054static PyMethodDef incrementalnewlinedecoder_methods[] = {
3055 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3056 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3057 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3058 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3059 {NULL}
3060};
3061
3062static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3063 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3064 {NULL}
3065};
3066
3067PyTypeObject PyIncrementalNewlineDecoder_Type = {
3068 PyVarObject_HEAD_INIT(NULL, 0)
3069 "_io.IncrementalNewlineDecoder", /*tp_name*/
3070 sizeof(nldecoder_object), /*tp_basicsize*/
3071 0, /*tp_itemsize*/
3072 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3073 0, /*tp_print*/
3074 0, /*tp_getattr*/
3075 0, /*tp_setattr*/
3076 0, /*tp_compare */
3077 0, /*tp_repr*/
3078 0, /*tp_as_number*/
3079 0, /*tp_as_sequence*/
3080 0, /*tp_as_mapping*/
3081 0, /*tp_hash */
3082 0, /*tp_call*/
3083 0, /*tp_str*/
3084 0, /*tp_getattro*/
3085 0, /*tp_setattro*/
3086 0, /*tp_as_buffer*/
3087 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
3088 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3089 0, /* tp_traverse */
3090 0, /* tp_clear */
3091 0, /* tp_richcompare */
3092 0, /*tp_weaklistoffset*/
3093 0, /* tp_iter */
3094 0, /* tp_iternext */
3095 incrementalnewlinedecoder_methods, /* tp_methods */
3096 0, /* tp_members */
3097 incrementalnewlinedecoder_getset, /* tp_getset */
3098 0, /* tp_base */
3099 0, /* tp_dict */
3100 0, /* tp_descr_get */
3101 0, /* tp_descr_set */
3102 0, /* tp_dictoffset */
3103 _io_IncrementalNewlineDecoder___init__, /* tp_init */
3104 0, /* tp_alloc */
3105 PyType_GenericNew, /* tp_new */
3106};
3107
3108
3109static PyMethodDef textiowrapper_methods[] = {
3110 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003111 _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003112 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
3113 _IO_TEXTIOWRAPPER_READ_METHODDEF
3114 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
3115 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3116 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3117
3118 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
3119 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3120 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
3121 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3122 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Antoine Pitrou243757e2010-11-05 21:15:39 +00003123 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003124
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003125 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
3126 _IO_TEXTIOWRAPPER_TELL_METHODDEF
3127 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003128 {NULL, NULL}
3129};
3130
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003131static PyMemberDef textiowrapper_members[] = {
3132 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3133 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3134 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou3c2817b2017-06-03 12:32:28 +02003135 {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02003136 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003137 {NULL}
3138};
3139
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003140static PyGetSetDef textiowrapper_getset[] = {
3141 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3142 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003143/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3144*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003145 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3146 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3147 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3148 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00003149 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003150};
3151
3152PyTypeObject PyTextIOWrapper_Type = {
3153 PyVarObject_HEAD_INIT(NULL, 0)
3154 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003155 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003156 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003157 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003158 0, /*tp_print*/
3159 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00003160 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003161 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003162 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003163 0, /*tp_as_number*/
3164 0, /*tp_as_sequence*/
3165 0, /*tp_as_mapping*/
3166 0, /*tp_hash */
3167 0, /*tp_call*/
3168 0, /*tp_str*/
3169 0, /*tp_getattro*/
3170 0, /*tp_setattro*/
3171 0, /*tp_as_buffer*/
3172 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02003173 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003174 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003175 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3176 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003177 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003178 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003179 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003180 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3181 textiowrapper_methods, /* tp_methods */
3182 textiowrapper_members, /* tp_members */
3183 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003184 0, /* tp_base */
3185 0, /* tp_dict */
3186 0, /* tp_descr_get */
3187 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00003188 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03003189 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003190 0, /* tp_alloc */
3191 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02003192 0, /* tp_free */
3193 0, /* tp_is_gc */
3194 0, /* tp_bases */
3195 0, /* tp_mro */
3196 0, /* tp_cache */
3197 0, /* tp_subclasses */
3198 0, /* tp_weaklist */
3199 0, /* tp_del */
3200 0, /* tp_version_tag */
3201 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00003202};