blob: 2f55eb0595b6e2c33eb459f9ea3830564ff5ac21 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Serhiy Storchakaf24131f2015-04-16 11:19:43 +030014/*[clinic input]
15module _io
16class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
17class _io.TextIOWrapper "textio *" "&TextIOWrapper_TYpe"
18[clinic start generated code]*/
19/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2097a4fc85670c26]*/
20
21/*[python input]
22class io_ssize_t_converter(CConverter):
23 type = 'Py_ssize_t'
24 converter = '_PyIO_ConvertSsize_t'
25[python start generated code]*/
26/*[python end generated code: output=da39a3ee5e6b4b0d input=d0a811d3cbfd1b33]*/
27
/* Attribute and method names used by this module, declared with
   _Py_IDENTIFIER.  A name declared here is referenced as &PyId_<name>;
   for example &PyId_setstate is passed to _PyObject_CallMethodId() by
   IncrementalNewlineDecoder.setstate(). */
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020028_Py_IDENTIFIER(close);
29_Py_IDENTIFIER(_dealloc_warn);
30_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020031_Py_IDENTIFIER(fileno);
32_Py_IDENTIFIER(flush);
33_Py_IDENTIFIER(getpreferredencoding);
34_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020035_Py_IDENTIFIER(mode);
36_Py_IDENTIFIER(name);
37_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020038_Py_IDENTIFIER(read);
Martin v. Löwis767046a2011-10-14 15:35:36 +020039_Py_IDENTIFIER(read1);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020040_Py_IDENTIFIER(readable);
41_Py_IDENTIFIER(replace);
42_Py_IDENTIFIER(reset);
43_Py_IDENTIFIER(seek);
44_Py_IDENTIFIER(seekable);
45_Py_IDENTIFIER(setstate);
46_Py_IDENTIFIER(tell);
47_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020048
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000049/* TextIOBase */
50
/* Docstring of the _io._TextIOBase type; installed as tp_doc of
   PyTextIOBase_Type. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000051PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000052 "Base class for text I/O.\n"
53 "\n"
54 "This class provides a character and line based interface to stream\n"
55 "I/O. There is no readinto method because Python's character strings\n"
56 "are immutable. There is no public constructor.\n"
57 );
58
59static PyObject *
60_unsupported(const char *message)
61{
Antoine Pitrou712cb732013-12-21 15:51:54 +010062 _PyIO_State *state = IO_STATE();
63 if (state != NULL)
64 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065 return NULL;
66}
67
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000068PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000069 "Separate the underlying buffer from the TextIOBase and return it.\n"
70 "\n"
71 "After the underlying buffer has been detached, the TextIO is in an\n"
72 "unusable state.\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000077{
78 return _unsupported("detach");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Read at most n characters from stream.\n"
83 "\n"
84 "Read from underlying buffer until we have n characters or we hit EOF.\n"
85 "If n is negative or omitted, read until EOF.\n"
86 );
87
88static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000089textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000090{
91 return _unsupported("read");
92}
93
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000094PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000095 "Read until newline or EOF.\n"
96 "\n"
97 "Returns an empty string if EOF is hit immediately.\n"
98 );
99
100static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000101textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000102{
103 return _unsupported("readline");
104}
105
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000106PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000107 "Write string to stream.\n"
108 "Returns the number of characters written (which is always equal to\n"
109 "the length of the string).\n"
110 );
111
112static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000113textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000114{
115 return _unsupported("write");
116}
117
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000118PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119 "Encoding of the text stream.\n"
120 "\n"
121 "Subclasses should override.\n"
122 );
123
124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126{
127 Py_RETURN_NONE;
128}
129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000130PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000131 "Line endings translated so far.\n"
132 "\n"
133 "Only line endings translated during reading are considered.\n"
134 "\n"
135 "Subclasses should override.\n"
136 );
137
138static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000139textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140{
141 Py_RETURN_NONE;
142}
143
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000144PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000145 "The error setting of the decoder or encoder.\n"
146 "\n"
147 "Subclasses should override.\n"
148 );
149
150static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000152{
153 Py_RETURN_NONE;
154}
155
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000157static PyMethodDef textiobase_methods[] = {
158 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
159 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
160 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
161 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000162 {NULL, NULL}
163};
164
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165static PyGetSetDef textiobase_getset[] = {
166 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
167 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
168 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000169 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000170};
171
172PyTypeObject PyTextIOBase_Type = {
173 PyVarObject_HEAD_INIT(NULL, 0)
174 "_io._TextIOBase", /*tp_name*/
175 0, /*tp_basicsize*/
176 0, /*tp_itemsize*/
177 0, /*tp_dealloc*/
178 0, /*tp_print*/
179 0, /*tp_getattr*/
180 0, /*tp_setattr*/
181 0, /*tp_compare */
182 0, /*tp_repr*/
183 0, /*tp_as_number*/
184 0, /*tp_as_sequence*/
185 0, /*tp_as_mapping*/
186 0, /*tp_hash */
187 0, /*tp_call*/
188 0, /*tp_str*/
189 0, /*tp_getattro*/
190 0, /*tp_setattro*/
191 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200192 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
193 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000194 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000195 0, /* tp_traverse */
196 0, /* tp_clear */
197 0, /* tp_richcompare */
198 0, /* tp_weaklistoffset */
199 0, /* tp_iter */
200 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000201 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000202 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000203 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000204 &PyIOBase_Type, /* tp_base */
205 0, /* tp_dict */
206 0, /* tp_descr_get */
207 0, /* tp_descr_set */
208 0, /* tp_dictoffset */
209 0, /* tp_init */
210 0, /* tp_alloc */
211 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200212 0, /* tp_free */
213 0, /* tp_is_gc */
214 0, /* tp_bases */
215 0, /* tp_mro */
216 0, /* tp_cache */
217 0, /* tp_subclasses */
218 0, /* tp_weaklist */
219 0, /* tp_del */
220 0, /* tp_version_tag */
221 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222};
223
224
225/* IncrementalNewlineDecoder */
226
/* Instance layout of IncrementalNewlineDecoder.

   decoder   - object whose decode() is called, or Py_None when the input
               is used as the decoded text directly; decode() treats NULL
               as "__init__ not called".
   errors    - error-handling name stored by __init__ ("strict" when the
               argument is omitted).
   pendingcr - set while a trailing '\r' is being held back until the next
               decode() call.
   translate - whether decode() rewrites "\r\n" and "\r" to "\n".
   seennl    - bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF. */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000227typedef struct {
228 PyObject_HEAD
229 PyObject *decoder;
230 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200231 unsigned int pendingcr: 1;
232 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000233 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000234} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000235
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300236/*[clinic input]
237_io.IncrementalNewlineDecoder.__init__
238 decoder: object
239 translate: int
240 errors: object(c_default="NULL") = "strict"
241
242Codec used when reading a file in universal newlines mode.
243
244It wraps another incremental decoder, translating \r\n and \r into \n.
245It also records the types of newlines encountered. When used with
246translate=False, it ensures that the newline sequence is returned in
247one piece. When used with decoder=None, it expects unicode strings as
248decode input and translates newlines without first invoking an external
249decoder.
250[clinic start generated code]*/
251
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000252static int
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300253_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
254 PyObject *decoder, int translate,
255 PyObject *errors)
256/*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000257{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000258 self->decoder = decoder;
259 Py_INCREF(decoder);
260
261 if (errors == NULL) {
262 self->errors = PyUnicode_FromString("strict");
263 if (self->errors == NULL)
264 return -1;
265 }
266 else {
267 Py_INCREF(errors);
268 self->errors = errors;
269 }
270
271 self->translate = translate;
272 self->seennl = 0;
273 self->pendingcr = 0;
274
275 return 0;
276}
277
278static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000279incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000280{
281 Py_CLEAR(self->decoder);
282 Py_CLEAR(self->errors);
283 Py_TYPE(self)->tp_free((PyObject *)self);
284}
285
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200286static int
287check_decoded(PyObject *decoded)
288{
289 if (decoded == NULL)
290 return -1;
291 if (!PyUnicode_Check(decoded)) {
292 PyErr_Format(PyExc_TypeError,
293 "decoder should return a string result, not '%.200s'",
294 Py_TYPE(decoded)->tp_name);
295 Py_DECREF(decoded);
296 return -1;
297 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200298 if (PyUnicode_READY(decoded) < 0) {
299 Py_DECREF(decoded);
300 return -1;
301 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200302 return 0;
303}
304
/* Bit flags recording which newline styles have been encountered.  They
   are OR-ed together in nldecoder_object.seennl (a 3-bit field) and mapped
   back to strings by the `newlines` getter. */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000305#define SEEN_CR 1
306#define SEEN_LF 2
307#define SEEN_CRLF 4
308#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
309
310PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200311_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000312 PyObject *input, int final)
313{
314 PyObject *output;
315 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200316 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000317
318 if (self->decoder == NULL) {
319 PyErr_SetString(PyExc_ValueError,
320 "IncrementalNewlineDecoder.__init__ not called");
321 return NULL;
322 }
323
324 /* decode input (with the eventual \r from a previous pass) */
325 if (self->decoder != Py_None) {
326 output = PyObject_CallMethodObjArgs(self->decoder,
327 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
328 }
329 else {
330 output = input;
331 Py_INCREF(output);
332 }
333
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200334 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000335 return NULL;
336
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200337 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000338 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200339 /* Prefix output with CR */
340 int kind;
341 PyObject *modified;
342 char *out;
343
344 modified = PyUnicode_New(output_len + 1,
345 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346 if (modified == NULL)
347 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 kind = PyUnicode_KIND(modified);
349 out = PyUnicode_DATA(modified);
350 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200351 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000352 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200353 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000354 self->pendingcr = 0;
355 output_len++;
356 }
357
358 /* retain last \r even when not translating data:
359 * then readline() is sure to get \r\n in one pass
360 */
361 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000362 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200363 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
364 {
365 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
366 if (modified == NULL)
367 goto error;
368 Py_DECREF(output);
369 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000370 self->pendingcr = 1;
371 }
372 }
373
374 /* Record which newlines are read and do newline translation if desired,
375 all in one pass. */
376 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200377 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000378 Py_ssize_t len;
379 int seennl = self->seennl;
380 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200381 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200383 in_str = PyUnicode_DATA(output);
384 len = PyUnicode_GET_LENGTH(output);
385 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000386
387 if (len == 0)
388 return output;
389
390 /* If, up to now, newlines are consistently \n, do a quick check
391 for the \r *byte* with the libc's optimized memchr.
392 */
393 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200394 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000395 }
396
Antoine Pitrou66913e22009-03-06 23:40:56 +0000397 if (only_lf) {
398 /* If not already seen, quick scan for a possible "\n" character.
399 (there's nothing else to be done, even when in translation mode)
400 */
401 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200402 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100403 if (kind == PyUnicode_1BYTE_KIND)
404 seennl |= SEEN_LF;
405 else {
406 Py_ssize_t i = 0;
407 for (;;) {
408 Py_UCS4 c;
409 /* Fast loop for non-control characters */
410 while (PyUnicode_READ(kind, in_str, i) > '\n')
411 i++;
412 c = PyUnicode_READ(kind, in_str, i++);
413 if (c == '\n') {
414 seennl |= SEEN_LF;
415 break;
416 }
417 if (i >= len)
418 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000419 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000420 }
421 }
422 /* Finished: we have scanned for newlines, and none of them
423 need translating */
424 }
425 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000427 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000428 if (seennl == SEEN_ALL)
429 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000432 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 while (PyUnicode_READ(kind, in_str, i) > '\r')
434 i++;
435 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 if (c == '\n')
437 seennl |= SEEN_LF;
438 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200439 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 }
443 else
444 seennl |= SEEN_CR;
445 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 break;
448 if (seennl == SEEN_ALL)
449 break;
450 }
451 endscan:
452 ;
453 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000454 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 void *translated;
456 int kind = PyUnicode_KIND(output);
457 void *in_str = PyUnicode_DATA(output);
458 Py_ssize_t in, out;
459 /* XXX: Previous in-place translation here is disabled as
460 resizing is not possible anymore */
461 /* We could try to optimize this so that we only do a copy
462 when there is something to translate. On the other hand,
463 we already know there is a \r byte, so chances are high
464 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200465 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200466 if (translated == NULL) {
467 PyErr_NoMemory();
468 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000469 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200470 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000471 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000473 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200474 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
475 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200477 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478 seennl |= SEEN_LF;
479 continue;
480 }
481 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000483 in++;
484 seennl |= SEEN_CRLF;
485 }
486 else
487 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200488 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000489 continue;
490 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200491 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000492 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200493 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000494 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200495 Py_DECREF(output);
496 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100497 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200498 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200499 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000500 }
501 self->seennl |= seennl;
502 }
503
504 return output;
505
506 error:
507 Py_DECREF(output);
508 return NULL;
509}
510
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300511/*[clinic input]
512_io.IncrementalNewlineDecoder.decode
513 input: object
514 final: int(c_default="0") = False
515[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300517static PyObject *
518_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
519 PyObject *input, int final)
520/*[clinic end generated code: output=0d486755bb37a66e input=d65677385bfd6827]*/
521{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000522 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
523}
524
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300525/*[clinic input]
526_io.IncrementalNewlineDecoder.getstate
527[clinic start generated code]*/
528
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300530_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
531/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532{
533 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700534 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000535
536 if (self->decoder != Py_None) {
537 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
538 _PyIO_str_getstate, NULL);
539 if (state == NULL)
540 return NULL;
Serhiy Storchakabb72c472015-04-19 20:38:19 +0300541 if (!PyArg_ParseTuple(state, "OK", &buffer, &flag)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542 Py_DECREF(state);
543 return NULL;
544 }
545 Py_INCREF(buffer);
546 Py_DECREF(state);
547 }
548 else {
549 buffer = PyBytes_FromString("");
550 flag = 0;
551 }
552 flag <<= 1;
553 if (self->pendingcr)
554 flag |= 1;
555 return Py_BuildValue("NK", buffer, flag);
556}
557
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300558/*[clinic input]
559_io.IncrementalNewlineDecoder.setstate
560 state: object
561 /
562[clinic start generated code]*/
563
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000564static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300565_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
566 PyObject *state)
567/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000568{
569 PyObject *buffer;
Benjamin Peterson47ff0732016-09-08 09:15:54 -0700570 unsigned long long flag;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000571
Serhiy Storchakabb72c472015-04-19 20:38:19 +0300572 if (!PyArg_ParseTuple(state, "OK", &buffer, &flag))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000573 return NULL;
574
Victor Stinner7d7e7752014-06-17 23:31:25 +0200575 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 flag >>= 1;
577
578 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200579 return _PyObject_CallMethodId(self->decoder,
580 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000581 else
582 Py_RETURN_NONE;
583}
584
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300585/*[clinic input]
586_io.IncrementalNewlineDecoder.reset
587[clinic start generated code]*/
588
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300590_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
591/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592{
593 self->seennl = 0;
594 self->pendingcr = 0;
595 if (self->decoder != Py_None)
596 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
597 else
598 Py_RETURN_NONE;
599}
600
601static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603{
604 switch (self->seennl) {
605 case SEEN_CR:
606 return PyUnicode_FromString("\r");
607 case SEEN_LF:
608 return PyUnicode_FromString("\n");
609 case SEEN_CRLF:
610 return PyUnicode_FromString("\r\n");
611 case SEEN_CR | SEEN_LF:
612 return Py_BuildValue("ss", "\r", "\n");
613 case SEEN_CR | SEEN_CRLF:
614 return Py_BuildValue("ss", "\r", "\r\n");
615 case SEEN_LF | SEEN_CRLF:
616 return Py_BuildValue("ss", "\n", "\r\n");
617 case SEEN_CR | SEEN_LF | SEEN_CRLF:
618 return Py_BuildValue("sss", "\r", "\n", "\r\n");
619 default:
620 Py_RETURN_NONE;
621 }
622
623}
624
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000625/* TextIOWrapper */
626
/* Signature of a specialized encoder as stored in textio.encodefunc and
   in the encodefuncs table.  The concrete helpers are defined with a
   `textio *` first parameter and cast to this type. */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000627typedef PyObject *
628 (*encodefunc_t)(PyObject *, PyObject *);
629
/* Instance layout of a TextIOWrapper object (the clinic declaration at the
   top of the file maps _io.TextIOWrapper to "textio *"). */
630typedef struct
631{
632 PyObject_HEAD
633 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000634 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000635 Py_ssize_t chunk_size;
636 PyObject *buffer;
637 PyObject *encoding;
638 PyObject *encoder;
639 PyObject *decoder;
640 PyObject *readnl;
 /* The specialized encoders below read this through PyBytes_AS_STRING(),
    so it is presumably a bytes object -- confirm against the code that
    sets it. */
641 PyObject *errors;
642 const char *writenl; /* utf-8 encoded, NULL stands for \n */
643 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200644 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 char readuniversal;
646 char readtranslate;
647 char writetranslate;
648 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200649 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000650 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200651 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652 /* Specialized encoding func (see below) */
653 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000654 /* Whether or not it's the start of the stream; utf16_encode() and
    utf32_encode() consult this to decide whether to skip the BOM */
655 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000656
657 /* Reads and writes are internally buffered in order to speed things up.
658 However, any read will first flush the write buffer if it isn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000659
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000660 Please also note that text to be written is first encoded before being
661 buffered. This is necessary so that encoding errors are immediately
662 reported to the caller, but it unfortunately means that the
663 IncrementalEncoder (whose encode() method is always written in Python)
664 becomes a bottleneck for small writes.
665 */
666 PyObject *decoded_chars; /* buffer for text returned from decoder */
667 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
668 PyObject *pending_bytes; /* list of bytes objects waiting to be
669 written, or NULL */
670 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000671
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000672 /* snapshot is either None, or a tuple (dec_flags, next_input) where
673 * dec_flags is the second (integer) item of the decoder state and
674 * next_input is the chunk of input bytes that comes next after the
675 * snapshot point. We use this to reconstruct decoder states in tell().
676 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000677 PyObject *snapshot;
678 /* Bytes-to-characters ratio for the current chunk. Serves as input for
679 the heuristic in tell(). */
680 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000681
682 /* Cache raw object if it's a FileIO object */
683 PyObject *raw;
684
685 PyObject *weakreflist;
686 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000687} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000688
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000689/* A couple of specialized cases in order to bypass the slow incremental
690 encoding methods for the most popular encodings. */
691
692static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000693ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000694{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200695 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696}
697
698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100701 return _PyUnicode_EncodeUTF16(text,
702 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703}
704
705static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000706utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100708 return _PyUnicode_EncodeUTF16(text,
709 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710}
711
712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714{
Antoine Pitroue4501852009-05-14 18:55:55 +0000715 if (!self->encoding_start_of_stream) {
716 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200717#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000718 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000720 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000722 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100723 return _PyUnicode_EncodeUTF16(text,
724 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725}
726
Antoine Pitroue4501852009-05-14 18:55:55 +0000727static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000728utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000729{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100730 return _PyUnicode_EncodeUTF32(text,
731 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000732}
733
734static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000735utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000736{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100737 return _PyUnicode_EncodeUTF32(text,
738 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000739}
740
741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000743{
744 if (!self->encoding_start_of_stream) {
745 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200746#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000747 return utf32be_encode(self, text);
748#else
749 return utf32le_encode(self, text);
750#endif
751 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100752 return _PyUnicode_EncodeUTF32(text,
753 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000754}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000755
756static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000757utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200759 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760}
761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766}
767
768/* Map normalized encoding names onto the specialized encoding funcs */
769
770typedef struct {
771 const char *name;
772 encodefunc_t encodefunc;
773} encodefuncentry;
774
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200775static const encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776 {"ascii", (encodefunc_t) ascii_encode},
777 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000778 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000779 {"utf-16-be", (encodefunc_t) utf16be_encode},
780 {"utf-16-le", (encodefunc_t) utf16le_encode},
781 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000782 {"utf-32-be", (encodefunc_t) utf32be_encode},
783 {"utf-32-le", (encodefunc_t) utf32le_encode},
784 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000785 {NULL, NULL}
786};
787
788
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300789/*[clinic input]
790_io.TextIOWrapper.__init__
791 buffer: object
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700792 encoding: str(accept={str, NoneType}) = NULL
793 errors: str(accept={str, NoneType}) = NULL
794 newline: str(accept={str, NoneType}) = NULL
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300795 line_buffering: int(c_default="0") = False
796 write_through: int(c_default="0") = False
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300798Character and line based layer over a BufferedIOBase object, buffer.
799
800encoding gives the name of the encoding that the stream will be
801decoded or encoded with. It defaults to locale.getpreferredencoding(False).
802
803errors determines the strictness of encoding and decoding (see
804help(codecs.Codec) or the documentation for codecs.register) and
805defaults to "strict".
806
807newline controls how line endings are handled. It can be None, '',
808'\n', '\r', and '\r\n'. It works as follows:
809
810* On input, if newline is None, universal newlines mode is
811 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
812 these are translated into '\n' before being returned to the
813 caller. If it is '', universal newline mode is enabled, but line
814 endings are returned to the caller untranslated. If it has any of
815 the other legal values, input lines are only terminated by the given
816 string, and the line ending is returned to the caller untranslated.
817
818* On output, if newline is None, any '\n' characters written are
819 translated to the system default line separator, os.linesep. If
820 newline is '' or '\n', no translation takes place. If newline is any
821 of the other legal values, any '\n' characters written are translated
822 to the given string.
823
824If line_buffering is True, a call to flush is implied when a call to
825write contains a newline character.
826[clinic start generated code]*/
827
828static int
829_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
830 const char *encoding, const char *errors,
831 const char *newline, int line_buffering,
832 int write_through)
Larry Hastingsdbfdc382015-05-04 06:59:46 -0700833/*[clinic end generated code: output=56a83402ce2a8381 input=3126cb3101a2c99b]*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +0300834{
835 PyObject *raw, *codec_info = NULL;
836 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 PyObject *res;
838 int r;
839
840 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000841 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000842
843 if (newline && newline[0] != '\0'
844 && !(newline[0] == '\n' && newline[1] == '\0')
845 && !(newline[0] == '\r' && newline[1] == '\0')
846 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
847 PyErr_Format(PyExc_ValueError,
848 "illegal newline value: %s", newline);
849 return -1;
850 }
851
852 Py_CLEAR(self->buffer);
853 Py_CLEAR(self->encoding);
854 Py_CLEAR(self->encoder);
855 Py_CLEAR(self->decoder);
856 Py_CLEAR(self->readnl);
857 Py_CLEAR(self->decoded_chars);
858 Py_CLEAR(self->pending_bytes);
859 Py_CLEAR(self->snapshot);
860 Py_CLEAR(self->errors);
861 Py_CLEAR(self->raw);
862 self->decoded_chars_used = 0;
863 self->pending_bytes_count = 0;
864 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000865 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000866
867 if (encoding == NULL) {
868 /* Try os.device_encoding(fileno) */
869 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100870 state = IO_STATE();
871 if (state == NULL)
872 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200873 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000874 /* Ignore only AttributeError and UnsupportedOperation */
875 if (fileno == NULL) {
876 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
877 PyErr_ExceptionMatches(state->unsupported_operation)) {
878 PyErr_Clear();
879 }
880 else {
881 goto error;
882 }
883 }
884 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200885 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500886 Py_DECREF(fileno);
887 if (fd == -1 && PyErr_Occurred()) {
888 goto error;
889 }
890
891 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000892 if (self->encoding == NULL)
893 goto error;
894 else if (!PyUnicode_Check(self->encoding))
895 Py_CLEAR(self->encoding);
896 }
897 }
898 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200899 PyObject *locale_module = _PyIO_get_locale_module(state);
900 if (locale_module == NULL)
901 goto catch_ImportError;
902 self->encoding = _PyObject_CallMethodId(
903 locale_module, &PyId_getpreferredencoding, "O", Py_False);
904 Py_DECREF(locale_module);
905 if (self->encoding == NULL) {
906 catch_ImportError:
907 /*
Martin Panter7462b6492015-11-02 03:37:02 +0000908 Importing locale can raise an ImportError because of
909 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +0200910 ImportError if _locale is not available. These will happen
911 during module building.
912 */
913 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
914 PyErr_Clear();
915 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000916 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200917 else
918 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000919 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000922 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000923 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000924 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000925 if (encoding == NULL)
926 goto error;
927 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000928 else if (encoding != NULL) {
929 self->encoding = PyUnicode_FromString(encoding);
930 if (self->encoding == NULL)
931 goto error;
932 }
933 else {
934 PyErr_SetString(PyExc_IOError,
935 "could not determine default encoding");
936 }
937
Nick Coghlana9b15242014-02-04 22:11:18 +1000938 /* Check we have been asked for a real text encoding */
939 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
940 if (codec_info == NULL) {
941 Py_CLEAR(self->encoding);
942 goto error;
943 }
944
945 /* XXX: Failures beyond this point have the potential to leak elements
946 * of the partially constructed object (like self->encoding)
947 */
948
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000949 if (errors == NULL)
950 errors = "strict";
951 self->errors = PyBytes_FromString(errors);
952 if (self->errors == NULL)
953 goto error;
954
955 self->chunk_size = 8192;
956 self->readuniversal = (newline == NULL || newline[0] == '\0');
957 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200958 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000959 self->readtranslate = (newline == NULL);
960 if (newline) {
961 self->readnl = PyUnicode_FromString(newline);
962 if (self->readnl == NULL)
Nick Coghlana9b15242014-02-04 22:11:18 +1000963 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 }
965 self->writetranslate = (newline == NULL || newline[0] != '\0');
966 if (!self->readuniversal && self->readnl) {
967 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000968 if (self->writenl == NULL)
969 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000970 if (!strcmp(self->writenl, "\n"))
971 self->writenl = NULL;
972 }
973#ifdef MS_WINDOWS
974 else
975 self->writenl = "\r\n";
976#endif
977
978 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200979 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000980 if (res == NULL)
981 goto error;
982 r = PyObject_IsTrue(res);
983 Py_DECREF(res);
984 if (r == -1)
985 goto error;
986 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +1000987 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
988 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000989 if (self->decoder == NULL)
990 goto error;
991
992 if (self->readuniversal) {
993 PyObject *incrementalDecoder = PyObject_CallFunction(
994 (PyObject *)&PyIncrementalNewlineDecoder_Type,
995 "Oi", self->decoder, (int)self->readtranslate);
996 if (incrementalDecoder == NULL)
997 goto error;
Serhiy Storchaka48842712016-04-06 09:45:48 +0300998 Py_XSETREF(self->decoder, incrementalDecoder);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000999 }
1000 }
1001
1002 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001003 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001004 if (res == NULL)
1005 goto error;
1006 r = PyObject_IsTrue(res);
1007 Py_DECREF(res);
1008 if (r == -1)
1009 goto error;
1010 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001011 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1012 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001013 if (self->encoder == NULL)
1014 goto error;
1015 /* Get the normalized named of the codec */
Nick Coghlana9b15242014-02-04 22:11:18 +10001016 res = _PyObject_GetAttrId(codec_info, &PyId_name);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001017 if (res == NULL) {
1018 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019 PyErr_Clear();
1020 else
1021 goto error;
1022 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001023 else if (PyUnicode_Check(res)) {
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001024 const encodefuncentry *e = encodefuncs;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001025 while (e->name != NULL) {
Serhiy Storchakaf4934ea2016-11-16 10:17:58 +02001026 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001027 self->encodefunc = e->encodefunc;
1028 break;
1029 }
1030 e++;
1031 }
1032 }
1033 Py_XDECREF(res);
1034 }
1035
Nick Coghlana9b15242014-02-04 22:11:18 +10001036 /* Finished sorting out the codec details */
Benjamin Peterson6c14f232014-11-12 10:19:46 -05001037 Py_CLEAR(codec_info);
Nick Coghlana9b15242014-02-04 22:11:18 +10001038
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 self->buffer = buffer;
1040 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1043 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1044 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001045 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001046 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001047 if (raw == NULL) {
1048 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1049 PyErr_Clear();
1050 else
1051 goto error;
1052 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001053 else if (Py_TYPE(raw) == &PyFileIO_Type)
1054 self->raw = raw;
1055 else
1056 Py_DECREF(raw);
1057 }
1058
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001059 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 if (res == NULL)
1061 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001062 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001063 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001064 if (r < 0)
1065 goto error;
1066 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001067
Martin v. Löwis767046a2011-10-14 15:35:36 +02001068 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001069
Antoine Pitroue4501852009-05-14 18:55:55 +00001070 self->encoding_start_of_stream = 0;
1071 if (self->seekable && self->encoder) {
1072 PyObject *cookieObj;
1073 int cmp;
1074
1075 self->encoding_start_of_stream = 1;
1076
1077 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1078 if (cookieObj == NULL)
1079 goto error;
1080
1081 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1082 Py_DECREF(cookieObj);
1083 if (cmp < 0) {
1084 goto error;
1085 }
1086
1087 if (cmp == 0) {
1088 self->encoding_start_of_stream = 0;
1089 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1090 _PyIO_zero, NULL);
1091 if (res == NULL)
1092 goto error;
1093 Py_DECREF(res);
1094 }
1095 }
1096
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001097 self->ok = 1;
1098 return 0;
1099
1100 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001101 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001102 return -1;
1103}
1104
1105static int
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001106textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001107{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001108 self->ok = 0;
1109 Py_CLEAR(self->buffer);
1110 Py_CLEAR(self->encoding);
1111 Py_CLEAR(self->encoder);
1112 Py_CLEAR(self->decoder);
1113 Py_CLEAR(self->readnl);
1114 Py_CLEAR(self->decoded_chars);
1115 Py_CLEAR(self->pending_bytes);
1116 Py_CLEAR(self->snapshot);
1117 Py_CLEAR(self->errors);
1118 Py_CLEAR(self->raw);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001119
1120 Py_CLEAR(self->dict);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121 return 0;
1122}
1123
1124static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001125textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001126{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001127 self->finalizing = 1;
1128 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001129 return;
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001130 self->ok = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001131 _PyObject_GC_UNTRACK(self);
1132 if (self->weakreflist != NULL)
1133 PyObject_ClearWeakRefs((PyObject *)self);
Serhiy Storchakaa7c972e2016-11-03 15:37:01 +02001134 textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 Py_TYPE(self)->tp_free((PyObject *)self);
1136}
1137
1138static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001139textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001140{
1141 Py_VISIT(self->buffer);
1142 Py_VISIT(self->encoding);
1143 Py_VISIT(self->encoder);
1144 Py_VISIT(self->decoder);
1145 Py_VISIT(self->readnl);
1146 Py_VISIT(self->decoded_chars);
1147 Py_VISIT(self->pending_bytes);
1148 Py_VISIT(self->snapshot);
1149 Py_VISIT(self->errors);
1150 Py_VISIT(self->raw);
1151
1152 Py_VISIT(self->dict);
1153 return 0;
1154}
1155
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001156static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001157textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001158
1159/* This macro takes some shortcuts to make the common case faster. */
1160#define CHECK_CLOSED(self) \
1161 do { \
1162 int r; \
1163 PyObject *_res; \
1164 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1165 if (self->raw != NULL) \
1166 r = _PyFileIO_closed(self->raw); \
1167 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001168 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001169 if (_res == NULL) \
1170 return NULL; \
1171 r = PyObject_IsTrue(_res); \
1172 Py_DECREF(_res); \
1173 if (r < 0) \
1174 return NULL; \
1175 } \
1176 if (r > 0) { \
1177 PyErr_SetString(PyExc_ValueError, \
1178 "I/O operation on closed file."); \
1179 return NULL; \
1180 } \
1181 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001182 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183 return NULL; \
1184 } while (0)
1185
/* Return NULL with ValueError from the enclosing function when the
   wrapper has not been successfully initialized (self->ok <= 0).
   NOTE(review): expands to a bare `if` statement, not do { } while (0);
   do not use it as the sole body of an if/else. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }
1192
/* Return NULL with ValueError from the enclosing function when the
   wrapper is uninitialized or its buffer has been detached.
   NOTE(review): expands to two statements; do not use it as the sole
   body of an if/else. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }
1200
/* Variant of CHECK_ATTACHED for functions returning int: returns -1
   with ValueError when the wrapper is uninitialized or detached. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } \
    else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1211
1212
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001213/*[clinic input]
1214_io.TextIOWrapper.detach
1215[clinic start generated code]*/
1216
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001217static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001218_io_TextIOWrapper_detach_impl(textio *self)
1219/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001220{
1221 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001222 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001223 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1224 if (res == NULL)
1225 return NULL;
1226 Py_DECREF(res);
1227 buffer = self->buffer;
1228 self->buffer = NULL;
1229 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001230 return buffer;
1231}
1232
Antoine Pitrou24f36292009-03-28 22:16:42 +00001233/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001234 underlying buffered object, though. */
1235static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001236_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001237{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001238 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001239
1240 if (self->pending_bytes == NULL)
1241 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001242
1243 pending = self->pending_bytes;
1244 Py_INCREF(pending);
1245 self->pending_bytes_count = 0;
1246 Py_CLEAR(self->pending_bytes);
1247
1248 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1249 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001250 if (b == NULL)
1251 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001252 ret = NULL;
1253 do {
1254 ret = PyObject_CallMethodObjArgs(self->buffer,
1255 _PyIO_str_write, b, NULL);
1256 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001257 Py_DECREF(b);
1258 if (ret == NULL)
1259 return -1;
1260 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001261 return 0;
1262}
1263
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001264/*[clinic input]
1265_io.TextIOWrapper.write
1266 text: unicode
1267 /
1268[clinic start generated code]*/
1269
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001271_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1272/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001273{
1274 PyObject *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275 PyObject *b;
1276 Py_ssize_t textlen;
1277 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001278 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001280 if (PyUnicode_READY(text) == -1)
1281 return NULL;
1282
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001283 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001284 CHECK_CLOSED(self);
1285
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001286 if (self->encoder == NULL)
1287 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001288
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001289 Py_INCREF(text);
1290
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001292
1293 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001295 haslf = 1;
1296
1297 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001298 PyObject *newtext = _PyObject_CallMethodId(
1299 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001300 Py_DECREF(text);
1301 if (newtext == NULL)
1302 return NULL;
1303 text = newtext;
1304 }
1305
Antoine Pitroue96ec682011-07-23 21:46:35 +02001306 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001307 text_needflush = 1;
1308 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001309 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001310 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001311 needflush = 1;
1312
1313 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001314 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001315 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001316 self->encoding_start_of_stream = 0;
1317 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001318 else
1319 b = PyObject_CallMethodObjArgs(self->encoder,
1320 _PyIO_str_encode, text, NULL);
1321 Py_DECREF(text);
1322 if (b == NULL)
1323 return NULL;
1324
1325 if (self->pending_bytes == NULL) {
1326 self->pending_bytes = PyList_New(0);
1327 if (self->pending_bytes == NULL) {
1328 Py_DECREF(b);
1329 return NULL;
1330 }
1331 self->pending_bytes_count = 0;
1332 }
1333 if (PyList_Append(self->pending_bytes, b) < 0) {
1334 Py_DECREF(b);
1335 return NULL;
1336 }
1337 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1338 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001339 if (self->pending_bytes_count > self->chunk_size || needflush ||
1340 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001341 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001342 return NULL;
1343 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001344
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001345 if (needflush) {
1346 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1347 if (ret == NULL)
1348 return NULL;
1349 Py_DECREF(ret);
1350 }
1351
1352 Py_CLEAR(self->snapshot);
1353
1354 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001355 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001356 if (ret == NULL)
1357 return NULL;
1358 Py_DECREF(ret);
1359 }
1360
1361 return PyLong_FromSsize_t(textlen);
1362}
1363
1364/* Steal a reference to chars and store it in the decoded_char buffer;
1365 */
1366static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001367textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001368{
Serhiy Storchaka48842712016-04-06 09:45:48 +03001369 Py_XSETREF(self->decoded_chars, chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001370 self->decoded_chars_used = 0;
1371}
1372
1373static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001374textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001375{
1376 PyObject *chars;
1377 Py_ssize_t avail;
1378
1379 if (self->decoded_chars == NULL)
1380 return PyUnicode_FromStringAndSize(NULL, 0);
1381
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001382 /* decoded_chars is guaranteed to be "ready". */
1383 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001384 - self->decoded_chars_used);
1385
1386 assert(avail >= 0);
1387
1388 if (n < 0 || n > avail)
1389 n = avail;
1390
1391 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001392 chars = PyUnicode_Substring(self->decoded_chars,
1393 self->decoded_chars_used,
1394 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001395 if (chars == NULL)
1396 return NULL;
1397 }
1398 else {
1399 chars = self->decoded_chars;
1400 Py_INCREF(chars);
1401 }
1402
1403 self->decoded_chars_used += n;
1404 return chars;
1405}
1406
1407/* Read and decode the next chunk of data from the BufferedReader.
1408 */
1409static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001410textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001411{
1412 PyObject *dec_buffer = NULL;
1413 PyObject *dec_flags = NULL;
1414 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001415 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001416 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001417 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001418 int eof;
1419
1420 /* The return value is True unless EOF was reached. The decoded string is
1421 * placed in self._decoded_chars (replacing its previous value). The
1422 * entire input chunk is sent to the decoder, though some of it may remain
1423 * buffered in the decoder, yet to be converted.
1424 */
1425
1426 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001427 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001428 return -1;
1429 }
1430
1431 if (self->telling) {
1432 /* To prepare for tell(), we need to snapshot a point in the file
1433 * where the decoder's input buffer is empty.
1434 */
1435
1436 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1437 _PyIO_str_getstate, NULL);
1438 if (state == NULL)
1439 return -1;
1440 /* Given this, we know there was a valid snapshot point
1441 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1442 */
Serhiy Storchakabb72c472015-04-19 20:38:19 +03001443 if (PyArg_ParseTuple(state, "OO", &dec_buffer, &dec_flags) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001444 Py_DECREF(state);
1445 return -1;
1446 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001447
1448 if (!PyBytes_Check(dec_buffer)) {
1449 PyErr_Format(PyExc_TypeError,
1450 "decoder getstate() should have returned a bytes "
1451 "object, not '%.200s'",
1452 Py_TYPE(dec_buffer)->tp_name);
1453 Py_DECREF(state);
1454 return -1;
1455 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001456 Py_INCREF(dec_buffer);
1457 Py_INCREF(dec_flags);
1458 Py_DECREF(state);
1459 }
1460
1461 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001462 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001463 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001464 }
1465 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001466 if (chunk_size == NULL)
1467 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001468
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001469 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001470 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1471 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001472 Py_DECREF(chunk_size);
1473 if (input_chunk == NULL)
1474 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001475
1476 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001477 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001478 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001479 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1480 Py_TYPE(input_chunk)->tp_name);
1481 goto fail;
1482 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001483
Antoine Pitroub8503892014-04-29 10:14:02 +02001484 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001485 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001486 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1487 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1488 self->decoder, input_chunk, eof);
1489 }
1490 else {
1491 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1492 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1493 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001494 PyBuffer_Release(&input_chunk_buf);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001495
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001496 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001497 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001498 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001499 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001500 if (nchars > 0)
1501 self->b2cratio = (double) nbytes / nchars;
1502 else
1503 self->b2cratio = 0.0;
1504 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001505 eof = 0;
1506
1507 if (self->telling) {
1508 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1509 * next input to be decoded is dec_buffer + input_chunk.
1510 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001511 PyObject *next_input = dec_buffer;
1512 PyBytes_Concat(&next_input, input_chunk);
1513 if (next_input == NULL) {
1514 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001515 goto fail;
1516 }
Serhiy Storchaka48842712016-04-06 09:45:48 +03001517 Py_XSETREF(self->snapshot, Py_BuildValue("NN", dec_flags, next_input));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001518 }
1519 Py_DECREF(input_chunk);
1520
1521 return (eof == 0);
1522
1523 fail:
1524 Py_XDECREF(dec_buffer);
1525 Py_XDECREF(dec_flags);
1526 Py_XDECREF(input_chunk);
1527 return -1;
1528}
1529
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001530/*[clinic input]
1531_io.TextIOWrapper.read
1532 size as n: io_ssize_t = -1
1533 /
1534[clinic start generated code]*/
1535
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001537_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1538/*[clinic end generated code: output=7e651ce6cc6a25a6 input=8c09398424085cca]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001539{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001540 PyObject *result = NULL, *chunks = NULL;
1541
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001542 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001543 CHECK_CLOSED(self);
1544
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001545 if (self->decoder == NULL)
1546 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001547
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001548 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549 return NULL;
1550
1551 if (n < 0) {
1552 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001553 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 PyObject *decoded;
1555 if (bytes == NULL)
1556 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001557
1558 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1559 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1560 bytes, 1);
1561 else
1562 decoded = PyObject_CallMethodObjArgs(
1563 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001565 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001566 goto fail;
1567
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001568 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569
1570 if (result == NULL) {
1571 Py_DECREF(decoded);
1572 return NULL;
1573 }
1574
1575 PyUnicode_AppendAndDel(&result, decoded);
1576 if (result == NULL)
1577 goto fail;
1578
1579 Py_CLEAR(self->snapshot);
1580 return result;
1581 }
1582 else {
1583 int res = 1;
1584 Py_ssize_t remaining = n;
1585
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001586 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001587 if (result == NULL)
1588 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001589 if (PyUnicode_READY(result) == -1)
1590 goto fail;
1591 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592
1593 /* Keep reading chunks until we have n characters to return */
1594 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001595 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001596 if (res < 0) {
1597 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1598 when EINTR occurs so we needn't do it ourselves. */
1599 if (_PyIO_trap_eintr()) {
1600 continue;
1601 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001603 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604 if (res == 0) /* EOF */
1605 break;
1606 if (chunks == NULL) {
1607 chunks = PyList_New(0);
1608 if (chunks == NULL)
1609 goto fail;
1610 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001611 if (PyUnicode_GET_LENGTH(result) > 0 &&
1612 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613 goto fail;
1614 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001615 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 if (result == NULL)
1617 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001618 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001619 }
1620 if (chunks != NULL) {
1621 if (result != NULL && PyList_Append(chunks, result) < 0)
1622 goto fail;
Serhiy Storchaka48842712016-04-06 09:45:48 +03001623 Py_XSETREF(result, PyUnicode_Join(_PyIO_empty_str, chunks));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001624 if (result == NULL)
1625 goto fail;
1626 Py_CLEAR(chunks);
1627 }
1628 return result;
1629 }
1630 fail:
1631 Py_XDECREF(result);
1632 Py_XDECREF(chunks);
1633 return NULL;
1634}
1635
1636
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001637/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001638 that is to the NUL character. Otherwise the function will produce
1639 incorrect results. */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001640static const char *
1641find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001643 if (kind == PyUnicode_1BYTE_KIND) {
1644 assert(ch < 256);
1645 return (char *) memchr((void *) s, (char) ch, end - s);
1646 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001647 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001648 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001649 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001650 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001651 return s;
1652 if (s == end)
1653 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001654 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001655 }
1656}
1657
1658Py_ssize_t
1659_PyIO_find_line_ending(
1660 int translated, int universal, PyObject *readnl,
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001661 int kind, const char *start, const char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001662{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001663 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001664
1665 if (translated) {
1666 /* Newlines are already translated, only search for \n */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001667 const char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001668 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001669 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 else {
1671 *consumed = len;
1672 return -1;
1673 }
1674 }
1675 else if (universal) {
1676 /* Universal newline search. Find any of \r, \r\n, \n
1677 * The decoder ensures that \r\n are not split in two pieces
1678 */
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001679 const char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001681 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001682 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001683 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001684 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001685 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001686 if (s >= end) {
1687 *consumed = len;
1688 return -1;
1689 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001690 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001691 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001693 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001695 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001696 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001698 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 }
1700 }
1701 }
1702 else {
1703 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001704 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001705 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001706 /* Assume that readnl is an ASCII character. */
1707 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001708 if (readnl_len == 1) {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001709 const char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001711 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001712 *consumed = len;
1713 return -1;
1714 }
1715 else {
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001716 const char *s = start;
1717 const char *e = end - (readnl_len - 1)*kind;
1718 const char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001719 if (e < s)
1720 e = s;
1721 while (s < e) {
1722 Py_ssize_t i;
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02001723 const char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001724 if (pos == NULL || pos >= e)
1725 break;
1726 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001727 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001728 break;
1729 }
1730 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001731 return (pos - start)/kind + readnl_len;
1732 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001733 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001734 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 if (pos == NULL)
1736 *consumed = len;
1737 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001738 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 return -1;
1740 }
1741 }
1742}
1743
1744static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001745_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746{
1747 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1748 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1749 int res;
1750
1751 CHECK_CLOSED(self);
1752
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001753 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001754 return NULL;
1755
1756 chunked = 0;
1757
1758 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001759 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001760 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001761 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762 Py_ssize_t consumed = 0;
1763
1764 /* First, get some data if necessary */
1765 res = 1;
1766 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001767 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001768 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001769 if (res < 0) {
1770 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1771 when EINTR occurs so we needn't do it ourselves. */
1772 if (_PyIO_trap_eintr()) {
1773 continue;
1774 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001775 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001776 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777 if (res == 0)
1778 break;
1779 }
1780 if (res == 0) {
1781 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001782 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001783 Py_CLEAR(self->snapshot);
1784 start = endpos = offset_to_buffer = 0;
1785 break;
1786 }
1787
1788 if (remaining == NULL) {
1789 line = self->decoded_chars;
1790 start = self->decoded_chars_used;
1791 offset_to_buffer = 0;
1792 Py_INCREF(line);
1793 }
1794 else {
1795 assert(self->decoded_chars_used == 0);
1796 line = PyUnicode_Concat(remaining, self->decoded_chars);
1797 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001798 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001799 Py_CLEAR(remaining);
1800 if (line == NULL)
1801 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001802 if (PyUnicode_READY(line) == -1)
1803 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001804 }
1805
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001806 ptr = PyUnicode_DATA(line);
1807 line_len = PyUnicode_GET_LENGTH(line);
1808 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001809
1810 endpos = _PyIO_find_line_ending(
1811 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001812 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001813 ptr + kind * start,
1814 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001815 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816 if (endpos >= 0) {
1817 endpos += start;
1818 if (limit >= 0 && (endpos - start) + chunked >= limit)
1819 endpos = start + limit - chunked;
1820 break;
1821 }
1822
1823 /* We can put aside up to `endpos` */
1824 endpos = consumed + start;
1825 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1826 /* Didn't find line ending, but reached length limit */
1827 endpos = start + limit - chunked;
1828 break;
1829 }
1830
1831 if (endpos > start) {
1832 /* No line ending seen yet - put aside current data */
1833 PyObject *s;
1834 if (chunks == NULL) {
1835 chunks = PyList_New(0);
1836 if (chunks == NULL)
1837 goto error;
1838 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001839 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001840 if (s == NULL)
1841 goto error;
1842 if (PyList_Append(chunks, s) < 0) {
1843 Py_DECREF(s);
1844 goto error;
1845 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001846 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001847 Py_DECREF(s);
1848 }
1849 /* There may be some remaining bytes we'll have to prepend to the
1850 next chunk of data */
1851 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001852 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001853 if (remaining == NULL)
1854 goto error;
1855 }
1856 Py_CLEAR(line);
1857 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001858 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001859 }
1860
1861 if (line != NULL) {
1862 /* Our line ends in the current buffer */
1863 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001864 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1865 PyObject *s = PyUnicode_Substring(line, start, endpos);
1866 Py_CLEAR(line);
1867 if (s == NULL)
1868 goto error;
1869 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870 }
1871 }
1872 if (remaining != NULL) {
1873 if (chunks == NULL) {
1874 chunks = PyList_New(0);
1875 if (chunks == NULL)
1876 goto error;
1877 }
1878 if (PyList_Append(chunks, remaining) < 0)
1879 goto error;
1880 Py_CLEAR(remaining);
1881 }
1882 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001883 if (line != NULL) {
1884 if (PyList_Append(chunks, line) < 0)
1885 goto error;
1886 Py_DECREF(line);
1887 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001888 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1889 if (line == NULL)
1890 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001891 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001892 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001893 if (line == NULL) {
1894 Py_INCREF(_PyIO_empty_str);
1895 line = _PyIO_empty_str;
1896 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001897
1898 return line;
1899
1900 error:
1901 Py_XDECREF(chunks);
1902 Py_XDECREF(remaining);
1903 Py_XDECREF(line);
1904 return NULL;
1905}
1906
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001907/*[clinic input]
1908_io.TextIOWrapper.readline
1909 size: Py_ssize_t = -1
1910 /
1911[clinic start generated code]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001912
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001913static PyObject *
1914_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
1915/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
1916{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001917 CHECK_ATTACHED(self);
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03001918 return _textiowrapper_readline(self, size);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001919}
1920
1921/* Seek and Tell */
1922
1923typedef struct {
1924 Py_off_t start_pos;
1925 int dec_flags;
1926 int bytes_to_feed;
1927 int chars_to_skip;
1928 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001929} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   respectively.
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
 */
1937
/* Total size in bytes of the packed cookie. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order.  Each offset is the previous
   field's offset plus that field's size. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie.  Each offset
   is the previous field's offset plus that field's size. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1962
1963static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001964textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001965{
1966 unsigned char buffer[COOKIE_BUF_LEN];
1967 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1968 if (cookieLong == NULL)
1969 return -1;
1970
1971 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001972 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001973 Py_DECREF(cookieLong);
1974 return -1;
1975 }
1976 Py_DECREF(cookieLong);
1977
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001978 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1979 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1980 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1981 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1982 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001983
1984 return 0;
1985}
1986
1987static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001988textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989{
1990 unsigned char buffer[COOKIE_BUF_LEN];
1991
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001992 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1993 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1994 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1995 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1996 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001997
Christian Heimes743e0cd2012-10-17 23:52:17 +02001998 return _PyLong_FromByteArray(buffer, sizeof(buffer),
1999 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002001
2002static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002003_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004{
2005 PyObject *res;
2006 /* When seeking to the start of the stream, we call decoder.reset()
2007 rather than decoder.getstate().
2008 This is for a few decoders such as utf-16 for which the state value
2009 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2010 utf-16, that we are expecting a BOM).
2011 */
2012 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2013 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2014 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002015 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2016 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002017 if (res == NULL)
2018 return -1;
2019 Py_DECREF(res);
2020 return 0;
2021}
2022
Antoine Pitroue4501852009-05-14 18:55:55 +00002023static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002024_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002025{
2026 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002027 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002028 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2029 self->encoding_start_of_stream = 1;
2030 }
2031 else {
2032 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2033 _PyIO_zero, NULL);
2034 self->encoding_start_of_stream = 0;
2035 }
2036 if (res == NULL)
2037 return -1;
2038 Py_DECREF(res);
2039 return 0;
2040}
2041
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002042static int
2043_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2044{
2045 /* Same as _textiowrapper_decoder_setstate() above. */
2046 return _textiowrapper_encoder_reset(
2047 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2048}
2049
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002050/*[clinic input]
2051_io.TextIOWrapper.seek
2052 cookie as cookieObj: object
2053 whence: int = 0
2054 /
2055[clinic start generated code]*/
2056
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002057static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002058_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2059/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002060{
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002061 PyObject *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002062 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 PyObject *res;
2064 int cmp;
2065
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002066 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 CHECK_CLOSED(self);
2068
2069 Py_INCREF(cookieObj);
2070
2071 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002072 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002073 goto fail;
2074 }
2075
2076 if (whence == 1) {
2077 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002078 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 if (cmp < 0)
2080 goto fail;
2081
2082 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002083 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002084 goto fail;
2085 }
2086
2087 /* Seeking to the current position should attempt to
2088 * sync the underlying buffer with the current position.
2089 */
2090 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002091 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 if (cookieObj == NULL)
2093 goto fail;
2094 }
2095 else if (whence == 2) {
2096 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002097 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002098 if (cmp < 0)
2099 goto fail;
2100
2101 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002102 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 goto fail;
2104 }
2105
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002106 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002107 if (res == NULL)
2108 goto fail;
2109 Py_DECREF(res);
2110
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002111 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002112 Py_CLEAR(self->snapshot);
2113 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002114 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002115 if (res == NULL)
2116 goto fail;
2117 Py_DECREF(res);
2118 }
2119
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002120 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002121 Py_CLEAR(cookieObj);
2122 if (res == NULL)
2123 goto fail;
2124 if (self->encoder) {
2125 /* If seek() == 0, we are at the start of stream, otherwise not */
2126 cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
2127 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2128 Py_DECREF(res);
2129 goto fail;
2130 }
2131 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002132 return res;
2133 }
2134 else if (whence != 0) {
2135 PyErr_Format(PyExc_ValueError,
2136 "invalid whence (%d, should be 0, 1 or 2)", whence);
2137 goto fail;
2138 }
2139
Antoine Pitroue4501852009-05-14 18:55:55 +00002140 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002141 if (cmp < 0)
2142 goto fail;
2143
2144 if (cmp == 1) {
2145 PyErr_Format(PyExc_ValueError,
2146 "negative seek position %R", cookieObj);
2147 goto fail;
2148 }
2149
2150 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2151 if (res == NULL)
2152 goto fail;
2153 Py_DECREF(res);
2154
2155 /* The strategy of seek() is to go back to the safe start point
2156 * and replay the effect of read(chars_to_skip) from there.
2157 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002158 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002159 goto fail;
2160
2161 /* Seek back to the safe start point. */
2162 posobj = PyLong_FromOff_t(cookie.start_pos);
2163 if (posobj == NULL)
2164 goto fail;
2165 res = PyObject_CallMethodObjArgs(self->buffer,
2166 _PyIO_str_seek, posobj, NULL);
2167 Py_DECREF(posobj);
2168 if (res == NULL)
2169 goto fail;
2170 Py_DECREF(res);
2171
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002172 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173 Py_CLEAR(self->snapshot);
2174
2175 /* Restore the decoder to its state from the safe start point. */
2176 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002177 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002178 goto fail;
2179 }
2180
2181 if (cookie.chars_to_skip) {
2182 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002183 PyObject *input_chunk = _PyObject_CallMethodId(
2184 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002185 PyObject *decoded;
2186
2187 if (input_chunk == NULL)
2188 goto fail;
2189
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002190 if (!PyBytes_Check(input_chunk)) {
2191 PyErr_Format(PyExc_TypeError,
2192 "underlying read() should have returned a bytes "
2193 "object, not '%.200s'",
2194 Py_TYPE(input_chunk)->tp_name);
2195 Py_DECREF(input_chunk);
2196 goto fail;
2197 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002198
2199 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2200 if (self->snapshot == NULL) {
2201 Py_DECREF(input_chunk);
2202 goto fail;
2203 }
2204
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002205 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2206 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002207
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002208 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002209 goto fail;
2210
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002211 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002212
2213 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002214 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002215 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2216 goto fail;
2217 }
2218 self->decoded_chars_used = cookie.chars_to_skip;
2219 }
2220 else {
2221 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2222 if (self->snapshot == NULL)
2223 goto fail;
2224 }
2225
Antoine Pitroue4501852009-05-14 18:55:55 +00002226 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2227 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002228 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002229 goto fail;
2230 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002231 return cookieObj;
2232 fail:
2233 Py_XDECREF(cookieObj);
2234 return NULL;
2235
2236}
2237
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002238/*[clinic input]
2239_io.TextIOWrapper.tell
2240[clinic start generated code]*/
2241
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002242static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002243_io_TextIOWrapper_tell_impl(textio *self)
2244/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002245{
    /* Return the current logical position of the text stream.
     *
     * Pending writes are flushed first, then the underlying buffer is asked
     * for its byte position.  When there is no decoder or no snapshot, that
     * position object is returned unchanged.  Otherwise the byte position is
     * rewound to the snapshot point and a cookie (start_pos, dec_flags,
     * bytes_to_feed, chars_to_skip, need_eof) is computed by replaying the
     * snapshot bytes through the decoder.  The decoder state is saved before
     * the replay and restored on every exit path after that point.
     *
     * Returns the position object, or NULL on failure.
     */
2246 PyObject *res;
2247 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002248 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002249 PyObject *next_input;
2250 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002251 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252 PyObject *saved_state = NULL;
2253 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002254 Py_ssize_t dec_buffer_len;
2255 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002256
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002257 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002258 CHECK_CLOSED(self);
2259
2260 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002261 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002262 goto fail;
2263 }
2264 if (!self->telling) {
2265 PyErr_SetString(PyExc_IOError,
2266 "telling position disabled by next() call");
2267 goto fail;
2268 }
2269
    /* Flush pending writes before asking the buffer for its position. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002270 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002271 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002272 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002273 if (res == NULL)
2274 goto fail;
2275 Py_DECREF(res);
2276
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002277 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002278 if (posobj == NULL)
2279 goto fail;
2280
    /* No decoder or no snapshot: the buffer's position is the answer. */
2281 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002282 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002283 return posobj;
2284 }
2285
2286#if defined(HAVE_LARGEFILE_SUPPORT)
2287 cookie.start_pos = PyLong_AsLongLong(posobj);
2288#else
2289 cookie.start_pos = PyLong_AsLong(posobj);
2290#endif
    /* posobj is released first; the conversion error is checked afterwards. */
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002291 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002292 if (PyErr_Occurred())
2293 goto fail;
2294
2295 /* Skip backward to the snapshot point (see _read_chunk). */
Serhiy Storchakabb72c472015-04-19 20:38:19 +03002296 if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002297 goto fail;
2298
    /* next_input is a borrowed reference kept alive by self->snapshot. */
2299 assert (PyBytes_Check(next_input));
2300
2301 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2302
2303 /* How many decoded characters have been used up since the snapshot? */
2304 if (self->decoded_chars_used == 0) {
2305 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002306 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002307 }
2308
2309 chars_to_skip = self->decoded_chars_used;
2310
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002311 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002312 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2313 _PyIO_str_getstate, NULL);
2314 if (saved_state == NULL)
2315 goto fail;
2316
    /* DECODER_GETSTATE(): call decoder.getstate(), store the flags in
       dec_flags and the length of the buffered bytes in dec_buffer_len.
       Jumps to fail on any error, including a non-bytes buffer. */
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002317#define DECODER_GETSTATE() do { \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002318 PyObject *dec_buffer; \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002319 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2320 _PyIO_str_getstate, NULL); \
2321 if (_state == NULL) \
2322 goto fail; \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002323 if (!PyArg_ParseTuple(_state, "Oi", &dec_buffer, &dec_flags)) { \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002324 Py_DECREF(_state); \
2325 goto fail; \
2326 } \
Serhiy Storchaka008d88b2015-05-06 09:53:07 +03002327 if (!PyBytes_Check(dec_buffer)) { \
2328 PyErr_Format(PyExc_TypeError, \
2329 "decoder getstate() should have returned a bytes " \
2330 "object, not '%.200s'", \
2331 Py_TYPE(dec_buffer)->tp_name); \
2332 Py_DECREF(_state); \
2333 goto fail; \
2334 } \
2335 dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002336 Py_DECREF(_state); \
2337 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002338
    /* DECODER_DECODE(start, len, res): feed len bytes starting at start to
       decoder.decode() and store the number of decoded characters in res.
       Jumps to fail when check_decoded() rejects the result. */
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002339#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002340 PyObject *_decoded = _PyObject_CallMethodId( \
2341 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002342 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002343 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002344 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002345 Py_DECREF(_decoded); \
2346 } while (0)
2347
2348 /* Fast search for an acceptable start point, close to our
2349 current pos */
    /* The first guess scales the consumed character count by b2cratio. */
2350 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2351 skip_back = 1;
2352 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2353 input = PyBytes_AS_STRING(next_input);
2354 while (skip_bytes > 0) {
2355 /* Decode up to tentative start point */
2356 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2357 goto fail;
2358 DECODER_DECODE(input, skip_bytes, chars_decoded);
2359 if (chars_decoded <= chars_to_skip) {
2360 DECODER_GETSTATE();
2361 if (dec_buffer_len == 0) {
2362 /* Before pos and no bytes buffered in decoder => OK */
2363 cookie.dec_flags = dec_flags;
2364 chars_to_skip -= chars_decoded;
2365 break;
2366 }
2367 /* Skip back by buffered amount and reset heuristic */
2368 skip_bytes -= dec_buffer_len;
2369 skip_back = 1;
2370 }
2371 else {
2372 /* We're too far ahead, skip back a bit */
            /* The step doubles on each consecutive overshoot. */
2373 skip_bytes -= skip_back;
2374 skip_back *= 2;
2375 }
2376 }
    /* No usable start point found: fall back to the snapshot point itself. */
2377 if (skip_bytes <= 0) {
2378 skip_bytes = 0;
2379 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2380 goto fail;
2381 }
2382
2383 /* Note our initial start point. */
2384 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002385 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002386 if (chars_to_skip == 0)
2387 goto finally;
2388
2389 /* We should be close to the desired position. Now feed the decoder one
2390 * byte at a time until we reach the `chars_to_skip` target.
2391 * As we go, note the nearest "safe start point" before the current
2392 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002393 * can safely start from there and advance to this location).
2394 */
2395 chars_decoded = 0;
2396 input = PyBytes_AS_STRING(next_input);
2397 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002398 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002400 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002401
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002402 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002403 /* We got n chars for 1 byte */
2404 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002405 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002406 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002407
2408 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2409 /* Decoder buffer is empty, so this is a safe start point. */
2410 cookie.start_pos += cookie.bytes_to_feed;
2411 chars_to_skip -= chars_decoded;
2412 cookie.dec_flags = dec_flags;
2413 cookie.bytes_to_feed = 0;
2414 chars_decoded = 0;
2415 }
2416 if (chars_decoded >= chars_to_skip)
2417 break;
2418 input++;
2419 }
    /* The loop ran off the end of the snapshot bytes without reaching the
       target (a break leaves input strictly before input_end). */
2420 if (input == input_end) {
2421 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002422 PyObject *decoded = _PyObject_CallMethodId(
2423 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002424 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002425 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002426 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002427 Py_DECREF(decoded);
2428 cookie.need_eof = 1;
2429
2430 if (chars_decoded < chars_to_skip) {
2431 PyErr_SetString(PyExc_IOError,
2432 "can't reconstruct logical file position");
2433 goto fail;
2434 }
2435 }
2436
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002437finally:
    /* Success path: put the decoder back into the state saved before the
       search, then drop the saved state. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002438 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002439 Py_DECREF(saved_state);
2440 if (res == NULL)
2441 return NULL;
2442 Py_DECREF(res);
2443
2444 /* The returned cookie corresponds to the last safe start point. */
2445 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002446 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002447
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002448fail:
    /* Error path: if the decoder state was already saved, restore it.  The
       pending exception is fetched before the setstate() call and handed to
       _PyErr_ChainExceptions() afterwards. */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002449 if (saved_state) {
2450 PyObject *type, *value, *traceback;
2451 PyErr_Fetch(&type, &value, &traceback);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002452 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002453 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002454 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002455 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002456 }
2457 return NULL;
2458}
2459
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002460/*[clinic input]
2461_io.TextIOWrapper.truncate
2462 pos: object = None
2463 /
2464[clinic start generated code]*/
2465
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002466static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002467_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2468/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002469{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002470 PyObject *res;
2471
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002472 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002473
2474 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2475 if (res == NULL)
2476 return NULL;
2477 Py_DECREF(res);
2478
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002479 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480}
2481
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002482static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002483textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002484{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002485 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002486
2487 CHECK_INITIALIZED(self);
2488
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002489 res = PyUnicode_FromString("<_io.TextIOWrapper");
2490 if (res == NULL)
2491 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002492
Martin v. Löwis767046a2011-10-14 15:35:36 +02002493 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002494 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002495 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002496 PyErr_Clear();
2497 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002498 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002499 }
2500 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002501 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002502 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002503 if (s == NULL)
2504 goto error;
2505 PyUnicode_AppendAndDel(&res, s);
2506 if (res == NULL)
2507 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002508 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002509 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002510 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002511 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002512 PyErr_Clear();
2513 else
2514 goto error;
2515 }
2516 else {
2517 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2518 Py_DECREF(modeobj);
2519 if (s == NULL)
2520 goto error;
2521 PyUnicode_AppendAndDel(&res, s);
2522 if (res == NULL)
2523 return NULL;
2524 }
2525 s = PyUnicode_FromFormat("%U encoding=%R>",
2526 res, self->encoding);
2527 Py_DECREF(res);
2528 return s;
2529error:
2530 Py_XDECREF(res);
2531 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002532}
2533
2534
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002535/* Inquiries */
2536
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002537/*[clinic input]
2538_io.TextIOWrapper.fileno
2539[clinic start generated code]*/
2540
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002542_io_TextIOWrapper_fileno_impl(textio *self)
2543/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002544{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002545 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002546 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002547}
2548
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002549/*[clinic input]
2550_io.TextIOWrapper.seekable
2551[clinic start generated code]*/
2552
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002553static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002554_io_TextIOWrapper_seekable_impl(textio *self)
2555/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002557 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002558 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002559}
2560
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002561/*[clinic input]
2562_io.TextIOWrapper.readable
2563[clinic start generated code]*/
2564
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002565static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002566_io_TextIOWrapper_readable_impl(textio *self)
2567/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002568{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002569 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002570 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002571}
2572
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002573/*[clinic input]
2574_io.TextIOWrapper.writable
2575[clinic start generated code]*/
2576
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002577static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002578_io_TextIOWrapper_writable_impl(textio *self)
2579/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002580{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002581 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002582 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002583}
2584
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002585/*[clinic input]
2586_io.TextIOWrapper.isatty
2587[clinic start generated code]*/
2588
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002589static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002590_io_TextIOWrapper_isatty_impl(textio *self)
2591/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002592{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002593 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002594 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002595}
2596
2597static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002598textiowrapper_getstate(textio *self, PyObject *args)
2599{
2600 PyErr_Format(PyExc_TypeError,
2601 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2602 return NULL;
2603}
2604
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002605/*[clinic input]
2606_io.TextIOWrapper.flush
2607[clinic start generated code]*/
2608
Antoine Pitrou243757e2010-11-05 21:15:39 +00002609static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002610_io_TextIOWrapper_flush_impl(textio *self)
2611/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002613 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002614 CHECK_CLOSED(self);
2615 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002616 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002617 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002618 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619}
2620
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002621/*[clinic input]
2622_io.TextIOWrapper.close
2623[clinic start generated code]*/
2624
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002625static PyObject *
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002626_io_TextIOWrapper_close_impl(textio *self)
2627/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002628{
2629 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002630 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002631 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002632
Antoine Pitrou6be88762010-05-03 16:48:20 +00002633 res = textiowrapper_closed_get(self, NULL);
2634 if (res == NULL)
2635 return NULL;
2636 r = PyObject_IsTrue(res);
2637 Py_DECREF(res);
2638 if (r < 0)
2639 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002640
Antoine Pitrou6be88762010-05-03 16:48:20 +00002641 if (r > 0) {
2642 Py_RETURN_NONE; /* stream already closed */
2643 }
2644 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002645 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002646 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002647 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002648 if (res)
2649 Py_DECREF(res);
2650 else
2651 PyErr_Clear();
2652 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002653 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002654 if (res == NULL)
2655 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002656 else
2657 Py_DECREF(res);
2658
Benjamin Peterson68623612012-12-20 11:53:11 -06002659 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2660 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002661 _PyErr_ChainExceptions(exc, val, tb);
2662 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002663 }
2664 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002665 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002666}
2667
2668static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002669textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002670{
2671 PyObject *line;
2672
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002673 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002674
2675 self->telling = 0;
2676 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2677 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002678 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002679 }
2680 else {
2681 line = PyObject_CallMethodObjArgs((PyObject *)self,
2682 _PyIO_str_readline, NULL);
2683 if (line && !PyUnicode_Check(line)) {
2684 PyErr_Format(PyExc_IOError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03002685 "readline() should have returned a str object, "
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686 "not '%.200s'", Py_TYPE(line)->tp_name);
2687 Py_DECREF(line);
2688 return NULL;
2689 }
2690 }
2691
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002692 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693 return NULL;
2694
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002695 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696 /* Reached EOF or would have blocked */
2697 Py_DECREF(line);
2698 Py_CLEAR(self->snapshot);
2699 self->telling = self->seekable;
2700 return NULL;
2701 }
2702
2703 return line;
2704}
2705
2706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002707textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002708{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002709 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002710 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002711}
2712
2713static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002714textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002715{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002716 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002717 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2718}
2719
2720static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002721textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002722{
2723 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002724 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002725 if (self->decoder == NULL)
2726 Py_RETURN_NONE;
2727 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2728 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002729 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2730 PyErr_Clear();
2731 Py_RETURN_NONE;
2732 }
2733 else {
2734 return NULL;
2735 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002736 }
2737 return res;
2738}
2739
2740static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002741textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002742{
2743 CHECK_INITIALIZED(self);
2744 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2745}
2746
2747static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002748textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002749{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002750 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002751 return PyLong_FromSsize_t(self->chunk_size);
2752}
2753
2754static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002755textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002756{
2757 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002758 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002759 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002760 if (n == -1 && PyErr_Occurred())
2761 return -1;
2762 if (n <= 0) {
2763 PyErr_SetString(PyExc_ValueError,
2764 "a strictly positive integer is required");
2765 return -1;
2766 }
2767 self->chunk_size = n;
2768 return 0;
2769}
2770
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002771#include "clinic/textio.c.h"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002772
/* Method table for IncrementalNewlineDecoder (decode, getstate, setstate,
   reset).  Each entry is a *_METHODDEF macro -- presumably supplied by the
   clinic/textio.c.h include; confirm there.  The {NULL} entry terminates
   the table. */
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002773static PyMethodDef incrementalnewlinedecoder_methods[] = {
2774 _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
2775 _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
2776 _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
2777 _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
2778 {NULL}
2779};
2780
/* Computed attributes of IncrementalNewlineDecoder: "newlines" has a getter
   and no setter.  The {NULL} entry terminates the table. */
2781static PyGetSetDef incrementalnewlinedecoder_getset[] = {
2782 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
2783 {NULL}
2784};
2785
/* Type object for _io.IncrementalNewlineDecoder.  Slots are filled
   positionally; besides the name and basic size, only tp_dealloc, tp_flags,
   tp_doc, tp_methods, tp_getset, tp_init and tp_new are set, every other
   slot is 0.  The flags allow subclassing (Py_TPFLAGS_BASETYPE). */
2786PyTypeObject PyIncrementalNewlineDecoder_Type = {
2787 PyVarObject_HEAD_INIT(NULL, 0)
2788 "_io.IncrementalNewlineDecoder", /*tp_name*/
2789 sizeof(nldecoder_object), /*tp_basicsize*/
2790 0, /*tp_itemsize*/
2791 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
2792 0, /*tp_print*/
2793 0, /*tp_getattr*/
2794 0, /*tp_setattr*/
2795 0, /*tp_compare */
2796 0, /*tp_repr*/
2797 0, /*tp_as_number*/
2798 0, /*tp_as_sequence*/
2799 0, /*tp_as_mapping*/
2800 0, /*tp_hash */
2801 0, /*tp_call*/
2802 0, /*tp_str*/
2803 0, /*tp_getattro*/
2804 0, /*tp_setattro*/
2805 0, /*tp_as_buffer*/
2806 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
2807 _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
2808 0, /* tp_traverse */
2809 0, /* tp_clear */
2810 0, /* tp_richcompare */
2811 0, /*tp_weaklistoffset*/
2812 0, /* tp_iter */
2813 0, /* tp_iternext */
2814 incrementalnewlinedecoder_methods, /* tp_methods */
2815 0, /* tp_members */
2816 incrementalnewlinedecoder_getset, /* tp_getset */
2817 0, /* tp_base */
2818 0, /* tp_dict */
2819 0, /* tp_descr_get */
2820 0, /* tp_descr_set */
2821 0, /* tp_dictoffset */
2822 _io_IncrementalNewlineDecoder___init__, /* tp_init */
2823 0, /* tp_alloc */
2824 PyType_GenericNew, /* tp_new */
2825};
2826
2827
/* Method table for TextIOWrapper.  Most entries are *_METHODDEF macros --
   presumably supplied by the clinic/textio.c.h include; confirm there.
   "__getstate__" is registered by hand as a METH_NOARGS method backed by
   textiowrapper_getstate.  The {NULL, NULL} entry terminates the table. */
2828static PyMethodDef textiowrapper_methods[] = {
2829 _IO_TEXTIOWRAPPER_DETACH_METHODDEF
2830 _IO_TEXTIOWRAPPER_WRITE_METHODDEF
2831 _IO_TEXTIOWRAPPER_READ_METHODDEF
2832 _IO_TEXTIOWRAPPER_READLINE_METHODDEF
2833 _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
2834 _IO_TEXTIOWRAPPER_CLOSE_METHODDEF
2835
2836 _IO_TEXTIOWRAPPER_FILENO_METHODDEF
2837 _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
2838 _IO_TEXTIOWRAPPER_READABLE_METHODDEF
2839 _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
2840 _IO_TEXTIOWRAPPER_ISATTY_METHODDEF
Antoine Pitrou243757e2010-11-05 21:15:39 +00002841 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002842
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002843 _IO_TEXTIOWRAPPER_SEEK_METHODDEF
2844 _IO_TEXTIOWRAPPER_TELL_METHODDEF
2845 _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002846 {NULL, NULL}
2847};
2848
/* Struct fields of textio exposed directly as Python attributes.
   "encoding", "buffer" and "line_buffering" are READONLY; "_finalizing" is
   declared with flags 0, i.e. without READONLY.  The {NULL} entry
   terminates the table. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002849static PyMemberDef textiowrapper_members[] = {
2850 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2851 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2852 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02002853 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002854 {NULL}
2855};
2856
/* Computed attributes of TextIOWrapper.  "name", "closed", "newlines" and
   "errors" have a getter only; "_CHUNK_SIZE" is the only entry with a
   setter.  A "mode" entry is present but commented out.  The {NULL} entry
   terminates the table. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002857static PyGetSetDef textiowrapper_getset[] = {
2858 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2859 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002860/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2861*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002862 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2863 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2864 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2865 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002866 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002867};
2868
/* Type object for _io.TextIOWrapper.  Slots are filled positionally.  Set
   slots: tp_dealloc, tp_repr, tp_flags, tp_doc, tp_traverse, tp_clear,
   tp_weaklistoffset, tp_iternext, tp_methods, tp_members, tp_getset,
   tp_dictoffset, tp_init and tp_new; every other slot is 0.  The flags
   include Py_TPFLAGS_HAVE_GC and Py_TPFLAGS_HAVE_FINALIZE, while the
   tp_finalize slot itself is initialized to 0 here. */
2869PyTypeObject PyTextIOWrapper_Type = {
2870 PyVarObject_HEAD_INIT(NULL, 0)
2871 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002872 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002873 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002874 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002875 0, /*tp_print*/
2876 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002877 0, /*tp_setattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002878 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002879 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002880 0, /*tp_as_number*/
2881 0, /*tp_as_sequence*/
2882 0, /*tp_as_mapping*/
2883 0, /*tp_hash */
2884 0, /*tp_call*/
2885 0, /*tp_str*/
2886 0, /*tp_getattro*/
2887 0, /*tp_setattro*/
2888 0, /*tp_as_buffer*/
2889 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02002890 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002891 _io_TextIOWrapper___init____doc__, /* tp_doc */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002892 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2893 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002894 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002895 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002896 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002897 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2898 textiowrapper_methods, /* tp_methods */
2899 textiowrapper_members, /* tp_members */
2900 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002901 0, /* tp_base */
2902 0, /* tp_dict */
2903 0, /* tp_descr_get */
2904 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002905 offsetof(textio, dict), /*tp_dictoffset*/
Serhiy Storchakaf24131f2015-04-16 11:19:43 +03002906 _io_TextIOWrapper___init__, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002907 0, /* tp_alloc */
2908 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02002909 0, /* tp_free */
2910 0, /* tp_is_gc */
2911 0, /* tp_bases */
2912 0, /* tp_mro */
2913 0, /* tp_cache */
2914 0, /* tp_subclasses */
2915 0, /* tp_weaklist */
2916 0, /* tp_del */
2917 0, /* tp_version_tag */
2918 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002919};