blob: 0c1b13ec2986fc5974242ca5a61269731eeb796b [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020014_Py_IDENTIFIER(close);
15_Py_IDENTIFIER(_dealloc_warn);
16_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020017_Py_IDENTIFIER(fileno);
18_Py_IDENTIFIER(flush);
19_Py_IDENTIFIER(getpreferredencoding);
20_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020021_Py_IDENTIFIER(mode);
22_Py_IDENTIFIER(name);
23_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020024_Py_IDENTIFIER(read);
Martin v. Löwis767046a2011-10-14 15:35:36 +020025_Py_IDENTIFIER(read1);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020026_Py_IDENTIFIER(readable);
27_Py_IDENTIFIER(replace);
28_Py_IDENTIFIER(reset);
29_Py_IDENTIFIER(seek);
30_Py_IDENTIFIER(seekable);
31_Py_IDENTIFIER(setstate);
32_Py_IDENTIFIER(tell);
33_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
Antoine Pitrou712cb732013-12-21 15:51:54 +010048 _PyIO_State *state = IO_STATE();
49 if (state != NULL)
50 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000051 return NULL;
52}
53
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000054PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000055 "Separate the underlying buffer from the TextIOBase and return it.\n"
56 "\n"
57 "After the underlying buffer has been detached, the TextIO is in an\n"
58 "unusable state.\n"
59 );
60
61static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063{
64 return _unsupported("detach");
65}
66
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000067PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000068 "Read at most n characters from stream.\n"
69 "\n"
70 "Read from underlying buffer until we have n characters or we hit EOF.\n"
71 "If n is negative or omitted, read until EOF.\n"
72 );
73
74static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076{
77 return _unsupported("read");
78}
79
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000080PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000081 "Read until newline or EOF.\n"
82 "\n"
83 "Returns an empty string if EOF is hit immediately.\n"
84 );
85
86static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000087textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000088{
89 return _unsupported("readline");
90}
91
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000092PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000093 "Write string to stream.\n"
94 "Returns the number of characters written (which is always equal to\n"
95 "the length of the string).\n"
96 );
97
98static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000099textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100{
101 return _unsupported("write");
102}
103
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000104PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000105 "Encoding of the text stream.\n"
106 "\n"
107 "Subclasses should override.\n"
108 );
109
110static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000111textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000112{
113 Py_RETURN_NONE;
114}
115
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000116PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000117 "Line endings translated so far.\n"
118 "\n"
119 "Only line endings translated during reading are considered.\n"
120 "\n"
121 "Subclasses should override.\n"
122 );
123
124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126{
127 Py_RETURN_NONE;
128}
129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000130PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000131 "The error setting of the decoder or encoder.\n"
132 "\n"
133 "Subclasses should override.\n"
134 );
135
136static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000137textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000138{
139 Py_RETURN_NONE;
140}
141
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000142
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000143static PyMethodDef textiobase_methods[] = {
144 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
145 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
146 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
147 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000148 {NULL, NULL}
149};
150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyGetSetDef textiobase_getset[] = {
152 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
153 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
154 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000155 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156};
157
158PyTypeObject PyTextIOBase_Type = {
159 PyVarObject_HEAD_INIT(NULL, 0)
160 "_io._TextIOBase", /*tp_name*/
161 0, /*tp_basicsize*/
162 0, /*tp_itemsize*/
163 0, /*tp_dealloc*/
164 0, /*tp_print*/
165 0, /*tp_getattr*/
166 0, /*tp_setattr*/
167 0, /*tp_compare */
168 0, /*tp_repr*/
169 0, /*tp_as_number*/
170 0, /*tp_as_sequence*/
171 0, /*tp_as_mapping*/
172 0, /*tp_hash */
173 0, /*tp_call*/
174 0, /*tp_str*/
175 0, /*tp_getattro*/
176 0, /*tp_setattro*/
177 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200178 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
179 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000180 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000181 0, /* tp_traverse */
182 0, /* tp_clear */
183 0, /* tp_richcompare */
184 0, /* tp_weaklistoffset */
185 0, /* tp_iter */
186 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000189 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000190 &PyIOBase_Type, /* tp_base */
191 0, /* tp_dict */
192 0, /* tp_descr_get */
193 0, /* tp_descr_set */
194 0, /* tp_dictoffset */
195 0, /* tp_init */
196 0, /* tp_alloc */
197 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200198 0, /* tp_free */
199 0, /* tp_is_gc */
200 0, /* tp_bases */
201 0, /* tp_mro */
202 0, /* tp_cache */
203 0, /* tp_subclasses */
204 0, /* tp_weaklist */
205 0, /* tp_del */
206 0, /* tp_version_tag */
207 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000208};
209
210
211/* IncrementalNewlineDecoder */
212
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000213PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000214 "Codec used when reading a file in universal newlines mode. It wraps\n"
215 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
216 "records the types of newlines encountered. When used with\n"
217 "translate=False, it ensures that the newline sequence is returned in\n"
218 "one piece. When used with decoder=None, it expects unicode strings as\n"
219 "decode input and translates newlines without first invoking an external\n"
220 "decoder.\n"
221 );
222
223typedef struct {
224 PyObject_HEAD
225 PyObject *decoder;
226 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000227 signed int pendingcr: 1;
228 signed int translate: 1;
229 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000230} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
232static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000233incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000234 PyObject *args, PyObject *kwds)
235{
236 PyObject *decoder;
237 int translate;
238 PyObject *errors = NULL;
239 char *kwlist[] = {"decoder", "translate", "errors", NULL};
240
241 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
242 kwlist, &decoder, &translate, &errors))
243 return -1;
244
245 self->decoder = decoder;
246 Py_INCREF(decoder);
247
248 if (errors == NULL) {
249 self->errors = PyUnicode_FromString("strict");
250 if (self->errors == NULL)
251 return -1;
252 }
253 else {
254 Py_INCREF(errors);
255 self->errors = errors;
256 }
257
258 self->translate = translate;
259 self->seennl = 0;
260 self->pendingcr = 0;
261
262 return 0;
263}
264
265static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000266incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267{
268 Py_CLEAR(self->decoder);
269 Py_CLEAR(self->errors);
270 Py_TYPE(self)->tp_free((PyObject *)self);
271}
272
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200273static int
274check_decoded(PyObject *decoded)
275{
276 if (decoded == NULL)
277 return -1;
278 if (!PyUnicode_Check(decoded)) {
279 PyErr_Format(PyExc_TypeError,
280 "decoder should return a string result, not '%.200s'",
281 Py_TYPE(decoded)->tp_name);
282 Py_DECREF(decoded);
283 return -1;
284 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200285 if (PyUnicode_READY(decoded) < 0) {
286 Py_DECREF(decoded);
287 return -1;
288 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200289 return 0;
290}
291
/* Bit flags stored in nldecoder_object.seennl: one bit per kind of line
   ending encountered so far. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
296
297PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200298_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000299 PyObject *input, int final)
300{
301 PyObject *output;
302 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200303 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000304
305 if (self->decoder == NULL) {
306 PyErr_SetString(PyExc_ValueError,
307 "IncrementalNewlineDecoder.__init__ not called");
308 return NULL;
309 }
310
311 /* decode input (with the eventual \r from a previous pass) */
312 if (self->decoder != Py_None) {
313 output = PyObject_CallMethodObjArgs(self->decoder,
314 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
315 }
316 else {
317 output = input;
318 Py_INCREF(output);
319 }
320
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200321 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000322 return NULL;
323
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000325 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 /* Prefix output with CR */
327 int kind;
328 PyObject *modified;
329 char *out;
330
331 modified = PyUnicode_New(output_len + 1,
332 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000333 if (modified == NULL)
334 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 kind = PyUnicode_KIND(modified);
336 out = PyUnicode_DATA(modified);
337 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200338 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000339 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200340 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000341 self->pendingcr = 0;
342 output_len++;
343 }
344
345 /* retain last \r even when not translating data:
346 * then readline() is sure to get \r\n in one pass
347 */
348 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000349 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200350 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
351 {
352 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
353 if (modified == NULL)
354 goto error;
355 Py_DECREF(output);
356 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000357 self->pendingcr = 1;
358 }
359 }
360
361 /* Record which newlines are read and do newline translation if desired,
362 all in one pass. */
363 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 Py_ssize_t len;
366 int seennl = self->seennl;
367 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 in_str = PyUnicode_DATA(output);
371 len = PyUnicode_GET_LENGTH(output);
372 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373
374 if (len == 0)
375 return output;
376
377 /* If, up to now, newlines are consistently \n, do a quick check
378 for the \r *byte* with the libc's optimized memchr.
379 */
380 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200381 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382 }
383
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 if (only_lf) {
385 /* If not already seen, quick scan for a possible "\n" character.
386 (there's nothing else to be done, even when in translation mode)
387 */
388 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200389 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100390 if (kind == PyUnicode_1BYTE_KIND)
391 seennl |= SEEN_LF;
392 else {
393 Py_ssize_t i = 0;
394 for (;;) {
395 Py_UCS4 c;
396 /* Fast loop for non-control characters */
397 while (PyUnicode_READ(kind, in_str, i) > '\n')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
400 if (c == '\n') {
401 seennl |= SEEN_LF;
402 break;
403 }
404 if (i >= len)
405 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000406 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000407 }
408 }
409 /* Finished: we have scanned for newlines, and none of them
410 need translating */
411 }
412 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000415 if (seennl == SEEN_ALL)
416 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000419 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 while (PyUnicode_READ(kind, in_str, i) > '\r')
421 i++;
422 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000423 if (c == '\n')
424 seennl |= SEEN_LF;
425 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000427 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000429 }
430 else
431 seennl |= SEEN_CR;
432 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 break;
435 if (seennl == SEEN_ALL)
436 break;
437 }
438 endscan:
439 ;
440 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000441 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 void *translated;
443 int kind = PyUnicode_KIND(output);
444 void *in_str = PyUnicode_DATA(output);
445 Py_ssize_t in, out;
446 /* XXX: Previous in-place translation here is disabled as
447 resizing is not possible anymore */
448 /* We could try to optimize this so that we only do a copy
449 when there is something to translate. On the other hand,
450 we already know there is a \r byte, so chances are high
451 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200452 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 if (translated == NULL) {
454 PyErr_NoMemory();
455 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000460 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
462 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 seennl |= SEEN_LF;
466 continue;
467 }
468 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 in++;
471 seennl |= SEEN_CRLF;
472 }
473 else
474 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200475 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 continue;
477 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200480 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000481 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 Py_DECREF(output);
483 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100484 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200486 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 }
488 self->seennl |= seennl;
489 }
490
491 return output;
492
493 error:
494 Py_DECREF(output);
495 return NULL;
496}
497
498static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000499incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000500 PyObject *args, PyObject *kwds)
501{
502 char *kwlist[] = {"input", "final", NULL};
503 PyObject *input;
504 int final = 0;
505
506 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
507 kwlist, &input, &final))
508 return NULL;
509 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
510}
511
512static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000513incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000514{
515 PyObject *buffer;
516 unsigned PY_LONG_LONG flag;
517
518 if (self->decoder != Py_None) {
519 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
520 _PyIO_str_getstate, NULL);
521 if (state == NULL)
522 return NULL;
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
524 Py_DECREF(state);
525 return NULL;
526 }
527 Py_INCREF(buffer);
528 Py_DECREF(state);
529 }
530 else {
531 buffer = PyBytes_FromString("");
532 flag = 0;
533 }
534 flag <<= 1;
535 if (self->pendingcr)
536 flag |= 1;
537 return Py_BuildValue("NK", buffer, flag);
538}
539
540static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000541incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542{
543 PyObject *buffer;
544 unsigned PY_LONG_LONG flag;
545
546 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
547 return NULL;
548
549 self->pendingcr = (int) flag & 1;
550 flag >>= 1;
551
552 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200553 return _PyObject_CallMethodId(self->decoder,
554 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000555 else
556 Py_RETURN_NONE;
557}
558
559static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000560incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000561{
562 self->seennl = 0;
563 self->pendingcr = 0;
564 if (self->decoder != Py_None)
565 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
566 else
567 Py_RETURN_NONE;
568}
569
570static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000571incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 switch (self->seennl) {
574 case SEEN_CR:
575 return PyUnicode_FromString("\r");
576 case SEEN_LF:
577 return PyUnicode_FromString("\n");
578 case SEEN_CRLF:
579 return PyUnicode_FromString("\r\n");
580 case SEEN_CR | SEEN_LF:
581 return Py_BuildValue("ss", "\r", "\n");
582 case SEEN_CR | SEEN_CRLF:
583 return Py_BuildValue("ss", "\r", "\r\n");
584 case SEEN_LF | SEEN_CRLF:
585 return Py_BuildValue("ss", "\n", "\r\n");
586 case SEEN_CR | SEEN_LF | SEEN_CRLF:
587 return Py_BuildValue("sss", "\r", "\n", "\r\n");
588 default:
589 Py_RETURN_NONE;
590 }
591
592}
593
594
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000595static PyMethodDef incrementalnewlinedecoder_methods[] = {
596 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
597 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
598 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
599 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000600 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601};
602
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000603static PyGetSetDef incrementalnewlinedecoder_getset[] = {
604 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000605 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606};
607
608PyTypeObject PyIncrementalNewlineDecoder_Type = {
609 PyVarObject_HEAD_INIT(NULL, 0)
610 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /*tp_print*/
615 0, /*tp_getattr*/
616 0, /*tp_setattr*/
617 0, /*tp_compare */
618 0, /*tp_repr*/
619 0, /*tp_as_number*/
620 0, /*tp_as_sequence*/
621 0, /*tp_as_mapping*/
622 0, /*tp_hash */
623 0, /*tp_call*/
624 0, /*tp_str*/
625 0, /*tp_getattro*/
626 0, /*tp_setattro*/
627 0, /*tp_as_buffer*/
628 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 0, /* tp_traverse */
631 0, /* tp_clear */
632 0, /* tp_richcompare */
633 0, /*tp_weaklistoffset*/
634 0, /* tp_iter */
635 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000636 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000638 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000639 0, /* tp_base */
640 0, /* tp_dict */
641 0, /* tp_descr_get */
642 0, /* tp_descr_set */
643 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000644 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 0, /* tp_alloc */
646 PyType_GenericNew, /* tp_new */
647};
648
649
650/* TextIOWrapper */
651
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000652PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 "Character and line based layer over a BufferedIOBase object, buffer.\n"
654 "\n"
655 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200656 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657 "\n"
Andrew Kuchlingc7b6c502013-06-16 12:58:48 -0400658 "errors determines the strictness of encoding and decoding (see\n"
659 "help(codecs.Codec) or the documentation for codecs.register) and\n"
660 "defaults to \"strict\".\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200662 "newline controls how line endings are handled. It can be None, '',\n"
663 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
664 "\n"
665 "* On input, if newline is None, universal newlines mode is\n"
666 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
667 " these are translated into '\\n' before being returned to the\n"
668 " caller. If it is '', universal newline mode is enabled, but line\n"
669 " endings are returned to the caller untranslated. If it has any of\n"
670 " the other legal values, input lines are only terminated by the given\n"
671 " string, and the line ending is returned to the caller untranslated.\n"
672 "\n"
673 "* On output, if newline is None, any '\\n' characters written are\n"
674 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300675 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200676 " of the other legal values, any '\\n' characters written are translated\n"
677 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 "\n"
679 "If line_buffering is True, a call to flush is implied when a call to\n"
680 "write contains a newline character."
681 );
682
683typedef PyObject *
684 (*encodefunc_t)(PyObject *, PyObject *);
685
686typedef struct
687{
688 PyObject_HEAD
689 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000690 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000691 Py_ssize_t chunk_size;
692 PyObject *buffer;
693 PyObject *encoding;
694 PyObject *encoder;
695 PyObject *decoder;
696 PyObject *readnl;
697 PyObject *errors;
698 const char *writenl; /* utf-8 encoded, NULL stands for \n */
699 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200700 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701 char readuniversal;
702 char readtranslate;
703 char writetranslate;
704 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200705 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200707 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708 /* Specialized encoding func (see below) */
709 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000710 /* Whether or not it's the start of the stream */
711 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712
713 /* Reads and writes are internally buffered in order to speed things up.
714 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000715
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716 Please also note that text to be written is first encoded before being
717 buffered. This is necessary so that encoding errors are immediately
718 reported to the caller, but it unfortunately means that the
719 IncrementalEncoder (whose encode() method is always written in Python)
720 becomes a bottleneck for small writes.
721 */
722 PyObject *decoded_chars; /* buffer for text returned from decoder */
723 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
724 PyObject *pending_bytes; /* list of bytes objects waiting to be
725 written, or NULL */
726 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000727
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728 /* snapshot is either None, or a tuple (dec_flags, next_input) where
729 * dec_flags is the second (integer) item of the decoder state and
730 * next_input is the chunk of input bytes that comes next after the
731 * snapshot point. We use this to reconstruct decoder states in tell().
732 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000733 PyObject *snapshot;
734 /* Bytes-to-characters ratio for the current chunk. Serves as input for
735 the heuristic in tell(). */
736 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737
738 /* Cache raw object if it's a FileIO object */
739 PyObject *raw;
740
741 PyObject *weakreflist;
742 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000743} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744
745
746/* A couple of specialized cases in order to bypass the slow incremental
747 encoding methods for the most popular encodings. */
748
749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000750ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200752 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000757{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100758 return _PyUnicode_EncodeUTF16(text,
759 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760}
761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100765 return _PyUnicode_EncodeUTF16(text,
766 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767}
768
769static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000770utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771{
Antoine Pitroue4501852009-05-14 18:55:55 +0000772 if (!self->encoding_start_of_stream) {
773 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200774#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000775 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000777 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000779 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100780 return _PyUnicode_EncodeUTF16(text,
781 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782}
783
Antoine Pitroue4501852009-05-14 18:55:55 +0000784static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000785utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000786{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100787 return _PyUnicode_EncodeUTF32(text,
788 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000789}
790
791static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000792utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000793{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100794 return _PyUnicode_EncodeUTF32(text,
795 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000796}
797
798static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000799utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000800{
801 if (!self->encoding_start_of_stream) {
802 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200803#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000804 return utf32be_encode(self, text);
805#else
806 return utf32le_encode(self, text);
807#endif
808 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100809 return _PyUnicode_EncodeUTF32(text,
810 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000811}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812
813static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000814utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000815{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000817}
818
819static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000820latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000821{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200822 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000823}
824
825/* Map normalized encoding names onto the specialized encoding funcs */
826
827typedef struct {
828 const char *name;
829 encodefunc_t encodefunc;
830} encodefuncentry;
831
Antoine Pitrou24f36292009-03-28 22:16:42 +0000832static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 {"ascii", (encodefunc_t) ascii_encode},
834 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000835 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 {"utf-16-be", (encodefunc_t) utf16be_encode},
837 {"utf-16-le", (encodefunc_t) utf16le_encode},
838 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000839 {"utf-32-be", (encodefunc_t) utf32be_encode},
840 {"utf-32-le", (encodefunc_t) utf32le_encode},
841 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000842 {NULL, NULL}
843};
844
845
846static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000847textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000848{
849 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200850 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000851 NULL};
Nick Coghlana9b15242014-02-04 22:11:18 +1000852 PyObject *buffer, *raw, *codec_info = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000853 char *encoding = NULL;
854 char *errors = NULL;
855 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200856 int line_buffering = 0, write_through = 0;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100857 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000858
859 PyObject *res;
860 int r;
861
862 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000863 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200864 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000865 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200866 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000867 return -1;
868
869 if (newline && newline[0] != '\0'
870 && !(newline[0] == '\n' && newline[1] == '\0')
871 && !(newline[0] == '\r' && newline[1] == '\0')
872 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
873 PyErr_Format(PyExc_ValueError,
874 "illegal newline value: %s", newline);
875 return -1;
876 }
877
878 Py_CLEAR(self->buffer);
879 Py_CLEAR(self->encoding);
880 Py_CLEAR(self->encoder);
881 Py_CLEAR(self->decoder);
882 Py_CLEAR(self->readnl);
883 Py_CLEAR(self->decoded_chars);
884 Py_CLEAR(self->pending_bytes);
885 Py_CLEAR(self->snapshot);
886 Py_CLEAR(self->errors);
887 Py_CLEAR(self->raw);
888 self->decoded_chars_used = 0;
889 self->pending_bytes_count = 0;
890 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000891 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000892
893 if (encoding == NULL) {
894 /* Try os.device_encoding(fileno) */
895 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100896 state = IO_STATE();
897 if (state == NULL)
898 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200899 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000900 /* Ignore only AttributeError and UnsupportedOperation */
901 if (fileno == NULL) {
902 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
903 PyErr_ExceptionMatches(state->unsupported_operation)) {
904 PyErr_Clear();
905 }
906 else {
907 goto error;
908 }
909 }
910 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200911 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500912 Py_DECREF(fileno);
913 if (fd == -1 && PyErr_Occurred()) {
914 goto error;
915 }
916
917 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000918 if (self->encoding == NULL)
919 goto error;
920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
922 }
923 }
924 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200925 PyObject *locale_module = _PyIO_get_locale_module(state);
926 if (locale_module == NULL)
927 goto catch_ImportError;
928 self->encoding = _PyObject_CallMethodId(
929 locale_module, &PyId_getpreferredencoding, "O", Py_False);
930 Py_DECREF(locale_module);
931 if (self->encoding == NULL) {
932 catch_ImportError:
933 /*
Martin Panter7462b6492015-11-02 03:37:02 +0000934 Importing locale can raise an ImportError because of
935 _functools, and locale.getpreferredencoding can raise an
Antoine Pitrou932ff832013-08-01 21:04:50 +0200936 ImportError if _locale is not available. These will happen
937 during module building.
938 */
939 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
940 PyErr_Clear();
941 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000942 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200943 else
944 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000945 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200946 else if (!PyUnicode_Check(self->encoding))
947 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000948 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000949 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000950 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000951 if (encoding == NULL)
952 goto error;
953 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000954 else if (encoding != NULL) {
955 self->encoding = PyUnicode_FromString(encoding);
956 if (self->encoding == NULL)
957 goto error;
958 }
959 else {
960 PyErr_SetString(PyExc_IOError,
961 "could not determine default encoding");
962 }
963
Nick Coghlana9b15242014-02-04 22:11:18 +1000964 /* Check we have been asked for a real text encoding */
965 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
966 if (codec_info == NULL) {
967 Py_CLEAR(self->encoding);
968 goto error;
969 }
970
971 /* XXX: Failures beyond this point have the potential to leak elements
972 * of the partially constructed object (like self->encoding)
973 */
974
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000975 if (errors == NULL)
976 errors = "strict";
977 self->errors = PyBytes_FromString(errors);
978 if (self->errors == NULL)
979 goto error;
980
981 self->chunk_size = 8192;
982 self->readuniversal = (newline == NULL || newline[0] == '\0');
983 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200984 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000985 self->readtranslate = (newline == NULL);
986 if (newline) {
987 self->readnl = PyUnicode_FromString(newline);
988 if (self->readnl == NULL)
Nick Coghlana9b15242014-02-04 22:11:18 +1000989 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990 }
991 self->writetranslate = (newline == NULL || newline[0] != '\0');
992 if (!self->readuniversal && self->readnl) {
993 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000994 if (self->writenl == NULL)
995 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000996 if (!strcmp(self->writenl, "\n"))
997 self->writenl = NULL;
998 }
999#ifdef MS_WINDOWS
1000 else
1001 self->writenl = "\r\n";
1002#endif
1003
1004 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001005 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001006 if (res == NULL)
1007 goto error;
1008 r = PyObject_IsTrue(res);
1009 Py_DECREF(res);
1010 if (r == -1)
1011 goto error;
1012 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001013 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
1014 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001015 if (self->decoder == NULL)
1016 goto error;
1017
1018 if (self->readuniversal) {
1019 PyObject *incrementalDecoder = PyObject_CallFunction(
1020 (PyObject *)&PyIncrementalNewlineDecoder_Type,
1021 "Oi", self->decoder, (int)self->readtranslate);
1022 if (incrementalDecoder == NULL)
1023 goto error;
1024 Py_CLEAR(self->decoder);
1025 self->decoder = incrementalDecoder;
1026 }
1027 }
1028
1029 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001030 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001031 if (res == NULL)
1032 goto error;
1033 r = PyObject_IsTrue(res);
1034 Py_DECREF(res);
1035 if (r == -1)
1036 goto error;
1037 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001038 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1039 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 if (self->encoder == NULL)
1041 goto error;
1042 /* Get the normalized named of the codec */
Nick Coghlana9b15242014-02-04 22:11:18 +10001043 res = _PyObject_GetAttrId(codec_info, &PyId_name);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001044 if (res == NULL) {
1045 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 PyErr_Clear();
1047 else
1048 goto error;
1049 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 else if (PyUnicode_Check(res)) {
1051 encodefuncentry *e = encodefuncs;
1052 while (e->name != NULL) {
1053 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1054 self->encodefunc = e->encodefunc;
1055 break;
1056 }
1057 e++;
1058 }
1059 }
1060 Py_XDECREF(res);
1061 }
1062
Nick Coghlana9b15242014-02-04 22:11:18 +10001063 /* Finished sorting out the codec details */
Benjamin Peterson6c14f232014-11-12 10:19:46 -05001064 Py_CLEAR(codec_info);
Nick Coghlana9b15242014-02-04 22:11:18 +10001065
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001066 self->buffer = buffer;
1067 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001068
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001069 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1070 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1071 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001072 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001073 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001074 if (raw == NULL) {
1075 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1076 PyErr_Clear();
1077 else
1078 goto error;
1079 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001080 else if (Py_TYPE(raw) == &PyFileIO_Type)
1081 self->raw = raw;
1082 else
1083 Py_DECREF(raw);
1084 }
1085
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001086 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087 if (res == NULL)
1088 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001089 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001090 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001091 if (r < 0)
1092 goto error;
1093 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001094
Martin v. Löwis767046a2011-10-14 15:35:36 +02001095 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001096
Antoine Pitroue4501852009-05-14 18:55:55 +00001097 self->encoding_start_of_stream = 0;
1098 if (self->seekable && self->encoder) {
1099 PyObject *cookieObj;
1100 int cmp;
1101
1102 self->encoding_start_of_stream = 1;
1103
1104 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1105 if (cookieObj == NULL)
1106 goto error;
1107
1108 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1109 Py_DECREF(cookieObj);
1110 if (cmp < 0) {
1111 goto error;
1112 }
1113
1114 if (cmp == 0) {
1115 self->encoding_start_of_stream = 0;
1116 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1117 _PyIO_zero, NULL);
1118 if (res == NULL)
1119 goto error;
1120 Py_DECREF(res);
1121 }
1122 }
1123
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124 self->ok = 1;
1125 return 0;
1126
1127 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001128 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001129 return -1;
1130}
1131
1132static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001133_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001134{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 self->ok = 0;
1136 Py_CLEAR(self->buffer);
1137 Py_CLEAR(self->encoding);
1138 Py_CLEAR(self->encoder);
1139 Py_CLEAR(self->decoder);
1140 Py_CLEAR(self->readnl);
1141 Py_CLEAR(self->decoded_chars);
1142 Py_CLEAR(self->pending_bytes);
1143 Py_CLEAR(self->snapshot);
1144 Py_CLEAR(self->errors);
1145 Py_CLEAR(self->raw);
1146 return 0;
1147}
1148
1149static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001150textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001151{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001152 self->finalizing = 1;
1153 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001154 return;
Antoine Pitrou796564c2013-07-30 19:59:21 +02001155 _textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001156 _PyObject_GC_UNTRACK(self);
1157 if (self->weakreflist != NULL)
1158 PyObject_ClearWeakRefs((PyObject *)self);
1159 Py_CLEAR(self->dict);
1160 Py_TYPE(self)->tp_free((PyObject *)self);
1161}
1162
1163static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001164textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001165{
1166 Py_VISIT(self->buffer);
1167 Py_VISIT(self->encoding);
1168 Py_VISIT(self->encoder);
1169 Py_VISIT(self->decoder);
1170 Py_VISIT(self->readnl);
1171 Py_VISIT(self->decoded_chars);
1172 Py_VISIT(self->pending_bytes);
1173 Py_VISIT(self->snapshot);
1174 Py_VISIT(self->errors);
1175 Py_VISIT(self->raw);
1176
1177 Py_VISIT(self->dict);
1178 return 0;
1179}
1180
1181static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001182textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001184 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001185 return -1;
1186 Py_CLEAR(self->dict);
1187 return 0;
1188}
1189
1190static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001191textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001192
1193/* This macro takes some shortcuts to make the common case faster. */
1194#define CHECK_CLOSED(self) \
1195 do { \
1196 int r; \
1197 PyObject *_res; \
1198 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1199 if (self->raw != NULL) \
1200 r = _PyFileIO_closed(self->raw); \
1201 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001202 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001203 if (_res == NULL) \
1204 return NULL; \
1205 r = PyObject_IsTrue(_res); \
1206 Py_DECREF(_res); \
1207 if (r < 0) \
1208 return NULL; \
1209 } \
1210 if (r > 0) { \
1211 PyErr_SetString(PyExc_ValueError, \
1212 "I/O operation on closed file."); \
1213 return NULL; \
1214 } \
1215 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001216 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001217 return NULL; \
1218 } while (0)
1219
/* Make the enclosing function return NULL with ValueError set when the
   object's `ok` flag is not positive.  Expands to a bare `if` statement. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }
1226
/* CHECK_INITIALIZED, followed by a check that the underlying buffer has not
   been detached; either failure makes the enclosing function return NULL
   with ValueError set.  Expands to two statements. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }
1234
/* Same checks as CHECK_ATTACHED, for functions that return int: a failed
   check sets ValueError and makes the enclosing function return -1. */
#define CHECK_ATTACHED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } else if ((self)->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1245
1246
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001247static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001248textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001249{
1250 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001251 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001252 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1253 if (res == NULL)
1254 return NULL;
1255 Py_DECREF(res);
1256 buffer = self->buffer;
1257 self->buffer = NULL;
1258 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001259 return buffer;
1260}
1261
Antoine Pitrou24f36292009-03-28 22:16:42 +00001262/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001263 underlying buffered object, though. */
1264static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001265_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001267 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268
1269 if (self->pending_bytes == NULL)
1270 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001271
1272 pending = self->pending_bytes;
1273 Py_INCREF(pending);
1274 self->pending_bytes_count = 0;
1275 Py_CLEAR(self->pending_bytes);
1276
1277 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1278 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279 if (b == NULL)
1280 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001281 ret = NULL;
1282 do {
1283 ret = PyObject_CallMethodObjArgs(self->buffer,
1284 _PyIO_str_write, b, NULL);
1285 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286 Py_DECREF(b);
1287 if (ret == NULL)
1288 return -1;
1289 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001290 return 0;
1291}
1292
1293static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001294textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001295{
1296 PyObject *ret;
1297 PyObject *text; /* owned reference */
1298 PyObject *b;
1299 Py_ssize_t textlen;
1300 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001301 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001303 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001304
1305 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1306 return NULL;
1307 }
1308
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 if (PyUnicode_READY(text) == -1)
1310 return NULL;
1311
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 CHECK_CLOSED(self);
1313
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001314 if (self->encoder == NULL)
1315 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001316
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 Py_INCREF(text);
1318
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001320
1321 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001322 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 haslf = 1;
1324
1325 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001326 PyObject *newtext = _PyObject_CallMethodId(
1327 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001328 Py_DECREF(text);
1329 if (newtext == NULL)
1330 return NULL;
1331 text = newtext;
1332 }
1333
Antoine Pitroue96ec682011-07-23 21:46:35 +02001334 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001335 text_needflush = 1;
1336 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001337 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001339 needflush = 1;
1340
1341 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001342 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001343 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001344 self->encoding_start_of_stream = 0;
1345 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001346 else
1347 b = PyObject_CallMethodObjArgs(self->encoder,
1348 _PyIO_str_encode, text, NULL);
1349 Py_DECREF(text);
1350 if (b == NULL)
1351 return NULL;
1352
1353 if (self->pending_bytes == NULL) {
1354 self->pending_bytes = PyList_New(0);
1355 if (self->pending_bytes == NULL) {
1356 Py_DECREF(b);
1357 return NULL;
1358 }
1359 self->pending_bytes_count = 0;
1360 }
1361 if (PyList_Append(self->pending_bytes, b) < 0) {
1362 Py_DECREF(b);
1363 return NULL;
1364 }
1365 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1366 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001367 if (self->pending_bytes_count > self->chunk_size || needflush ||
1368 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001369 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001370 return NULL;
1371 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001372
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001373 if (needflush) {
1374 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1375 if (ret == NULL)
1376 return NULL;
1377 Py_DECREF(ret);
1378 }
1379
1380 Py_CLEAR(self->snapshot);
1381
1382 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001383 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001384 if (ret == NULL)
1385 return NULL;
1386 Py_DECREF(ret);
1387 }
1388
1389 return PyLong_FromSsize_t(textlen);
1390}
1391
1392/* Steal a reference to chars and store it in the decoded_char buffer;
1393 */
1394static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001395textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001396{
1397 Py_CLEAR(self->decoded_chars);
1398 self->decoded_chars = chars;
1399 self->decoded_chars_used = 0;
1400}
1401
1402static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001403textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001404{
1405 PyObject *chars;
1406 Py_ssize_t avail;
1407
1408 if (self->decoded_chars == NULL)
1409 return PyUnicode_FromStringAndSize(NULL, 0);
1410
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001411 /* decoded_chars is guaranteed to be "ready". */
1412 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001413 - self->decoded_chars_used);
1414
1415 assert(avail >= 0);
1416
1417 if (n < 0 || n > avail)
1418 n = avail;
1419
1420 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001421 chars = PyUnicode_Substring(self->decoded_chars,
1422 self->decoded_chars_used,
1423 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001424 if (chars == NULL)
1425 return NULL;
1426 }
1427 else {
1428 chars = self->decoded_chars;
1429 Py_INCREF(chars);
1430 }
1431
1432 self->decoded_chars_used += n;
1433 return chars;
1434}
1435
1436/* Read and decode the next chunk of data from the BufferedReader.
1437 */
1438static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001439textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001440{
1441 PyObject *dec_buffer = NULL;
1442 PyObject *dec_flags = NULL;
1443 PyObject *input_chunk = NULL;
1444 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001445 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001446 int eof;
1447
1448 /* The return value is True unless EOF was reached. The decoded string is
1449 * placed in self._decoded_chars (replacing its previous value). The
1450 * entire input chunk is sent to the decoder, though some of it may remain
1451 * buffered in the decoder, yet to be converted.
1452 */
1453
1454 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001455 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001456 return -1;
1457 }
1458
1459 if (self->telling) {
1460 /* To prepare for tell(), we need to snapshot a point in the file
1461 * where the decoder's input buffer is empty.
1462 */
1463
1464 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1465 _PyIO_str_getstate, NULL);
1466 if (state == NULL)
1467 return -1;
1468 /* Given this, we know there was a valid snapshot point
1469 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1470 */
1471 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1472 Py_DECREF(state);
1473 return -1;
1474 }
1475 Py_INCREF(dec_buffer);
1476 Py_INCREF(dec_flags);
1477 Py_DECREF(state);
1478 }
1479
1480 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001481 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001482 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001483 }
1484 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001485 if (chunk_size == NULL)
1486 goto fail;
1487 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001488 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1489 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001490 Py_DECREF(chunk_size);
1491 if (input_chunk == NULL)
1492 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001493 if (!PyBytes_Check(input_chunk)) {
1494 PyErr_Format(PyExc_TypeError,
1495 "underlying %s() should have returned a bytes object, "
1496 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1497 Py_TYPE(input_chunk)->tp_name);
1498 goto fail;
1499 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001500
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001501 nbytes = PyBytes_Size(input_chunk);
1502 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001503
1504 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1505 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1506 self->decoder, input_chunk, eof);
1507 }
1508 else {
1509 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1510 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1511 }
1512
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001513 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001514 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001515 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001516 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001517 if (nchars > 0)
1518 self->b2cratio = (double) nbytes / nchars;
1519 else
1520 self->b2cratio = 0.0;
1521 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001522 eof = 0;
1523
1524 if (self->telling) {
1525 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1526 * next input to be decoded is dec_buffer + input_chunk.
1527 */
1528 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1529 if (next_input == NULL)
1530 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001531 if (!PyBytes_Check(next_input)) {
1532 PyErr_Format(PyExc_TypeError,
1533 "decoder getstate() should have returned a bytes "
1534 "object, not '%.200s'",
1535 Py_TYPE(next_input)->tp_name);
1536 Py_DECREF(next_input);
1537 goto fail;
1538 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001539 Py_DECREF(dec_buffer);
1540 Py_CLEAR(self->snapshot);
1541 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1542 }
1543 Py_DECREF(input_chunk);
1544
1545 return (eof == 0);
1546
1547 fail:
1548 Py_XDECREF(dec_buffer);
1549 Py_XDECREF(dec_flags);
1550 Py_XDECREF(input_chunk);
1551 return -1;
1552}
1553
1554static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001555textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001556{
1557 Py_ssize_t n = -1;
1558 PyObject *result = NULL, *chunks = NULL;
1559
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001560 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001562 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001563 return NULL;
1564
1565 CHECK_CLOSED(self);
1566
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001567 if (self->decoder == NULL)
1568 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001569
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001570 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001571 return NULL;
1572
1573 if (n < 0) {
1574 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001575 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576 PyObject *decoded;
1577 if (bytes == NULL)
1578 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001579
1580 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1581 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1582 bytes, 1);
1583 else
1584 decoded = PyObject_CallMethodObjArgs(
1585 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001586 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001587 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001588 goto fail;
1589
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001590 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001591
1592 if (result == NULL) {
1593 Py_DECREF(decoded);
1594 return NULL;
1595 }
1596
1597 PyUnicode_AppendAndDel(&result, decoded);
1598 if (result == NULL)
1599 goto fail;
1600
1601 Py_CLEAR(self->snapshot);
1602 return result;
1603 }
1604 else {
1605 int res = 1;
1606 Py_ssize_t remaining = n;
1607
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001608 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001609 if (result == NULL)
1610 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001611 if (PyUnicode_READY(result) == -1)
1612 goto fail;
1613 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614
1615 /* Keep reading chunks until we have n characters to return */
1616 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001617 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001618 if (res < 0) {
1619 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1620 when EINTR occurs so we needn't do it ourselves. */
1621 if (_PyIO_trap_eintr()) {
1622 continue;
1623 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001624 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001625 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001626 if (res == 0) /* EOF */
1627 break;
1628 if (chunks == NULL) {
1629 chunks = PyList_New(0);
1630 if (chunks == NULL)
1631 goto fail;
1632 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001633 if (PyUnicode_GET_LENGTH(result) > 0 &&
1634 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001635 goto fail;
1636 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001637 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001638 if (result == NULL)
1639 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001640 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 }
1642 if (chunks != NULL) {
1643 if (result != NULL && PyList_Append(chunks, result) < 0)
1644 goto fail;
1645 Py_CLEAR(result);
1646 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1647 if (result == NULL)
1648 goto fail;
1649 Py_CLEAR(chunks);
1650 }
1651 return result;
1652 }
1653 fail:
1654 Py_XDECREF(result);
1655 Py_XDECREF(chunks);
1656 return NULL;
1657}
1658
1659
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001660/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001661 that is to the NUL character. Otherwise the function will produce
1662 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001663static char *
1664find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001665{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001666 if (kind == PyUnicode_1BYTE_KIND) {
1667 assert(ch < 256);
1668 return (char *) memchr((void *) s, (char) ch, end - s);
1669 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001671 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001672 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001673 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674 return s;
1675 if (s == end)
1676 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001677 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001678 }
1679}
1680
1681Py_ssize_t
1682_PyIO_find_line_ending(
1683 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001684 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001685{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001686 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687
1688 if (translated) {
1689 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001690 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 else {
1694 *consumed = len;
1695 return -1;
1696 }
1697 }
1698 else if (universal) {
1699 /* Universal newline search. Find any of \r, \r\n, \n
1700 * The decoder ensures that \r\n are not split in two pieces
1701 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001702 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001704 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001706 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001707 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001708 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 if (s >= end) {
1710 *consumed = len;
1711 return -1;
1712 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001713 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001714 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001715 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001716 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001718 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001719 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001721 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722 }
1723 }
1724 }
1725 else {
1726 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001727 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1728 char *nl = PyUnicode_DATA(readnl);
1729 /* Assume that readnl is an ASCII character. */
1730 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001731 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001732 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001733 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001734 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 *consumed = len;
1736 return -1;
1737 }
1738 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001739 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001740 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001741 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001742 if (e < s)
1743 e = s;
1744 while (s < e) {
1745 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001746 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747 if (pos == NULL || pos >= e)
1748 break;
1749 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001750 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001751 break;
1752 }
1753 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001754 return (pos - start)/kind + readnl_len;
1755 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001757 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001758 if (pos == NULL)
1759 *consumed = len;
1760 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001761 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762 return -1;
1763 }
1764 }
1765}
1766
1767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001768_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001769{
1770 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1771 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1772 int res;
1773
1774 CHECK_CLOSED(self);
1775
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001776 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777 return NULL;
1778
1779 chunked = 0;
1780
1781 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001782 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001783 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001784 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001785 Py_ssize_t consumed = 0;
1786
1787 /* First, get some data if necessary */
1788 res = 1;
1789 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001790 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001791 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001792 if (res < 0) {
1793 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1794 when EINTR occurs so we needn't do it ourselves. */
1795 if (_PyIO_trap_eintr()) {
1796 continue;
1797 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001798 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001799 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001800 if (res == 0)
1801 break;
1802 }
1803 if (res == 0) {
1804 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001805 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 Py_CLEAR(self->snapshot);
1807 start = endpos = offset_to_buffer = 0;
1808 break;
1809 }
1810
1811 if (remaining == NULL) {
1812 line = self->decoded_chars;
1813 start = self->decoded_chars_used;
1814 offset_to_buffer = 0;
1815 Py_INCREF(line);
1816 }
1817 else {
1818 assert(self->decoded_chars_used == 0);
1819 line = PyUnicode_Concat(remaining, self->decoded_chars);
1820 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001821 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001822 Py_CLEAR(remaining);
1823 if (line == NULL)
1824 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001825 if (PyUnicode_READY(line) == -1)
1826 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827 }
1828
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001829 ptr = PyUnicode_DATA(line);
1830 line_len = PyUnicode_GET_LENGTH(line);
1831 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001832
1833 endpos = _PyIO_find_line_ending(
1834 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001835 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001836 ptr + kind * start,
1837 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001838 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001839 if (endpos >= 0) {
1840 endpos += start;
1841 if (limit >= 0 && (endpos - start) + chunked >= limit)
1842 endpos = start + limit - chunked;
1843 break;
1844 }
1845
1846 /* We can put aside up to `endpos` */
1847 endpos = consumed + start;
1848 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1849 /* Didn't find line ending, but reached length limit */
1850 endpos = start + limit - chunked;
1851 break;
1852 }
1853
1854 if (endpos > start) {
1855 /* No line ending seen yet - put aside current data */
1856 PyObject *s;
1857 if (chunks == NULL) {
1858 chunks = PyList_New(0);
1859 if (chunks == NULL)
1860 goto error;
1861 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001862 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001863 if (s == NULL)
1864 goto error;
1865 if (PyList_Append(chunks, s) < 0) {
1866 Py_DECREF(s);
1867 goto error;
1868 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001869 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870 Py_DECREF(s);
1871 }
1872 /* There may be some remaining bytes we'll have to prepend to the
1873 next chunk of data */
1874 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001875 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001876 if (remaining == NULL)
1877 goto error;
1878 }
1879 Py_CLEAR(line);
1880 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001881 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001882 }
1883
1884 if (line != NULL) {
1885 /* Our line ends in the current buffer */
1886 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001887 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1888 PyObject *s = PyUnicode_Substring(line, start, endpos);
1889 Py_CLEAR(line);
1890 if (s == NULL)
1891 goto error;
1892 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001893 }
1894 }
1895 if (remaining != NULL) {
1896 if (chunks == NULL) {
1897 chunks = PyList_New(0);
1898 if (chunks == NULL)
1899 goto error;
1900 }
1901 if (PyList_Append(chunks, remaining) < 0)
1902 goto error;
1903 Py_CLEAR(remaining);
1904 }
1905 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001906 if (line != NULL) {
1907 if (PyList_Append(chunks, line) < 0)
1908 goto error;
1909 Py_DECREF(line);
1910 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1912 if (line == NULL)
1913 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001914 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001915 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001916 if (line == NULL) {
1917 Py_INCREF(_PyIO_empty_str);
1918 line = _PyIO_empty_str;
1919 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001920
1921 return line;
1922
1923 error:
1924 Py_XDECREF(chunks);
1925 Py_XDECREF(remaining);
1926 Py_XDECREF(line);
1927 return NULL;
1928}
1929
1930static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001931textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001932{
1933 Py_ssize_t limit = -1;
1934
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001935 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001936 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1937 return NULL;
1938 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001939 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001940}
1941
1942/* Seek and Tell */
1943
/* Unpacked form of the opaque integer cookie consumed by seek().
   textiowrapper_parse_cookie() fills it from a Python int and
   textiowrapper_build_cookie() packs it back into one. */
1944typedef struct {
1945 Py_off_t start_pos;   /* position seek() passes to buffer.seek(): the safe start point */
1946 int dec_flags;        /* flags value handed to decoder.setstate() */
1947 int bytes_to_feed;    /* byte count seek() reads from the buffer to feed the decoder */
1948 int chars_to_skip;    /* decoded characters seek() marks as already used */
1949 char need_eof;        /* passed as the second argument of decoder.decode() in seek() */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001950} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001951
1952/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in that
   intermediary byte buffer the various cookie_type fields will be stored.
 */
1958
/* Total size of the packed cookie: one Py_off_t, three ints and one char,
   matching the fields of cookie_type. */
1959#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1960
Christian Heimes743e0cd2012-10-17 23:52:17 +02001961#if PY_BIG_ENDIAN
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */
1965
1966# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1967# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1968# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1969# define OFF_CHARS_TO_SKIP (sizeof(char))
1970# define OFF_NEED_EOF 0
1971
1972#else
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001973/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */
1975
1976# define OFF_START_POS 0
1977# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1978# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1979# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1980# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1981
1982#endif
1983
1984static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001985textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001986{
1987 unsigned char buffer[COOKIE_BUF_LEN];
1988 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1989 if (cookieLong == NULL)
1990 return -1;
1991
1992 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001993 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 Py_DECREF(cookieLong);
1995 return -1;
1996 }
1997 Py_DECREF(cookieLong);
1998
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001999 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2000 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2001 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2002 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2003 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004
2005 return 0;
2006}
2007
2008static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002009textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010{
2011 unsigned char buffer[COOKIE_BUF_LEN];
2012
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002013 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2014 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2015 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2016 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2017 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002018
Christian Heimes743e0cd2012-10-17 23:52:17 +02002019 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2020 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002022
2023static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002024_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025{
2026 PyObject *res;
2027 /* When seeking to the start of the stream, we call decoder.reset()
2028 rather than decoder.getstate().
2029 This is for a few decoders such as utf-16 for which the state value
2030 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2031 utf-16, that we are expecting a BOM).
2032 */
2033 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2034 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2035 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002036 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2037 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002038 if (res == NULL)
2039 return -1;
2040 Py_DECREF(res);
2041 return 0;
2042}
2043
Antoine Pitroue4501852009-05-14 18:55:55 +00002044static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002045_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002046{
2047 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002048 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002049 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2050 self->encoding_start_of_stream = 1;
2051 }
2052 else {
2053 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2054 _PyIO_zero, NULL);
2055 self->encoding_start_of_stream = 0;
2056 }
2057 if (res == NULL)
2058 return -1;
2059 Py_DECREF(res);
2060 return 0;
2061}
2062
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002063static int
2064_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2065{
2066 /* Same as _textiowrapper_decoder_setstate() above. */
2067 return _textiowrapper_encoder_reset(
2068 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2069}
2070
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002072textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002073{
2074 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002075 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 PyObject *res;
2078 int cmp;
2079
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002080 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002082 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2083 return NULL;
2084 CHECK_CLOSED(self);
2085
2086 Py_INCREF(cookieObj);
2087
2088 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002089 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002090 goto fail;
2091 }
2092
2093 if (whence == 1) {
2094 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002095 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002096 if (cmp < 0)
2097 goto fail;
2098
2099 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002100 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002101 goto fail;
2102 }
2103
2104 /* Seeking to the current position should attempt to
2105 * sync the underlying buffer with the current position.
2106 */
2107 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002108 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002109 if (cookieObj == NULL)
2110 goto fail;
2111 }
2112 else if (whence == 2) {
2113 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002114 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002115 if (cmp < 0)
2116 goto fail;
2117
2118 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002119 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002120 goto fail;
2121 }
2122
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002123 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002124 if (res == NULL)
2125 goto fail;
2126 Py_DECREF(res);
2127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002128 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002129 Py_CLEAR(self->snapshot);
2130 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002131 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002132 if (res == NULL)
2133 goto fail;
2134 Py_DECREF(res);
2135 }
2136
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002137 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002138 Py_CLEAR(cookieObj);
2139 if (res == NULL)
2140 goto fail;
2141 if (self->encoder) {
2142 /* If seek() == 0, we are at the start of stream, otherwise not */
2143 cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
2144 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2145 Py_DECREF(res);
2146 goto fail;
2147 }
2148 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002149 return res;
2150 }
2151 else if (whence != 0) {
2152 PyErr_Format(PyExc_ValueError,
2153 "invalid whence (%d, should be 0, 1 or 2)", whence);
2154 goto fail;
2155 }
2156
Antoine Pitroue4501852009-05-14 18:55:55 +00002157 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002158 if (cmp < 0)
2159 goto fail;
2160
2161 if (cmp == 1) {
2162 PyErr_Format(PyExc_ValueError,
2163 "negative seek position %R", cookieObj);
2164 goto fail;
2165 }
2166
2167 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2168 if (res == NULL)
2169 goto fail;
2170 Py_DECREF(res);
2171
2172 /* The strategy of seek() is to go back to the safe start point
2173 * and replay the effect of read(chars_to_skip) from there.
2174 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002175 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002176 goto fail;
2177
2178 /* Seek back to the safe start point. */
2179 posobj = PyLong_FromOff_t(cookie.start_pos);
2180 if (posobj == NULL)
2181 goto fail;
2182 res = PyObject_CallMethodObjArgs(self->buffer,
2183 _PyIO_str_seek, posobj, NULL);
2184 Py_DECREF(posobj);
2185 if (res == NULL)
2186 goto fail;
2187 Py_DECREF(res);
2188
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002189 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002190 Py_CLEAR(self->snapshot);
2191
2192 /* Restore the decoder to its state from the safe start point. */
2193 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002194 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002195 goto fail;
2196 }
2197
2198 if (cookie.chars_to_skip) {
2199 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002200 PyObject *input_chunk = _PyObject_CallMethodId(
2201 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002202 PyObject *decoded;
2203
2204 if (input_chunk == NULL)
2205 goto fail;
2206
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002207 if (!PyBytes_Check(input_chunk)) {
2208 PyErr_Format(PyExc_TypeError,
2209 "underlying read() should have returned a bytes "
2210 "object, not '%.200s'",
2211 Py_TYPE(input_chunk)->tp_name);
2212 Py_DECREF(input_chunk);
2213 goto fail;
2214 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002215
2216 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2217 if (self->snapshot == NULL) {
2218 Py_DECREF(input_chunk);
2219 goto fail;
2220 }
2221
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002222 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2223 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002224
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002225 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002226 goto fail;
2227
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002228 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002229
2230 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002231 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2233 goto fail;
2234 }
2235 self->decoded_chars_used = cookie.chars_to_skip;
2236 }
2237 else {
2238 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2239 if (self->snapshot == NULL)
2240 goto fail;
2241 }
2242
Antoine Pitroue4501852009-05-14 18:55:55 +00002243 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2244 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002245 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002246 goto fail;
2247 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002248 return cookieObj;
2249 fail:
2250 Py_XDECREF(cookieObj);
2251 return NULL;
2252
2253}
2254
2255static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002256textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002257{
2258 PyObject *res;
2259 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002260 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002261 PyObject *next_input;
2262 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002263 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002264 PyObject *saved_state = NULL;
2265 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002266 char *dec_buffer;
2267 Py_ssize_t dec_buffer_len;
2268 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002269
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002270 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002271 CHECK_CLOSED(self);
2272
2273 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002274 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002275 goto fail;
2276 }
2277 if (!self->telling) {
2278 PyErr_SetString(PyExc_IOError,
2279 "telling position disabled by next() call");
2280 goto fail;
2281 }
2282
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002283 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002284 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002285 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002286 if (res == NULL)
2287 goto fail;
2288 Py_DECREF(res);
2289
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002290 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002291 if (posobj == NULL)
2292 goto fail;
2293
2294 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002295 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002296 return posobj;
2297 }
2298
2299#if defined(HAVE_LARGEFILE_SUPPORT)
2300 cookie.start_pos = PyLong_AsLongLong(posobj);
2301#else
2302 cookie.start_pos = PyLong_AsLong(posobj);
2303#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002304 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002305 if (PyErr_Occurred())
2306 goto fail;
2307
2308 /* Skip backward to the snapshot point (see _read_chunk). */
2309 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2310 goto fail;
2311
2312 assert (PyBytes_Check(next_input));
2313
2314 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2315
2316 /* How many decoded characters have been used up since the snapshot? */
2317 if (self->decoded_chars_used == 0) {
2318 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002319 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002320 }
2321
2322 chars_to_skip = self->decoded_chars_used;
2323
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002324 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002325 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2326 _PyIO_str_getstate, NULL);
2327 if (saved_state == NULL)
2328 goto fail;
2329
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002330#define DECODER_GETSTATE() do { \
2331 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2332 _PyIO_str_getstate, NULL); \
2333 if (_state == NULL) \
2334 goto fail; \
2335 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2336 Py_DECREF(_state); \
2337 goto fail; \
2338 } \
2339 Py_DECREF(_state); \
2340 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002341
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002342#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002343 PyObject *_decoded = _PyObject_CallMethodId( \
2344 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002345 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002346 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002347 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002348 Py_DECREF(_decoded); \
2349 } while (0)
2350
2351 /* Fast search for an acceptable start point, close to our
2352 current pos */
2353 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2354 skip_back = 1;
2355 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2356 input = PyBytes_AS_STRING(next_input);
2357 while (skip_bytes > 0) {
2358 /* Decode up to temptative start point */
2359 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2360 goto fail;
2361 DECODER_DECODE(input, skip_bytes, chars_decoded);
2362 if (chars_decoded <= chars_to_skip) {
2363 DECODER_GETSTATE();
2364 if (dec_buffer_len == 0) {
2365 /* Before pos and no bytes buffered in decoder => OK */
2366 cookie.dec_flags = dec_flags;
2367 chars_to_skip -= chars_decoded;
2368 break;
2369 }
2370 /* Skip back by buffered amount and reset heuristic */
2371 skip_bytes -= dec_buffer_len;
2372 skip_back = 1;
2373 }
2374 else {
2375 /* We're too far ahead, skip back a bit */
2376 skip_bytes -= skip_back;
2377 skip_back *= 2;
2378 }
2379 }
2380 if (skip_bytes <= 0) {
2381 skip_bytes = 0;
2382 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2383 goto fail;
2384 }
2385
2386 /* Note our initial start point. */
2387 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002388 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002389 if (chars_to_skip == 0)
2390 goto finally;
2391
2392 /* We should be close to the desired position. Now feed the decoder one
2393 * byte at a time until we reach the `chars_to_skip` target.
2394 * As we go, note the nearest "safe start point" before the current
2395 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396 * can safely start from there and advance to this location).
2397 */
2398 chars_decoded = 0;
2399 input = PyBytes_AS_STRING(next_input);
2400 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002401 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002402 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002403 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002404
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002405 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002406 /* We got n chars for 1 byte */
2407 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002408 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002409 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002410
2411 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2412 /* Decoder buffer is empty, so this is a safe start point. */
2413 cookie.start_pos += cookie.bytes_to_feed;
2414 chars_to_skip -= chars_decoded;
2415 cookie.dec_flags = dec_flags;
2416 cookie.bytes_to_feed = 0;
2417 chars_decoded = 0;
2418 }
2419 if (chars_decoded >= chars_to_skip)
2420 break;
2421 input++;
2422 }
2423 if (input == input_end) {
2424 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002425 PyObject *decoded = _PyObject_CallMethodId(
2426 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002427 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002428 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002429 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002430 Py_DECREF(decoded);
2431 cookie.need_eof = 1;
2432
2433 if (chars_decoded < chars_to_skip) {
2434 PyErr_SetString(PyExc_IOError,
2435 "can't reconstruct logical file position");
2436 goto fail;
2437 }
2438 }
2439
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002440finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002441 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002442 Py_DECREF(saved_state);
2443 if (res == NULL)
2444 return NULL;
2445 Py_DECREF(res);
2446
2447 /* The returned cookie corresponds to the last safe start point. */
2448 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002449 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002451fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002452 if (saved_state) {
2453 PyObject *type, *value, *traceback;
2454 PyErr_Fetch(&type, &value, &traceback);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002455 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002456 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002457 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002458 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002459 }
2460 return NULL;
2461}
2462
2463static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002464textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002465{
2466 PyObject *pos = Py_None;
2467 PyObject *res;
2468
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002469 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002470 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2471 return NULL;
2472 }
2473
2474 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2475 if (res == NULL)
2476 return NULL;
2477 Py_DECREF(res);
2478
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002479 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480}
2481
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002482static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002483textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002484{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002485 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002486
2487 CHECK_INITIALIZED(self);
2488
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002489 res = PyUnicode_FromString("<_io.TextIOWrapper");
2490 if (res == NULL)
2491 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002492
Martin v. Löwis767046a2011-10-14 15:35:36 +02002493 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002494 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002495 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002496 PyErr_Clear();
2497 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002498 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002499 }
2500 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002501 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002502 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002503 if (s == NULL)
2504 goto error;
2505 PyUnicode_AppendAndDel(&res, s);
2506 if (res == NULL)
2507 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002508 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002509 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002510 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002511 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002512 PyErr_Clear();
2513 else
2514 goto error;
2515 }
2516 else {
2517 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2518 Py_DECREF(modeobj);
2519 if (s == NULL)
2520 goto error;
2521 PyUnicode_AppendAndDel(&res, s);
2522 if (res == NULL)
2523 return NULL;
2524 }
2525 s = PyUnicode_FromFormat("%U encoding=%R>",
2526 res, self->encoding);
2527 Py_DECREF(res);
2528 return s;
2529error:
2530 Py_XDECREF(res);
2531 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002532}
2533
2534
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002535/* Inquiries */
2536
2537static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002538textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002539{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002540 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002541 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542}
2543
2544static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002545textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002546{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002547 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002548 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549}
2550
2551static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002552textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002553{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002554 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002555 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556}
2557
2558static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002559textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002560{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002561 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002562 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563}
2564
2565static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002566textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002568 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002569 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002570}
2571
2572static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002573textiowrapper_getstate(textio *self, PyObject *args)
2574{
2575 PyErr_Format(PyExc_TypeError,
2576 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2577 return NULL;
2578}
2579
2580static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002581textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002582{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002583 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002584 CHECK_CLOSED(self);
2585 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002586 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002587 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002588 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002589}
2590
2591static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002592textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593{
2594 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002595 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002596 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002597
Antoine Pitrou6be88762010-05-03 16:48:20 +00002598 res = textiowrapper_closed_get(self, NULL);
2599 if (res == NULL)
2600 return NULL;
2601 r = PyObject_IsTrue(res);
2602 Py_DECREF(res);
2603 if (r < 0)
2604 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002605
Antoine Pitrou6be88762010-05-03 16:48:20 +00002606 if (r > 0) {
2607 Py_RETURN_NONE; /* stream already closed */
2608 }
2609 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002610 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002611 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002612 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002613 if (res)
2614 Py_DECREF(res);
2615 else
2616 PyErr_Clear();
2617 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002618 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002619 if (res == NULL)
2620 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002621 else
2622 Py_DECREF(res);
2623
Benjamin Peterson68623612012-12-20 11:53:11 -06002624 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2625 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002626 _PyErr_ChainExceptions(exc, val, tb);
2627 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002628 }
2629 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002630 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002631}
2632
2633static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002634textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002635{
2636 PyObject *line;
2637
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002638 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002639
2640 self->telling = 0;
2641 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2642 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002643 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002644 }
2645 else {
2646 line = PyObject_CallMethodObjArgs((PyObject *)self,
2647 _PyIO_str_readline, NULL);
2648 if (line && !PyUnicode_Check(line)) {
2649 PyErr_Format(PyExc_IOError,
2650 "readline() should have returned an str object, "
2651 "not '%.200s'", Py_TYPE(line)->tp_name);
2652 Py_DECREF(line);
2653 return NULL;
2654 }
2655 }
2656
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002657 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002658 return NULL;
2659
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002660 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661 /* Reached EOF or would have blocked */
2662 Py_DECREF(line);
2663 Py_CLEAR(self->snapshot);
2664 self->telling = self->seekable;
2665 return NULL;
2666 }
2667
2668 return line;
2669}
2670
2671static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002672textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002674 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002675 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002676}
2677
2678static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002679textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002681 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2683}
2684
2685static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002686textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002687{
2688 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002689 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690 if (self->decoder == NULL)
2691 Py_RETURN_NONE;
2692 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2693 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002694 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2695 PyErr_Clear();
2696 Py_RETURN_NONE;
2697 }
2698 else {
2699 return NULL;
2700 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002701 }
2702 return res;
2703}
2704
2705static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002706textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002707{
2708 CHECK_INITIALIZED(self);
2709 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2710}
2711
2712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002713textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002714{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002715 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002716 return PyLong_FromSsize_t(self->chunk_size);
2717}
2718
2719static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002720textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002721{
2722 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002723 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002724 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002725 if (n == -1 && PyErr_Occurred())
2726 return -1;
2727 if (n <= 0) {
2728 PyErr_SetString(PyExc_ValueError,
2729 "a strictly positive integer is required");
2730 return -1;
2731 }
2732 self->chunk_size = n;
2733 return 0;
2734}
2735
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002736static PyMethodDef textiowrapper_methods[] = {
2737 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2738 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2739 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2740 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2741 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2742 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002743
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002744 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2745 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2746 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2747 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2748 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002749 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002750
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002751 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2752 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2753 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002754 {NULL, NULL}
2755};
2756
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002757static PyMemberDef textiowrapper_members[] = {
2758 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2759 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2760 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02002761 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002762 {NULL}
2763};
2764
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002765static PyGetSetDef textiowrapper_getset[] = {
2766 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2767 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002768/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2769*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002770 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2771 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2772 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2773 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002774 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002775};
2776
2777PyTypeObject PyTextIOWrapper_Type = {
2778 PyVarObject_HEAD_INIT(NULL, 0)
2779 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002780 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002781 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002782 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002783 0, /*tp_print*/
2784 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002785 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002786 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002787 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002788 0, /*tp_as_number*/
2789 0, /*tp_as_sequence*/
2790 0, /*tp_as_mapping*/
2791 0, /*tp_hash */
2792 0, /*tp_call*/
2793 0, /*tp_str*/
2794 0, /*tp_getattro*/
2795 0, /*tp_setattro*/
2796 0, /*tp_as_buffer*/
2797 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02002798 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002799 textiowrapper_doc, /* tp_doc */
2800 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2801 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002802 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002803 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002804 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002805 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2806 textiowrapper_methods, /* tp_methods */
2807 textiowrapper_members, /* tp_members */
2808 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002809 0, /* tp_base */
2810 0, /* tp_dict */
2811 0, /* tp_descr_get */
2812 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002813 offsetof(textio, dict), /*tp_dictoffset*/
2814 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002815 0, /* tp_alloc */
2816 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02002817 0, /* tp_free */
2818 0, /* tp_is_gc */
2819 0, /* tp_bases */
2820 0, /* tp_mro */
2821 0, /* tp_cache */
2822 0, /* tp_subclasses */
2823 0, /* tp_weaklist */
2824 0, /* tp_del */
2825 0, /* tp_version_tag */
2826 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002827};