blob: 140688fe229dd63ebee520c9065b0a2a118e868d [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Names of the attributes and methods that this file looks up by
   identifier (each line declares the matching PyId_<name> object). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
Antoine Pitrou712cb732013-12-21 15:51:54 +010048 _PyIO_State *state = IO_STATE();
49 if (state != NULL)
50 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000051 return NULL;
52}
53
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000054PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000055 "Separate the underlying buffer from the TextIOBase and return it.\n"
56 "\n"
57 "After the underlying buffer has been detached, the TextIO is in an\n"
58 "unusable state.\n"
59 );
60
61static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063{
64 return _unsupported("detach");
65}
66
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000067PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000068 "Read at most n characters from stream.\n"
69 "\n"
70 "Read from underlying buffer until we have n characters or we hit EOF.\n"
71 "If n is negative or omitted, read until EOF.\n"
72 );
73
74static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076{
77 return _unsupported("read");
78}
79
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000080PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000081 "Read until newline or EOF.\n"
82 "\n"
83 "Returns an empty string if EOF is hit immediately.\n"
84 );
85
86static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000087textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000088{
89 return _unsupported("readline");
90}
91
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000092PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000093 "Write string to stream.\n"
94 "Returns the number of characters written (which is always equal to\n"
95 "the length of the string).\n"
96 );
97
98static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000099textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100{
101 return _unsupported("write");
102}
103
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000104PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000105 "Encoding of the text stream.\n"
106 "\n"
107 "Subclasses should override.\n"
108 );
109
110static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000111textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000112{
113 Py_RETURN_NONE;
114}
115
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000116PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000117 "Line endings translated so far.\n"
118 "\n"
119 "Only line endings translated during reading are considered.\n"
120 "\n"
121 "Subclasses should override.\n"
122 );
123
124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126{
127 Py_RETURN_NONE;
128}
129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000130PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000131 "The error setting of the decoder or encoder.\n"
132 "\n"
133 "Subclasses should override.\n"
134 );
135
136static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000137textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000138{
139 Py_RETURN_NONE;
140}
141
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000142
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000143static PyMethodDef textiobase_methods[] = {
144 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
145 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
146 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
147 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000148 {NULL, NULL}
149};
150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyGetSetDef textiobase_getset[] = {
152 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
153 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
154 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000155 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156};
157
158PyTypeObject PyTextIOBase_Type = {
159 PyVarObject_HEAD_INIT(NULL, 0)
160 "_io._TextIOBase", /*tp_name*/
161 0, /*tp_basicsize*/
162 0, /*tp_itemsize*/
163 0, /*tp_dealloc*/
164 0, /*tp_print*/
165 0, /*tp_getattr*/
166 0, /*tp_setattr*/
167 0, /*tp_compare */
168 0, /*tp_repr*/
169 0, /*tp_as_number*/
170 0, /*tp_as_sequence*/
171 0, /*tp_as_mapping*/
172 0, /*tp_hash */
173 0, /*tp_call*/
174 0, /*tp_str*/
175 0, /*tp_getattro*/
176 0, /*tp_setattro*/
177 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200178 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
179 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000180 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000181 0, /* tp_traverse */
182 0, /* tp_clear */
183 0, /* tp_richcompare */
184 0, /* tp_weaklistoffset */
185 0, /* tp_iter */
186 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000189 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000190 &PyIOBase_Type, /* tp_base */
191 0, /* tp_dict */
192 0, /* tp_descr_get */
193 0, /* tp_descr_set */
194 0, /* tp_dictoffset */
195 0, /* tp_init */
196 0, /* tp_alloc */
197 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200198 0, /* tp_free */
199 0, /* tp_is_gc */
200 0, /* tp_bases */
201 0, /* tp_mro */
202 0, /* tp_cache */
203 0, /* tp_subclasses */
204 0, /* tp_weaklist */
205 0, /* tp_del */
206 0, /* tp_version_tag */
207 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000208};
209
210
211/* IncrementalNewlineDecoder */
212
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000213PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000214 "Codec used when reading a file in universal newlines mode. It wraps\n"
215 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
216 "records the types of newlines encountered. When used with\n"
217 "translate=False, it ensures that the newline sequence is returned in\n"
218 "one piece. When used with decoder=None, it expects unicode strings as\n"
219 "decode input and translates newlines without first invoking an external\n"
220 "decoder.\n"
221 );
222
223typedef struct {
224 PyObject_HEAD
225 PyObject *decoder;
226 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000227 signed int pendingcr: 1;
228 signed int translate: 1;
229 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000230} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
232static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000233incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000234 PyObject *args, PyObject *kwds)
235{
236 PyObject *decoder;
237 int translate;
238 PyObject *errors = NULL;
239 char *kwlist[] = {"decoder", "translate", "errors", NULL};
240
241 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
242 kwlist, &decoder, &translate, &errors))
243 return -1;
244
245 self->decoder = decoder;
246 Py_INCREF(decoder);
247
248 if (errors == NULL) {
249 self->errors = PyUnicode_FromString("strict");
250 if (self->errors == NULL)
251 return -1;
252 }
253 else {
254 Py_INCREF(errors);
255 self->errors = errors;
256 }
257
258 self->translate = translate;
259 self->seennl = 0;
260 self->pendingcr = 0;
261
262 return 0;
263}
264
265static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000266incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267{
268 Py_CLEAR(self->decoder);
269 Py_CLEAR(self->errors);
270 Py_TYPE(self)->tp_free((PyObject *)self);
271}
272
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200273static int
274check_decoded(PyObject *decoded)
275{
276 if (decoded == NULL)
277 return -1;
278 if (!PyUnicode_Check(decoded)) {
279 PyErr_Format(PyExc_TypeError,
280 "decoder should return a string result, not '%.200s'",
281 Py_TYPE(decoded)->tp_name);
282 Py_DECREF(decoded);
283 return -1;
284 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200285 if (PyUnicode_READY(decoded) < 0) {
286 Py_DECREF(decoded);
287 return -1;
288 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200289 return 0;
290}
291
/* Bit flags stored in nldecoder_object.seennl, one per newline style
   encountered while decoding. */
#define SEEN_CR   1     /* a lone \r */
#define SEEN_LF   2     /* a lone \n */
#define SEEN_CRLF 4     /* a \r\n pair */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
296
297PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200298_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000299 PyObject *input, int final)
300{
301 PyObject *output;
302 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200303 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000304
305 if (self->decoder == NULL) {
306 PyErr_SetString(PyExc_ValueError,
307 "IncrementalNewlineDecoder.__init__ not called");
308 return NULL;
309 }
310
311 /* decode input (with the eventual \r from a previous pass) */
312 if (self->decoder != Py_None) {
313 output = PyObject_CallMethodObjArgs(self->decoder,
314 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
315 }
316 else {
317 output = input;
318 Py_INCREF(output);
319 }
320
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200321 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000322 return NULL;
323
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000325 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 /* Prefix output with CR */
327 int kind;
328 PyObject *modified;
329 char *out;
330
331 modified = PyUnicode_New(output_len + 1,
332 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000333 if (modified == NULL)
334 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 kind = PyUnicode_KIND(modified);
336 out = PyUnicode_DATA(modified);
337 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200338 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000339 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200340 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000341 self->pendingcr = 0;
342 output_len++;
343 }
344
345 /* retain last \r even when not translating data:
346 * then readline() is sure to get \r\n in one pass
347 */
348 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000349 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200350 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
351 {
352 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
353 if (modified == NULL)
354 goto error;
355 Py_DECREF(output);
356 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000357 self->pendingcr = 1;
358 }
359 }
360
361 /* Record which newlines are read and do newline translation if desired,
362 all in one pass. */
363 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 Py_ssize_t len;
366 int seennl = self->seennl;
367 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 in_str = PyUnicode_DATA(output);
371 len = PyUnicode_GET_LENGTH(output);
372 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373
374 if (len == 0)
375 return output;
376
377 /* If, up to now, newlines are consistently \n, do a quick check
378 for the \r *byte* with the libc's optimized memchr.
379 */
380 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200381 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382 }
383
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 if (only_lf) {
385 /* If not already seen, quick scan for a possible "\n" character.
386 (there's nothing else to be done, even when in translation mode)
387 */
388 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200389 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100390 if (kind == PyUnicode_1BYTE_KIND)
391 seennl |= SEEN_LF;
392 else {
393 Py_ssize_t i = 0;
394 for (;;) {
395 Py_UCS4 c;
396 /* Fast loop for non-control characters */
397 while (PyUnicode_READ(kind, in_str, i) > '\n')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
400 if (c == '\n') {
401 seennl |= SEEN_LF;
402 break;
403 }
404 if (i >= len)
405 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000406 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000407 }
408 }
409 /* Finished: we have scanned for newlines, and none of them
410 need translating */
411 }
412 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000415 if (seennl == SEEN_ALL)
416 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000419 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 while (PyUnicode_READ(kind, in_str, i) > '\r')
421 i++;
422 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000423 if (c == '\n')
424 seennl |= SEEN_LF;
425 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000427 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000429 }
430 else
431 seennl |= SEEN_CR;
432 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 break;
435 if (seennl == SEEN_ALL)
436 break;
437 }
438 endscan:
439 ;
440 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000441 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 void *translated;
443 int kind = PyUnicode_KIND(output);
444 void *in_str = PyUnicode_DATA(output);
445 Py_ssize_t in, out;
446 /* XXX: Previous in-place translation here is disabled as
447 resizing is not possible anymore */
448 /* We could try to optimize this so that we only do a copy
449 when there is something to translate. On the other hand,
450 we already know there is a \r byte, so chances are high
451 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200452 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 if (translated == NULL) {
454 PyErr_NoMemory();
455 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000460 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
462 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 seennl |= SEEN_LF;
466 continue;
467 }
468 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 in++;
471 seennl |= SEEN_CRLF;
472 }
473 else
474 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200475 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 continue;
477 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200480 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000481 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 Py_DECREF(output);
483 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100484 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200486 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 }
488 self->seennl |= seennl;
489 }
490
491 return output;
492
493 error:
494 Py_DECREF(output);
495 return NULL;
496}
497
498static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000499incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000500 PyObject *args, PyObject *kwds)
501{
502 char *kwlist[] = {"input", "final", NULL};
503 PyObject *input;
504 int final = 0;
505
506 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
507 kwlist, &input, &final))
508 return NULL;
509 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
510}
511
512static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000513incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000514{
515 PyObject *buffer;
516 unsigned PY_LONG_LONG flag;
517
518 if (self->decoder != Py_None) {
519 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
520 _PyIO_str_getstate, NULL);
521 if (state == NULL)
522 return NULL;
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
524 Py_DECREF(state);
525 return NULL;
526 }
527 Py_INCREF(buffer);
528 Py_DECREF(state);
529 }
530 else {
531 buffer = PyBytes_FromString("");
532 flag = 0;
533 }
534 flag <<= 1;
535 if (self->pendingcr)
536 flag |= 1;
537 return Py_BuildValue("NK", buffer, flag);
538}
539
540static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000541incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542{
543 PyObject *buffer;
544 unsigned PY_LONG_LONG flag;
545
546 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
547 return NULL;
548
549 self->pendingcr = (int) flag & 1;
550 flag >>= 1;
551
552 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200553 return _PyObject_CallMethodId(self->decoder,
554 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000555 else
556 Py_RETURN_NONE;
557}
558
559static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000560incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000561{
562 self->seennl = 0;
563 self->pendingcr = 0;
564 if (self->decoder != Py_None)
565 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
566 else
567 Py_RETURN_NONE;
568}
569
570static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000571incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 switch (self->seennl) {
574 case SEEN_CR:
575 return PyUnicode_FromString("\r");
576 case SEEN_LF:
577 return PyUnicode_FromString("\n");
578 case SEEN_CRLF:
579 return PyUnicode_FromString("\r\n");
580 case SEEN_CR | SEEN_LF:
581 return Py_BuildValue("ss", "\r", "\n");
582 case SEEN_CR | SEEN_CRLF:
583 return Py_BuildValue("ss", "\r", "\r\n");
584 case SEEN_LF | SEEN_CRLF:
585 return Py_BuildValue("ss", "\n", "\r\n");
586 case SEEN_CR | SEEN_LF | SEEN_CRLF:
587 return Py_BuildValue("sss", "\r", "\n", "\r\n");
588 default:
589 Py_RETURN_NONE;
590 }
591
592}
593
594
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000595static PyMethodDef incrementalnewlinedecoder_methods[] = {
596 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
597 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
598 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
599 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000600 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601};
602
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000603static PyGetSetDef incrementalnewlinedecoder_getset[] = {
604 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000605 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606};
607
608PyTypeObject PyIncrementalNewlineDecoder_Type = {
609 PyVarObject_HEAD_INIT(NULL, 0)
610 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /*tp_print*/
615 0, /*tp_getattr*/
616 0, /*tp_setattr*/
617 0, /*tp_compare */
618 0, /*tp_repr*/
619 0, /*tp_as_number*/
620 0, /*tp_as_sequence*/
621 0, /*tp_as_mapping*/
622 0, /*tp_hash */
623 0, /*tp_call*/
624 0, /*tp_str*/
625 0, /*tp_getattro*/
626 0, /*tp_setattro*/
627 0, /*tp_as_buffer*/
628 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 0, /* tp_traverse */
631 0, /* tp_clear */
632 0, /* tp_richcompare */
633 0, /*tp_weaklistoffset*/
634 0, /* tp_iter */
635 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000636 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000638 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000639 0, /* tp_base */
640 0, /* tp_dict */
641 0, /* tp_descr_get */
642 0, /* tp_descr_set */
643 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000644 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 0, /* tp_alloc */
646 PyType_GenericNew, /* tp_new */
647};
648
649
650/* TextIOWrapper */
651
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000652PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 "Character and line based layer over a BufferedIOBase object, buffer.\n"
654 "\n"
655 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200656 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657 "\n"
Andrew Kuchlingc7b6c502013-06-16 12:58:48 -0400658 "errors determines the strictness of encoding and decoding (see\n"
659 "help(codecs.Codec) or the documentation for codecs.register) and\n"
660 "defaults to \"strict\".\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200662 "newline controls how line endings are handled. It can be None, '',\n"
663 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
664 "\n"
665 "* On input, if newline is None, universal newlines mode is\n"
666 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
667 " these are translated into '\\n' before being returned to the\n"
668 " caller. If it is '', universal newline mode is enabled, but line\n"
669 " endings are returned to the caller untranslated. If it has any of\n"
670 " the other legal values, input lines are only terminated by the given\n"
671 " string, and the line ending is returned to the caller untranslated.\n"
672 "\n"
673 "* On output, if newline is None, any '\\n' characters written are\n"
674 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300675 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200676 " of the other legal values, any '\\n' characters written are translated\n"
677 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 "\n"
679 "If line_buffering is True, a call to flush is implied when a call to\n"
680 "write contains a newline character."
681 );
682
683typedef PyObject *
684 (*encodefunc_t)(PyObject *, PyObject *);
685
686typedef struct
687{
688 PyObject_HEAD
689 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000690 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000691 Py_ssize_t chunk_size;
692 PyObject *buffer;
693 PyObject *encoding;
694 PyObject *encoder;
695 PyObject *decoder;
696 PyObject *readnl;
697 PyObject *errors;
698 const char *writenl; /* utf-8 encoded, NULL stands for \n */
699 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200700 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701 char readuniversal;
702 char readtranslate;
703 char writetranslate;
704 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200705 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200707 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708 /* Specialized encoding func (see below) */
709 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000710 /* Whether or not it's the start of the stream */
711 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712
713 /* Reads and writes are internally buffered in order to speed things up.
714 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000715
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716 Please also note that text to be written is first encoded before being
717 buffered. This is necessary so that encoding errors are immediately
718 reported to the caller, but it unfortunately means that the
719 IncrementalEncoder (whose encode() method is always written in Python)
720 becomes a bottleneck for small writes.
721 */
722 PyObject *decoded_chars; /* buffer for text returned from decoder */
723 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
724 PyObject *pending_bytes; /* list of bytes objects waiting to be
725 written, or NULL */
726 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000727
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728 /* snapshot is either None, or a tuple (dec_flags, next_input) where
729 * dec_flags is the second (integer) item of the decoder state and
730 * next_input is the chunk of input bytes that comes next after the
731 * snapshot point. We use this to reconstruct decoder states in tell().
732 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000733 PyObject *snapshot;
734 /* Bytes-to-characters ratio for the current chunk. Serves as input for
735 the heuristic in tell(). */
736 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737
738 /* Cache raw object if it's a FileIO object */
739 PyObject *raw;
740
741 PyObject *weakreflist;
742 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000743} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744
745
746/* A couple of specialized cases in order to bypass the slow incremental
747 encoding methods for the most popular encodings. */
748
749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000750ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200752 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000757{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100758 return _PyUnicode_EncodeUTF16(text,
759 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760}
761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100765 return _PyUnicode_EncodeUTF16(text,
766 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767}
768
769static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000770utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771{
Antoine Pitroue4501852009-05-14 18:55:55 +0000772 if (!self->encoding_start_of_stream) {
773 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200774#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000775 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000777 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000779 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100780 return _PyUnicode_EncodeUTF16(text,
781 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782}
783
Antoine Pitroue4501852009-05-14 18:55:55 +0000784static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000785utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000786{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100787 return _PyUnicode_EncodeUTF32(text,
788 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000789}
790
791static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000792utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000793{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100794 return _PyUnicode_EncodeUTF32(text,
795 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000796}
797
798static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000799utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000800{
801 if (!self->encoding_start_of_stream) {
802 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200803#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000804 return utf32be_encode(self, text);
805#else
806 return utf32le_encode(self, text);
807#endif
808 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100809 return _PyUnicode_EncodeUTF32(text,
810 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000811}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812
813static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000814utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000815{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000817}
818
819static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000820latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000821{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200822 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000823}
824
825/* Map normalized encoding names onto the specialized encoding funcs */
826
827typedef struct {
828 const char *name;
829 encodefunc_t encodefunc;
830} encodefuncentry;
831
Antoine Pitrou24f36292009-03-28 22:16:42 +0000832static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 {"ascii", (encodefunc_t) ascii_encode},
834 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000835 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 {"utf-16-be", (encodefunc_t) utf16be_encode},
837 {"utf-16-le", (encodefunc_t) utf16le_encode},
838 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000839 {"utf-32-be", (encodefunc_t) utf32be_encode},
840 {"utf-32-le", (encodefunc_t) utf32le_encode},
841 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000842 {NULL, NULL}
843};
844
845
846static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000847textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000848{
849 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200850 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000851 NULL};
Nick Coghlana9b15242014-02-04 22:11:18 +1000852 PyObject *buffer, *raw, *codec_info = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000853 char *encoding = NULL;
854 char *errors = NULL;
855 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200856 int line_buffering = 0, write_through = 0;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100857 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000858
859 PyObject *res;
860 int r;
861
862 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000863 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200864 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000865 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200866 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000867 return -1;
868
869 if (newline && newline[0] != '\0'
870 && !(newline[0] == '\n' && newline[1] == '\0')
871 && !(newline[0] == '\r' && newline[1] == '\0')
872 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
873 PyErr_Format(PyExc_ValueError,
874 "illegal newline value: %s", newline);
875 return -1;
876 }
877
878 Py_CLEAR(self->buffer);
879 Py_CLEAR(self->encoding);
880 Py_CLEAR(self->encoder);
881 Py_CLEAR(self->decoder);
882 Py_CLEAR(self->readnl);
883 Py_CLEAR(self->decoded_chars);
884 Py_CLEAR(self->pending_bytes);
885 Py_CLEAR(self->snapshot);
886 Py_CLEAR(self->errors);
887 Py_CLEAR(self->raw);
888 self->decoded_chars_used = 0;
889 self->pending_bytes_count = 0;
890 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000891 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000892
893 if (encoding == NULL) {
894 /* Try os.device_encoding(fileno) */
895 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100896 state = IO_STATE();
897 if (state == NULL)
898 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200899 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000900 /* Ignore only AttributeError and UnsupportedOperation */
901 if (fileno == NULL) {
902 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
903 PyErr_ExceptionMatches(state->unsupported_operation)) {
904 PyErr_Clear();
905 }
906 else {
907 goto error;
908 }
909 }
910 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200911 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500912 Py_DECREF(fileno);
913 if (fd == -1 && PyErr_Occurred()) {
914 goto error;
915 }
916
917 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000918 if (self->encoding == NULL)
919 goto error;
920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
922 }
923 }
924 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200925 PyObject *locale_module = _PyIO_get_locale_module(state);
926 if (locale_module == NULL)
927 goto catch_ImportError;
928 self->encoding = _PyObject_CallMethodId(
929 locale_module, &PyId_getpreferredencoding, "O", Py_False);
930 Py_DECREF(locale_module);
931 if (self->encoding == NULL) {
932 catch_ImportError:
933 /*
934 Importing locale can raise a ImportError because of
935 _functools, and locale.getpreferredencoding can raise a
936 ImportError if _locale is not available. These will happen
937 during module building.
938 */
939 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
940 PyErr_Clear();
941 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000942 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200943 else
944 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000945 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200946 else if (!PyUnicode_Check(self->encoding))
947 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000948 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000949 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000950 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000951 if (encoding == NULL)
952 goto error;
953 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000954 else if (encoding != NULL) {
955 self->encoding = PyUnicode_FromString(encoding);
956 if (self->encoding == NULL)
957 goto error;
958 }
959 else {
960 PyErr_SetString(PyExc_IOError,
961 "could not determine default encoding");
962 }
963
Nick Coghlana9b15242014-02-04 22:11:18 +1000964 /* Check we have been asked for a real text encoding */
965 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
966 if (codec_info == NULL) {
967 Py_CLEAR(self->encoding);
968 goto error;
969 }
970
971 /* XXX: Failures beyond this point have the potential to leak elements
972 * of the partially constructed object (like self->encoding)
973 */
974
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000975 if (errors == NULL)
976 errors = "strict";
977 self->errors = PyBytes_FromString(errors);
978 if (self->errors == NULL)
979 goto error;
980
981 self->chunk_size = 8192;
982 self->readuniversal = (newline == NULL || newline[0] == '\0');
983 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200984 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000985 self->readtranslate = (newline == NULL);
986 if (newline) {
987 self->readnl = PyUnicode_FromString(newline);
988 if (self->readnl == NULL)
Nick Coghlana9b15242014-02-04 22:11:18 +1000989 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990 }
991 self->writetranslate = (newline == NULL || newline[0] != '\0');
992 if (!self->readuniversal && self->readnl) {
993 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000994 if (self->writenl == NULL)
995 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000996 if (!strcmp(self->writenl, "\n"))
997 self->writenl = NULL;
998 }
999#ifdef MS_WINDOWS
1000 else
1001 self->writenl = "\r\n";
1002#endif
1003
1004 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001005 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001006 if (res == NULL)
1007 goto error;
1008 r = PyObject_IsTrue(res);
1009 Py_DECREF(res);
1010 if (r == -1)
1011 goto error;
1012 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001013 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
1014 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001015 if (self->decoder == NULL)
1016 goto error;
1017
1018 if (self->readuniversal) {
1019 PyObject *incrementalDecoder = PyObject_CallFunction(
1020 (PyObject *)&PyIncrementalNewlineDecoder_Type,
1021 "Oi", self->decoder, (int)self->readtranslate);
1022 if (incrementalDecoder == NULL)
1023 goto error;
1024 Py_CLEAR(self->decoder);
1025 self->decoder = incrementalDecoder;
1026 }
1027 }
1028
1029 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001030 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001031 if (res == NULL)
1032 goto error;
1033 r = PyObject_IsTrue(res);
1034 Py_DECREF(res);
1035 if (r == -1)
1036 goto error;
1037 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001038 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1039 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 if (self->encoder == NULL)
1041 goto error;
1042 /* Get the normalized named of the codec */
Nick Coghlana9b15242014-02-04 22:11:18 +10001043 res = _PyObject_GetAttrId(codec_info, &PyId_name);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001044 if (res == NULL) {
1045 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 PyErr_Clear();
1047 else
1048 goto error;
1049 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 else if (PyUnicode_Check(res)) {
1051 encodefuncentry *e = encodefuncs;
1052 while (e->name != NULL) {
1053 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1054 self->encodefunc = e->encodefunc;
1055 break;
1056 }
1057 e++;
1058 }
1059 }
1060 Py_XDECREF(res);
1061 }
1062
Nick Coghlana9b15242014-02-04 22:11:18 +10001063 /* Finished sorting out the codec details */
1064 Py_DECREF(codec_info);
1065
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001066 self->buffer = buffer;
1067 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001068
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001069 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1070 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1071 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001072 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001073 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001074 if (raw == NULL) {
1075 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1076 PyErr_Clear();
1077 else
1078 goto error;
1079 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001080 else if (Py_TYPE(raw) == &PyFileIO_Type)
1081 self->raw = raw;
1082 else
1083 Py_DECREF(raw);
1084 }
1085
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001086 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087 if (res == NULL)
1088 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001089 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001090 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001091 if (r < 0)
1092 goto error;
1093 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001094
Martin v. Löwis767046a2011-10-14 15:35:36 +02001095 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001096
Antoine Pitroue4501852009-05-14 18:55:55 +00001097 self->encoding_start_of_stream = 0;
1098 if (self->seekable && self->encoder) {
1099 PyObject *cookieObj;
1100 int cmp;
1101
1102 self->encoding_start_of_stream = 1;
1103
1104 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1105 if (cookieObj == NULL)
1106 goto error;
1107
1108 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1109 Py_DECREF(cookieObj);
1110 if (cmp < 0) {
1111 goto error;
1112 }
1113
1114 if (cmp == 0) {
1115 self->encoding_start_of_stream = 0;
1116 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1117 _PyIO_zero, NULL);
1118 if (res == NULL)
1119 goto error;
1120 Py_DECREF(res);
1121 }
1122 }
1123
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124 self->ok = 1;
1125 return 0;
1126
1127 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001128 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001129 return -1;
1130}
1131
1132static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001133_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001134{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 self->ok = 0;
1136 Py_CLEAR(self->buffer);
1137 Py_CLEAR(self->encoding);
1138 Py_CLEAR(self->encoder);
1139 Py_CLEAR(self->decoder);
1140 Py_CLEAR(self->readnl);
1141 Py_CLEAR(self->decoded_chars);
1142 Py_CLEAR(self->pending_bytes);
1143 Py_CLEAR(self->snapshot);
1144 Py_CLEAR(self->errors);
1145 Py_CLEAR(self->raw);
1146 return 0;
1147}
1148
1149static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001150textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001151{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001152 self->finalizing = 1;
1153 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001154 return;
Antoine Pitrou796564c2013-07-30 19:59:21 +02001155 _textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001156 _PyObject_GC_UNTRACK(self);
1157 if (self->weakreflist != NULL)
1158 PyObject_ClearWeakRefs((PyObject *)self);
1159 Py_CLEAR(self->dict);
1160 Py_TYPE(self)->tp_free((PyObject *)self);
1161}
1162
1163static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001164textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001165{
1166 Py_VISIT(self->buffer);
1167 Py_VISIT(self->encoding);
1168 Py_VISIT(self->encoder);
1169 Py_VISIT(self->decoder);
1170 Py_VISIT(self->readnl);
1171 Py_VISIT(self->decoded_chars);
1172 Py_VISIT(self->pending_bytes);
1173 Py_VISIT(self->snapshot);
1174 Py_VISIT(self->errors);
1175 Py_VISIT(self->raw);
1176
1177 Py_VISIT(self->dict);
1178 return 0;
1179}
1180
1181static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001182textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001184 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001185 return -1;
1186 Py_CLEAR(self->dict);
1187 return 0;
1188}
1189
1190static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001191textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001192
/* This macro takes some shortcuts to make the common case faster.
 *
 * It makes the enclosing function return NULL, with ValueError set, when the
 * wrapper is closed.  For exact TextIOWrapper instances the cached raw
 * FileIO object is queried directly when available, otherwise the `closed`
 * getter is used; for any other type the generic IOBase check is used.
 * Only usable in functions returning a pointer.  The argument is
 * parenthesized so that any pointer expression can be passed.
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1219
/* Make the enclosing function return NULL, with ValueError set, unless the
 * wrapper is fully initialized (ok > 0).  The message distinguishes a
 * detached wrapper from one that was never initialized.
 * The argument is parenthesized so that any pointer expression can be
 * passed.  The expansion deliberately stays a bare `if` block, so its shape
 * is unchanged for existing call sites.
 */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1231
/* Same check as CHECK_INITIALIZED, for functions that report errors by
 * returning -1: makes the enclosing function return -1, with ValueError
 * set, unless the wrapper is fully initialized (ok > 0).
 * The argument is parenthesized so that any pointer expression can be
 * passed.  The expansion deliberately stays a bare `if` block, so its shape
 * is unchanged for existing call sites.
 */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1243
1244
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001245static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001246textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001247{
1248 PyObject *buffer, *res;
1249 CHECK_INITIALIZED(self);
1250 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1251 if (res == NULL)
1252 return NULL;
1253 Py_DECREF(res);
1254 buffer = self->buffer;
1255 self->buffer = NULL;
1256 self->detached = 1;
1257 self->ok = 0;
1258 return buffer;
1259}
1260
Antoine Pitrou24f36292009-03-28 22:16:42 +00001261/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001262 underlying buffered object, though. */
1263static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001264_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001266 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001267
1268 if (self->pending_bytes == NULL)
1269 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001270
1271 pending = self->pending_bytes;
1272 Py_INCREF(pending);
1273 self->pending_bytes_count = 0;
1274 Py_CLEAR(self->pending_bytes);
1275
1276 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1277 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001278 if (b == NULL)
1279 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001280 ret = NULL;
1281 do {
1282 ret = PyObject_CallMethodObjArgs(self->buffer,
1283 _PyIO_str_write, b, NULL);
1284 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001285 Py_DECREF(b);
1286 if (ret == NULL)
1287 return -1;
1288 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001289 return 0;
1290}
1291
1292static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001293textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001294{
1295 PyObject *ret;
1296 PyObject *text; /* owned reference */
1297 PyObject *b;
1298 Py_ssize_t textlen;
1299 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001300 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001301
1302 CHECK_INITIALIZED(self);
1303
1304 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1305 return NULL;
1306 }
1307
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 if (PyUnicode_READY(text) == -1)
1309 return NULL;
1310
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001311 CHECK_CLOSED(self);
1312
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001313 if (self->encoder == NULL)
1314 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001315
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001316 Py_INCREF(text);
1317
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001319
1320 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001321 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 haslf = 1;
1323
1324 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001325 PyObject *newtext = _PyObject_CallMethodId(
1326 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001327 Py_DECREF(text);
1328 if (newtext == NULL)
1329 return NULL;
1330 text = newtext;
1331 }
1332
Antoine Pitroue96ec682011-07-23 21:46:35 +02001333 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001334 text_needflush = 1;
1335 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001336 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001337 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001338 needflush = 1;
1339
1340 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001341 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001342 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001343 self->encoding_start_of_stream = 0;
1344 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001345 else
1346 b = PyObject_CallMethodObjArgs(self->encoder,
1347 _PyIO_str_encode, text, NULL);
1348 Py_DECREF(text);
1349 if (b == NULL)
1350 return NULL;
1351
1352 if (self->pending_bytes == NULL) {
1353 self->pending_bytes = PyList_New(0);
1354 if (self->pending_bytes == NULL) {
1355 Py_DECREF(b);
1356 return NULL;
1357 }
1358 self->pending_bytes_count = 0;
1359 }
1360 if (PyList_Append(self->pending_bytes, b) < 0) {
1361 Py_DECREF(b);
1362 return NULL;
1363 }
1364 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1365 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001366 if (self->pending_bytes_count > self->chunk_size || needflush ||
1367 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001368 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001369 return NULL;
1370 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001371
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001372 if (needflush) {
1373 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1374 if (ret == NULL)
1375 return NULL;
1376 Py_DECREF(ret);
1377 }
1378
1379 Py_CLEAR(self->snapshot);
1380
1381 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001382 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001383 if (ret == NULL)
1384 return NULL;
1385 Py_DECREF(ret);
1386 }
1387
1388 return PyLong_FromSsize_t(textlen);
1389}
1390
1391/* Steal a reference to chars and store it in the decoded_char buffer;
1392 */
1393static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001394textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001395{
1396 Py_CLEAR(self->decoded_chars);
1397 self->decoded_chars = chars;
1398 self->decoded_chars_used = 0;
1399}
1400
1401static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001402textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001403{
1404 PyObject *chars;
1405 Py_ssize_t avail;
1406
1407 if (self->decoded_chars == NULL)
1408 return PyUnicode_FromStringAndSize(NULL, 0);
1409
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001410 /* decoded_chars is guaranteed to be "ready". */
1411 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001412 - self->decoded_chars_used);
1413
1414 assert(avail >= 0);
1415
1416 if (n < 0 || n > avail)
1417 n = avail;
1418
1419 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001420 chars = PyUnicode_Substring(self->decoded_chars,
1421 self->decoded_chars_used,
1422 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001423 if (chars == NULL)
1424 return NULL;
1425 }
1426 else {
1427 chars = self->decoded_chars;
1428 Py_INCREF(chars);
1429 }
1430
1431 self->decoded_chars_used += n;
1432 return chars;
1433}
1434
1435/* Read and decode the next chunk of data from the BufferedReader.
1436 */
1437static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001438textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001439{
1440 PyObject *dec_buffer = NULL;
1441 PyObject *dec_flags = NULL;
1442 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001443 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001444 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001445 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001446 int eof;
1447
1448 /* The return value is True unless EOF was reached. The decoded string is
1449 * placed in self._decoded_chars (replacing its previous value). The
1450 * entire input chunk is sent to the decoder, though some of it may remain
1451 * buffered in the decoder, yet to be converted.
1452 */
1453
1454 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001455 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001456 return -1;
1457 }
1458
1459 if (self->telling) {
1460 /* To prepare for tell(), we need to snapshot a point in the file
1461 * where the decoder's input buffer is empty.
1462 */
1463
1464 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1465 _PyIO_str_getstate, NULL);
1466 if (state == NULL)
1467 return -1;
1468 /* Given this, we know there was a valid snapshot point
1469 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1470 */
1471 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1472 Py_DECREF(state);
1473 return -1;
1474 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001475
1476 if (!PyBytes_Check(dec_buffer)) {
1477 PyErr_Format(PyExc_TypeError,
1478 "decoder getstate() should have returned a bytes "
1479 "object, not '%.200s'",
1480 Py_TYPE(dec_buffer)->tp_name);
1481 Py_DECREF(state);
1482 return -1;
1483 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001484 Py_INCREF(dec_buffer);
1485 Py_INCREF(dec_flags);
1486 Py_DECREF(state);
1487 }
1488
1489 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001490 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001491 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001492 }
1493 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001494 if (chunk_size == NULL)
1495 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001496
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001497 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001498 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1499 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001500 Py_DECREF(chunk_size);
1501 if (input_chunk == NULL)
1502 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001503
1504 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001505 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001506 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001507 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1508 Py_TYPE(input_chunk)->tp_name);
1509 goto fail;
1510 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511
Antoine Pitroub8503892014-04-29 10:14:02 +02001512 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001513 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001514 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1515 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1516 self->decoder, input_chunk, eof);
1517 }
1518 else {
1519 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1520 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1521 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001522 PyBuffer_Release(&input_chunk_buf);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001523
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001524 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001525 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001526 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001527 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001528 if (nchars > 0)
1529 self->b2cratio = (double) nbytes / nchars;
1530 else
1531 self->b2cratio = 0.0;
1532 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001533 eof = 0;
1534
1535 if (self->telling) {
1536 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1537 * next input to be decoded is dec_buffer + input_chunk.
1538 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001539 PyObject *next_input = dec_buffer;
1540 PyBytes_Concat(&next_input, input_chunk);
1541 if (next_input == NULL) {
1542 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001543 goto fail;
1544 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001545 Py_CLEAR(self->snapshot);
1546 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1547 }
1548 Py_DECREF(input_chunk);
1549
1550 return (eof == 0);
1551
1552 fail:
1553 Py_XDECREF(dec_buffer);
1554 Py_XDECREF(dec_flags);
1555 Py_XDECREF(input_chunk);
1556 return -1;
1557}
1558
1559static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001560textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561{
1562 Py_ssize_t n = -1;
1563 PyObject *result = NULL, *chunks = NULL;
1564
1565 CHECK_INITIALIZED(self);
1566
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001567 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001568 return NULL;
1569
1570 CHECK_CLOSED(self);
1571
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001572 if (self->decoder == NULL)
1573 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001574
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001575 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576 return NULL;
1577
1578 if (n < 0) {
1579 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001580 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001581 PyObject *decoded;
1582 if (bytes == NULL)
1583 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001584
1585 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1586 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1587 bytes, 1);
1588 else
1589 decoded = PyObject_CallMethodObjArgs(
1590 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001591 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001592 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593 goto fail;
1594
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001595 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001596
1597 if (result == NULL) {
1598 Py_DECREF(decoded);
1599 return NULL;
1600 }
1601
1602 PyUnicode_AppendAndDel(&result, decoded);
1603 if (result == NULL)
1604 goto fail;
1605
1606 Py_CLEAR(self->snapshot);
1607 return result;
1608 }
1609 else {
1610 int res = 1;
1611 Py_ssize_t remaining = n;
1612
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001613 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 if (result == NULL)
1615 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001616 if (PyUnicode_READY(result) == -1)
1617 goto fail;
1618 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001619
1620 /* Keep reading chunks until we have n characters to return */
1621 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001622 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001623 if (res < 0) {
1624 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1625 when EINTR occurs so we needn't do it ourselves. */
1626 if (_PyIO_trap_eintr()) {
1627 continue;
1628 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001630 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001631 if (res == 0) /* EOF */
1632 break;
1633 if (chunks == NULL) {
1634 chunks = PyList_New(0);
1635 if (chunks == NULL)
1636 goto fail;
1637 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001638 if (PyUnicode_GET_LENGTH(result) > 0 &&
1639 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640 goto fail;
1641 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001642 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643 if (result == NULL)
1644 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001645 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001646 }
1647 if (chunks != NULL) {
1648 if (result != NULL && PyList_Append(chunks, result) < 0)
1649 goto fail;
1650 Py_CLEAR(result);
1651 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1652 if (result == NULL)
1653 goto fail;
1654 Py_CLEAR(chunks);
1655 }
1656 return result;
1657 }
1658 fail:
1659 Py_XDECREF(result);
1660 Py_XDECREF(chunks);
1661 return NULL;
1662}
1663
1664
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001665/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001666 that is to the NUL character. Otherwise the function will produce
1667 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668static char *
1669find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001671 if (kind == PyUnicode_1BYTE_KIND) {
1672 assert(ch < 256);
1673 return (char *) memchr((void *) s, (char) ch, end - s);
1674 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001676 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001677 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001678 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001679 return s;
1680 if (s == end)
1681 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001682 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001683 }
1684}
1685
1686Py_ssize_t
1687_PyIO_find_line_ending(
1688 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001689 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001690{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001691 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692
1693 if (translated) {
1694 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001695 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001696 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001697 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001698 else {
1699 *consumed = len;
1700 return -1;
1701 }
1702 }
1703 else if (universal) {
1704 /* Universal newline search. Find any of \r, \r\n, \n
1705 * The decoder ensures that \r\n are not split in two pieces
1706 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001707 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001708 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001709 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001711 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001712 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001713 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 if (s >= end) {
1715 *consumed = len;
1716 return -1;
1717 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001718 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001719 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001721 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001723 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001724 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001725 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001726 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001727 }
1728 }
1729 }
1730 else {
1731 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001732 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1733 char *nl = PyUnicode_DATA(readnl);
1734 /* Assume that readnl is an ASCII character. */
1735 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001736 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001737 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001738 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001739 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 *consumed = len;
1741 return -1;
1742 }
1743 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001744 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001745 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001746 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747 if (e < s)
1748 e = s;
1749 while (s < e) {
1750 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001751 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752 if (pos == NULL || pos >= e)
1753 break;
1754 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001755 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756 break;
1757 }
1758 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001759 return (pos - start)/kind + readnl_len;
1760 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001762 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001763 if (pos == NULL)
1764 *consumed = len;
1765 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001766 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001767 return -1;
1768 }
1769 }
1770}
1771
1772static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001773_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001774{
1775 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1776 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1777 int res;
1778
1779 CHECK_CLOSED(self);
1780
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001781 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001782 return NULL;
1783
1784 chunked = 0;
1785
1786 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001787 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001788 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001789 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001790 Py_ssize_t consumed = 0;
1791
1792 /* First, get some data if necessary */
1793 res = 1;
1794 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001795 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001796 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001797 if (res < 0) {
1798 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1799 when EINTR occurs so we needn't do it ourselves. */
1800 if (_PyIO_trap_eintr()) {
1801 continue;
1802 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001803 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001804 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805 if (res == 0)
1806 break;
1807 }
1808 if (res == 0) {
1809 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001810 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001811 Py_CLEAR(self->snapshot);
1812 start = endpos = offset_to_buffer = 0;
1813 break;
1814 }
1815
1816 if (remaining == NULL) {
1817 line = self->decoded_chars;
1818 start = self->decoded_chars_used;
1819 offset_to_buffer = 0;
1820 Py_INCREF(line);
1821 }
1822 else {
1823 assert(self->decoded_chars_used == 0);
1824 line = PyUnicode_Concat(remaining, self->decoded_chars);
1825 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001826 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827 Py_CLEAR(remaining);
1828 if (line == NULL)
1829 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001830 if (PyUnicode_READY(line) == -1)
1831 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001832 }
1833
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001834 ptr = PyUnicode_DATA(line);
1835 line_len = PyUnicode_GET_LENGTH(line);
1836 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001837
1838 endpos = _PyIO_find_line_ending(
1839 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001840 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001841 ptr + kind * start,
1842 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001843 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001844 if (endpos >= 0) {
1845 endpos += start;
1846 if (limit >= 0 && (endpos - start) + chunked >= limit)
1847 endpos = start + limit - chunked;
1848 break;
1849 }
1850
1851 /* We can put aside up to `endpos` */
1852 endpos = consumed + start;
1853 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1854 /* Didn't find line ending, but reached length limit */
1855 endpos = start + limit - chunked;
1856 break;
1857 }
1858
1859 if (endpos > start) {
1860 /* No line ending seen yet - put aside current data */
1861 PyObject *s;
1862 if (chunks == NULL) {
1863 chunks = PyList_New(0);
1864 if (chunks == NULL)
1865 goto error;
1866 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001867 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001868 if (s == NULL)
1869 goto error;
1870 if (PyList_Append(chunks, s) < 0) {
1871 Py_DECREF(s);
1872 goto error;
1873 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001874 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001875 Py_DECREF(s);
1876 }
1877 /* There may be some remaining bytes we'll have to prepend to the
1878 next chunk of data */
1879 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001880 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001881 if (remaining == NULL)
1882 goto error;
1883 }
1884 Py_CLEAR(line);
1885 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001886 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001887 }
1888
1889 if (line != NULL) {
1890 /* Our line ends in the current buffer */
1891 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001892 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1893 PyObject *s = PyUnicode_Substring(line, start, endpos);
1894 Py_CLEAR(line);
1895 if (s == NULL)
1896 goto error;
1897 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001898 }
1899 }
1900 if (remaining != NULL) {
1901 if (chunks == NULL) {
1902 chunks = PyList_New(0);
1903 if (chunks == NULL)
1904 goto error;
1905 }
1906 if (PyList_Append(chunks, remaining) < 0)
1907 goto error;
1908 Py_CLEAR(remaining);
1909 }
1910 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001911 if (line != NULL) {
1912 if (PyList_Append(chunks, line) < 0)
1913 goto error;
1914 Py_DECREF(line);
1915 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001916 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1917 if (line == NULL)
1918 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001919 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001920 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001921 if (line == NULL) {
1922 Py_INCREF(_PyIO_empty_str);
1923 line = _PyIO_empty_str;
1924 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925
1926 return line;
1927
1928 error:
1929 Py_XDECREF(chunks);
1930 Py_XDECREF(remaining);
1931 Py_XDECREF(line);
1932 return NULL;
1933}
1934
1935static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001936textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001937{
1938 Py_ssize_t limit = -1;
1939
1940 CHECK_INITIALIZED(self);
1941 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1942 return NULL;
1943 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001944 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001945}
1946
1947/* Seek and Tell */
1948
1949typedef struct {
1950 Py_off_t start_pos;
1951 int dec_flags;
1952 int bytes_to_feed;
1953 int chars_to_skip;
1954 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001955} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001956
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the
   intermediary byte string the various cookie_type fields will be stored.
   Each offset is expressed as the previous field's offset plus the
   previous field's size.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1988
1989static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001990textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001991{
1992 unsigned char buffer[COOKIE_BUF_LEN];
1993 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1994 if (cookieLong == NULL)
1995 return -1;
1996
1997 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001998 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 Py_DECREF(cookieLong);
2000 return -1;
2001 }
2002 Py_DECREF(cookieLong);
2003
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002004 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2005 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2006 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2007 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2008 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009
2010 return 0;
2011}
2012
2013static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002014textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002015{
2016 unsigned char buffer[COOKIE_BUF_LEN];
2017
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002018 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2019 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2020 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2021 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2022 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002023
Christian Heimes743e0cd2012-10-17 23:52:17 +02002024 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2025 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002026}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027
2028static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002029_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030{
2031 PyObject *res;
2032 /* When seeking to the start of the stream, we call decoder.reset()
2033 rather than decoder.getstate().
2034 This is for a few decoders such as utf-16 for which the state value
2035 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2036 utf-16, that we are expecting a BOM).
2037 */
2038 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2039 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2040 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002041 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2042 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002043 if (res == NULL)
2044 return -1;
2045 Py_DECREF(res);
2046 return 0;
2047}
2048
Antoine Pitroue4501852009-05-14 18:55:55 +00002049static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002050_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002051{
2052 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002053 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002054 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2055 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2056 self->encoding_start_of_stream = 1;
2057 }
2058 else {
2059 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2060 _PyIO_zero, NULL);
2061 self->encoding_start_of_stream = 0;
2062 }
2063 if (res == NULL)
2064 return -1;
2065 Py_DECREF(res);
2066 return 0;
2067}
2068
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002069static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002070textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071{
2072 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002073 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002075 PyObject *res;
2076 int cmp;
2077
2078 CHECK_INITIALIZED(self);
2079
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2081 return NULL;
2082 CHECK_CLOSED(self);
2083
2084 Py_INCREF(cookieObj);
2085
2086 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002087 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002088 goto fail;
2089 }
2090
2091 if (whence == 1) {
2092 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002093 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002094 if (cmp < 0)
2095 goto fail;
2096
2097 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002098 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002099 goto fail;
2100 }
2101
2102 /* Seeking to the current position should attempt to
2103 * sync the underlying buffer with the current position.
2104 */
2105 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002106 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002107 if (cookieObj == NULL)
2108 goto fail;
2109 }
2110 else if (whence == 2) {
2111 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002112 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002113 if (cmp < 0)
2114 goto fail;
2115
2116 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002117 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002118 goto fail;
2119 }
2120
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002121 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002122 if (res == NULL)
2123 goto fail;
2124 Py_DECREF(res);
2125
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002126 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002127 Py_CLEAR(self->snapshot);
2128 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002129 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002130 if (res == NULL)
2131 goto fail;
2132 Py_DECREF(res);
2133 }
2134
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002135 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136 Py_XDECREF(cookieObj);
2137 return res;
2138 }
2139 else if (whence != 0) {
2140 PyErr_Format(PyExc_ValueError,
2141 "invalid whence (%d, should be 0, 1 or 2)", whence);
2142 goto fail;
2143 }
2144
Antoine Pitroue4501852009-05-14 18:55:55 +00002145 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002146 if (cmp < 0)
2147 goto fail;
2148
2149 if (cmp == 1) {
2150 PyErr_Format(PyExc_ValueError,
2151 "negative seek position %R", cookieObj);
2152 goto fail;
2153 }
2154
2155 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2156 if (res == NULL)
2157 goto fail;
2158 Py_DECREF(res);
2159
2160 /* The strategy of seek() is to go back to the safe start point
2161 * and replay the effect of read(chars_to_skip) from there.
2162 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002163 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002164 goto fail;
2165
2166 /* Seek back to the safe start point. */
2167 posobj = PyLong_FromOff_t(cookie.start_pos);
2168 if (posobj == NULL)
2169 goto fail;
2170 res = PyObject_CallMethodObjArgs(self->buffer,
2171 _PyIO_str_seek, posobj, NULL);
2172 Py_DECREF(posobj);
2173 if (res == NULL)
2174 goto fail;
2175 Py_DECREF(res);
2176
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002177 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002178 Py_CLEAR(self->snapshot);
2179
2180 /* Restore the decoder to its state from the safe start point. */
2181 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002182 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002183 goto fail;
2184 }
2185
2186 if (cookie.chars_to_skip) {
2187 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002188 PyObject *input_chunk = _PyObject_CallMethodId(
2189 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002190 PyObject *decoded;
2191
2192 if (input_chunk == NULL)
2193 goto fail;
2194
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002195 if (!PyBytes_Check(input_chunk)) {
2196 PyErr_Format(PyExc_TypeError,
2197 "underlying read() should have returned a bytes "
2198 "object, not '%.200s'",
2199 Py_TYPE(input_chunk)->tp_name);
2200 Py_DECREF(input_chunk);
2201 goto fail;
2202 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002203
2204 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2205 if (self->snapshot == NULL) {
2206 Py_DECREF(input_chunk);
2207 goto fail;
2208 }
2209
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002210 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2211 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002212
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002213 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002214 goto fail;
2215
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002216 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002217
2218 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002219 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002220 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2221 goto fail;
2222 }
2223 self->decoded_chars_used = cookie.chars_to_skip;
2224 }
2225 else {
2226 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2227 if (self->snapshot == NULL)
2228 goto fail;
2229 }
2230
Antoine Pitroue4501852009-05-14 18:55:55 +00002231 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2232 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002233 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002234 goto fail;
2235 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002236 return cookieObj;
2237 fail:
2238 Py_XDECREF(cookieObj);
2239 return NULL;
2240
2241}
2242
2243static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002244textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002245{
2246 PyObject *res;
2247 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002248 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002249 PyObject *next_input;
2250 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002251 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252 PyObject *saved_state = NULL;
2253 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002254 char *dec_buffer;
2255 Py_ssize_t dec_buffer_len;
2256 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002257
2258 CHECK_INITIALIZED(self);
2259 CHECK_CLOSED(self);
2260
2261 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002262 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002263 goto fail;
2264 }
2265 if (!self->telling) {
2266 PyErr_SetString(PyExc_IOError,
2267 "telling position disabled by next() call");
2268 goto fail;
2269 }
2270
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002271 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002272 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002273 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002274 if (res == NULL)
2275 goto fail;
2276 Py_DECREF(res);
2277
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002278 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002279 if (posobj == NULL)
2280 goto fail;
2281
2282 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002283 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002284 return posobj;
2285 }
2286
2287#if defined(HAVE_LARGEFILE_SUPPORT)
2288 cookie.start_pos = PyLong_AsLongLong(posobj);
2289#else
2290 cookie.start_pos = PyLong_AsLong(posobj);
2291#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002292 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002293 if (PyErr_Occurred())
2294 goto fail;
2295
2296 /* Skip backward to the snapshot point (see _read_chunk). */
2297 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2298 goto fail;
2299
2300 assert (PyBytes_Check(next_input));
2301
2302 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2303
2304 /* How many decoded characters have been used up since the snapshot? */
2305 if (self->decoded_chars_used == 0) {
2306 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002307 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002308 }
2309
2310 chars_to_skip = self->decoded_chars_used;
2311
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002312 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002313 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2314 _PyIO_str_getstate, NULL);
2315 if (saved_state == NULL)
2316 goto fail;
2317
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002318#define DECODER_GETSTATE() do { \
2319 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2320 _PyIO_str_getstate, NULL); \
2321 if (_state == NULL) \
2322 goto fail; \
2323 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2324 Py_DECREF(_state); \
2325 goto fail; \
2326 } \
2327 Py_DECREF(_state); \
2328 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002329
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002330#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002331 PyObject *_decoded = _PyObject_CallMethodId( \
2332 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002333 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002334 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002335 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002336 Py_DECREF(_decoded); \
2337 } while (0)
2338
2339 /* Fast search for an acceptable start point, close to our
2340 current pos */
2341 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2342 skip_back = 1;
2343 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2344 input = PyBytes_AS_STRING(next_input);
2345 while (skip_bytes > 0) {
2346 /* Decode up to temptative start point */
2347 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2348 goto fail;
2349 DECODER_DECODE(input, skip_bytes, chars_decoded);
2350 if (chars_decoded <= chars_to_skip) {
2351 DECODER_GETSTATE();
2352 if (dec_buffer_len == 0) {
2353 /* Before pos and no bytes buffered in decoder => OK */
2354 cookie.dec_flags = dec_flags;
2355 chars_to_skip -= chars_decoded;
2356 break;
2357 }
2358 /* Skip back by buffered amount and reset heuristic */
2359 skip_bytes -= dec_buffer_len;
2360 skip_back = 1;
2361 }
2362 else {
2363 /* We're too far ahead, skip back a bit */
2364 skip_bytes -= skip_back;
2365 skip_back *= 2;
2366 }
2367 }
2368 if (skip_bytes <= 0) {
2369 skip_bytes = 0;
2370 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2371 goto fail;
2372 }
2373
2374 /* Note our initial start point. */
2375 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002376 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002377 if (chars_to_skip == 0)
2378 goto finally;
2379
2380 /* We should be close to the desired position. Now feed the decoder one
2381 * byte at a time until we reach the `chars_to_skip` target.
2382 * As we go, note the nearest "safe start point" before the current
2383 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002384 * can safely start from there and advance to this location).
2385 */
2386 chars_decoded = 0;
2387 input = PyBytes_AS_STRING(next_input);
2388 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002389 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002390 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002391 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002392
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002393 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002394 /* We got n chars for 1 byte */
2395 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002397 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002398
2399 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2400 /* Decoder buffer is empty, so this is a safe start point. */
2401 cookie.start_pos += cookie.bytes_to_feed;
2402 chars_to_skip -= chars_decoded;
2403 cookie.dec_flags = dec_flags;
2404 cookie.bytes_to_feed = 0;
2405 chars_decoded = 0;
2406 }
2407 if (chars_decoded >= chars_to_skip)
2408 break;
2409 input++;
2410 }
2411 if (input == input_end) {
2412 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002413 PyObject *decoded = _PyObject_CallMethodId(
2414 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002415 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002416 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002417 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418 Py_DECREF(decoded);
2419 cookie.need_eof = 1;
2420
2421 if (chars_decoded < chars_to_skip) {
2422 PyErr_SetString(PyExc_IOError,
2423 "can't reconstruct logical file position");
2424 goto fail;
2425 }
2426 }
2427
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002428finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002429 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002430 Py_DECREF(saved_state);
2431 if (res == NULL)
2432 return NULL;
2433 Py_DECREF(res);
2434
2435 /* The returned cookie corresponds to the last safe start point. */
2436 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002437 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002438
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002439fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002440 if (saved_state) {
2441 PyObject *type, *value, *traceback;
2442 PyErr_Fetch(&type, &value, &traceback);
2443
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002444 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002445 Py_DECREF(saved_state);
2446 if (res == NULL)
2447 return NULL;
2448 Py_DECREF(res);
2449
2450 PyErr_Restore(type, value, traceback);
2451 }
2452 return NULL;
2453}
2454
2455static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002456textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002457{
2458 PyObject *pos = Py_None;
2459 PyObject *res;
2460
2461 CHECK_INITIALIZED(self)
2462 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2463 return NULL;
2464 }
2465
2466 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2467 if (res == NULL)
2468 return NULL;
2469 Py_DECREF(res);
2470
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002471 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002472}
2473
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002474static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002475textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002476{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002477 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002478
2479 CHECK_INITIALIZED(self);
2480
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002481 res = PyUnicode_FromString("<_io.TextIOWrapper");
2482 if (res == NULL)
2483 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002484 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002485 if (nameobj == NULL) {
2486 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2487 PyErr_Clear();
2488 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002489 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002490 }
2491 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002492 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002493 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002494 if (s == NULL)
2495 goto error;
2496 PyUnicode_AppendAndDel(&res, s);
2497 if (res == NULL)
2498 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002499 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002500 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002501 if (modeobj == NULL) {
2502 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2503 PyErr_Clear();
2504 else
2505 goto error;
2506 }
2507 else {
2508 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2509 Py_DECREF(modeobj);
2510 if (s == NULL)
2511 goto error;
2512 PyUnicode_AppendAndDel(&res, s);
2513 if (res == NULL)
2514 return NULL;
2515 }
2516 s = PyUnicode_FromFormat("%U encoding=%R>",
2517 res, self->encoding);
2518 Py_DECREF(res);
2519 return s;
2520error:
2521 Py_XDECREF(res);
2522 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002523}
2524
2525
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002526/* Inquiries */
2527
2528static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002529textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530{
2531 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002532 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002533}
2534
2535static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002536textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537{
2538 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002539 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002540}
2541
2542static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002543textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002544{
2545 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002546 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002547}
2548
2549static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002550textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551{
2552 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002553 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554}
2555
2556static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002557textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558{
2559 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002560 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002561}
2562
2563static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002564textiowrapper_getstate(textio *self, PyObject *args)
2565{
2566 PyErr_Format(PyExc_TypeError,
2567 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2568 return NULL;
2569}
2570
2571static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002572textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002573{
2574 CHECK_INITIALIZED(self);
2575 CHECK_CLOSED(self);
2576 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002577 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002578 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002579 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002580}
2581
2582static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002583textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002584{
2585 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002586 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002587 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002588
Antoine Pitrou6be88762010-05-03 16:48:20 +00002589 res = textiowrapper_closed_get(self, NULL);
2590 if (res == NULL)
2591 return NULL;
2592 r = PyObject_IsTrue(res);
2593 Py_DECREF(res);
2594 if (r < 0)
2595 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002596
Antoine Pitrou6be88762010-05-03 16:48:20 +00002597 if (r > 0) {
2598 Py_RETURN_NONE; /* stream already closed */
2599 }
2600 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002601 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002602 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002603 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002604 if (res)
2605 Py_DECREF(res);
2606 else
2607 PyErr_Clear();
2608 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002609 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002610 if (res == NULL)
2611 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002612 else
2613 Py_DECREF(res);
2614
Benjamin Peterson68623612012-12-20 11:53:11 -06002615 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2616 if (exc != NULL) {
2617 if (res != NULL) {
2618 Py_CLEAR(res);
2619 PyErr_Restore(exc, val, tb);
2620 }
2621 else {
Serhiy Storchaka76d3f142014-06-11 07:18:53 +03002622 PyObject *exc2, *val2, *tb2;
2623 PyErr_Fetch(&exc2, &val2, &tb2);
Serhiy Storchaka8a8f7f92014-06-09 09:13:04 +03002624 PyErr_NormalizeException(&exc, &val, &tb);
Benjamin Peterson68623612012-12-20 11:53:11 -06002625 Py_DECREF(exc);
2626 Py_XDECREF(tb);
Serhiy Storchaka76d3f142014-06-11 07:18:53 +03002627 PyErr_NormalizeException(&exc2, &val2, &tb2);
Benjamin Peterson68623612012-12-20 11:53:11 -06002628 PyException_SetContext(val2, val);
Serhiy Storchaka76d3f142014-06-11 07:18:53 +03002629 PyErr_Restore(exc2, val2, tb2);
Benjamin Peterson68623612012-12-20 11:53:11 -06002630 }
2631 }
2632 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002633 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002634}
2635
2636static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002637textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002638{
2639 PyObject *line;
2640
2641 CHECK_INITIALIZED(self);
2642
2643 self->telling = 0;
2644 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2645 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002646 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002647 }
2648 else {
2649 line = PyObject_CallMethodObjArgs((PyObject *)self,
2650 _PyIO_str_readline, NULL);
2651 if (line && !PyUnicode_Check(line)) {
2652 PyErr_Format(PyExc_IOError,
2653 "readline() should have returned an str object, "
2654 "not '%.200s'", Py_TYPE(line)->tp_name);
2655 Py_DECREF(line);
2656 return NULL;
2657 }
2658 }
2659
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002660 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661 return NULL;
2662
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002663 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002664 /* Reached EOF or would have blocked */
2665 Py_DECREF(line);
2666 Py_CLEAR(self->snapshot);
2667 self->telling = self->seekable;
2668 return NULL;
2669 }
2670
2671 return line;
2672}
2673
2674static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002675textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002676{
2677 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002678 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002679}
2680
2681static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002682textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002683{
2684 CHECK_INITIALIZED(self);
2685 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2686}
2687
2688static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002689textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690{
2691 PyObject *res;
2692 CHECK_INITIALIZED(self);
2693 if (self->decoder == NULL)
2694 Py_RETURN_NONE;
2695 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2696 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002697 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2698 PyErr_Clear();
2699 Py_RETURN_NONE;
2700 }
2701 else {
2702 return NULL;
2703 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002704 }
2705 return res;
2706}
2707
2708static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002709textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002710{
2711 CHECK_INITIALIZED(self);
2712 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2713}
2714
2715static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002716textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002717{
2718 CHECK_INITIALIZED(self);
2719 return PyLong_FromSsize_t(self->chunk_size);
2720}
2721
2722static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002723textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002724{
2725 Py_ssize_t n;
2726 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002727 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002728 if (n == -1 && PyErr_Occurred())
2729 return -1;
2730 if (n <= 0) {
2731 PyErr_SetString(PyExc_ValueError,
2732 "a strictly positive integer is required");
2733 return -1;
2734 }
2735 self->chunk_size = n;
2736 return 0;
2737}
2738
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002739static PyMethodDef textiowrapper_methods[] = {
2740 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2741 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2742 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2743 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2744 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2745 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002746
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002747 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2748 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2749 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2750 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2751 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002752 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002753
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002754 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2755 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2756 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002757 {NULL, NULL}
2758};
2759
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002760static PyMemberDef textiowrapper_members[] = {
2761 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2762 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2763 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02002764 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002765 {NULL}
2766};
2767
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002768static PyGetSetDef textiowrapper_getset[] = {
2769 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2770 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002771/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2772*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002773 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2774 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2775 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2776 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002777 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002778};
2779
2780PyTypeObject PyTextIOWrapper_Type = {
2781 PyVarObject_HEAD_INIT(NULL, 0)
2782 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002783 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002784 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002785 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002786 0, /*tp_print*/
2787 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002788 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002789 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002790 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002791 0, /*tp_as_number*/
2792 0, /*tp_as_sequence*/
2793 0, /*tp_as_mapping*/
2794 0, /*tp_hash */
2795 0, /*tp_call*/
2796 0, /*tp_str*/
2797 0, /*tp_getattro*/
2798 0, /*tp_setattro*/
2799 0, /*tp_as_buffer*/
2800 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02002801 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002802 textiowrapper_doc, /* tp_doc */
2803 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2804 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002805 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002806 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002807 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002808 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2809 textiowrapper_methods, /* tp_methods */
2810 textiowrapper_members, /* tp_members */
2811 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002812 0, /* tp_base */
2813 0, /* tp_dict */
2814 0, /* tp_descr_get */
2815 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002816 offsetof(textio, dict), /*tp_dictoffset*/
2817 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002818 0, /* tp_alloc */
2819 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02002820 0, /* tp_free */
2821 0, /* tp_is_gc */
2822 0, /* tp_bases */
2823 0, /* tp_mro */
2824 0, /* tp_cache */
2825 0, /* tp_subclasses */
2826 0, /* tp_weaklist */
2827 0, /* tp_del */
2828 0, /* tp_version_tag */
2829 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002830};