blob: 67ac4457d550702f17ddcfba15c0b8198d4692ce [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* One _Py_IDENTIFIER declaration per attribute/method name; each one is
   referred to as PyId_<name> (e.g. &PyId_setstate). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
Antoine Pitrou712cb732013-12-21 15:51:54 +010048 _PyIO_State *state = IO_STATE();
49 if (state != NULL)
50 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000051 return NULL;
52}
53
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000054PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000055 "Separate the underlying buffer from the TextIOBase and return it.\n"
56 "\n"
57 "After the underlying buffer has been detached, the TextIO is in an\n"
58 "unusable state.\n"
59 );
60
61static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063{
64 return _unsupported("detach");
65}
66
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000067PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000068 "Read at most n characters from stream.\n"
69 "\n"
70 "Read from underlying buffer until we have n characters or we hit EOF.\n"
71 "If n is negative or omitted, read until EOF.\n"
72 );
73
74static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076{
77 return _unsupported("read");
78}
79
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000080PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000081 "Read until newline or EOF.\n"
82 "\n"
83 "Returns an empty string if EOF is hit immediately.\n"
84 );
85
86static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000087textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000088{
89 return _unsupported("readline");
90}
91
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000092PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000093 "Write string to stream.\n"
94 "Returns the number of characters written (which is always equal to\n"
95 "the length of the string).\n"
96 );
97
98static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000099textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100{
101 return _unsupported("write");
102}
103
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000104PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000105 "Encoding of the text stream.\n"
106 "\n"
107 "Subclasses should override.\n"
108 );
109
110static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000111textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000112{
113 Py_RETURN_NONE;
114}
115
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000116PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000117 "Line endings translated so far.\n"
118 "\n"
119 "Only line endings translated during reading are considered.\n"
120 "\n"
121 "Subclasses should override.\n"
122 );
123
124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126{
127 Py_RETURN_NONE;
128}
129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000130PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000131 "The error setting of the decoder or encoder.\n"
132 "\n"
133 "Subclasses should override.\n"
134 );
135
136static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000137textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000138{
139 Py_RETURN_NONE;
140}
141
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000142
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000143static PyMethodDef textiobase_methods[] = {
144 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
145 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
146 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
147 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000148 {NULL, NULL}
149};
150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyGetSetDef textiobase_getset[] = {
152 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
153 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
154 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000155 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156};
157
158PyTypeObject PyTextIOBase_Type = {
159 PyVarObject_HEAD_INIT(NULL, 0)
160 "_io._TextIOBase", /*tp_name*/
161 0, /*tp_basicsize*/
162 0, /*tp_itemsize*/
163 0, /*tp_dealloc*/
164 0, /*tp_print*/
165 0, /*tp_getattr*/
166 0, /*tp_setattr*/
167 0, /*tp_compare */
168 0, /*tp_repr*/
169 0, /*tp_as_number*/
170 0, /*tp_as_sequence*/
171 0, /*tp_as_mapping*/
172 0, /*tp_hash */
173 0, /*tp_call*/
174 0, /*tp_str*/
175 0, /*tp_getattro*/
176 0, /*tp_setattro*/
177 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200178 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
179 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000180 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000181 0, /* tp_traverse */
182 0, /* tp_clear */
183 0, /* tp_richcompare */
184 0, /* tp_weaklistoffset */
185 0, /* tp_iter */
186 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000189 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000190 &PyIOBase_Type, /* tp_base */
191 0, /* tp_dict */
192 0, /* tp_descr_get */
193 0, /* tp_descr_set */
194 0, /* tp_dictoffset */
195 0, /* tp_init */
196 0, /* tp_alloc */
197 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200198 0, /* tp_free */
199 0, /* tp_is_gc */
200 0, /* tp_bases */
201 0, /* tp_mro */
202 0, /* tp_cache */
203 0, /* tp_subclasses */
204 0, /* tp_weaklist */
205 0, /* tp_del */
206 0, /* tp_version_tag */
207 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000208};
209
210
211/* IncrementalNewlineDecoder */
212
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000213PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000214 "Codec used when reading a file in universal newlines mode. It wraps\n"
215 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
216 "records the types of newlines encountered. When used with\n"
217 "translate=False, it ensures that the newline sequence is returned in\n"
218 "one piece. When used with decoder=None, it expects unicode strings as\n"
219 "decode input and translates newlines without first invoking an external\n"
220 "decoder.\n"
221 );
222
223typedef struct {
224 PyObject_HEAD
225 PyObject *decoder;
226 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200227 unsigned int pendingcr: 1;
228 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000229 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000230} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
232static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000233incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000234 PyObject *args, PyObject *kwds)
235{
236 PyObject *decoder;
237 int translate;
238 PyObject *errors = NULL;
239 char *kwlist[] = {"decoder", "translate", "errors", NULL};
240
241 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
242 kwlist, &decoder, &translate, &errors))
243 return -1;
244
245 self->decoder = decoder;
246 Py_INCREF(decoder);
247
248 if (errors == NULL) {
249 self->errors = PyUnicode_FromString("strict");
250 if (self->errors == NULL)
251 return -1;
252 }
253 else {
254 Py_INCREF(errors);
255 self->errors = errors;
256 }
257
258 self->translate = translate;
259 self->seennl = 0;
260 self->pendingcr = 0;
261
262 return 0;
263}
264
265static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000266incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267{
268 Py_CLEAR(self->decoder);
269 Py_CLEAR(self->errors);
270 Py_TYPE(self)->tp_free((PyObject *)self);
271}
272
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200273static int
274check_decoded(PyObject *decoded)
275{
276 if (decoded == NULL)
277 return -1;
278 if (!PyUnicode_Check(decoded)) {
279 PyErr_Format(PyExc_TypeError,
280 "decoder should return a string result, not '%.200s'",
281 Py_TYPE(decoded)->tp_name);
282 Py_DECREF(decoded);
283 return -1;
284 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200285 if (PyUnicode_READY(decoded) < 0) {
286 Py_DECREF(decoded);
287 return -1;
288 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200289 return 0;
290}
291
/* Bit flags for the newline conventions encountered so far; they are
   OR-ed together in nldecoder_object.seennl. */
#define SEEN_CR   1     /* a lone "\r" */
#define SEEN_LF   2     /* a lone "\n" */
#define SEEN_CRLF 4     /* the pair "\r\n" */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
296
297PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200298_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000299 PyObject *input, int final)
300{
301 PyObject *output;
302 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200303 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000304
305 if (self->decoder == NULL) {
306 PyErr_SetString(PyExc_ValueError,
307 "IncrementalNewlineDecoder.__init__ not called");
308 return NULL;
309 }
310
311 /* decode input (with the eventual \r from a previous pass) */
312 if (self->decoder != Py_None) {
313 output = PyObject_CallMethodObjArgs(self->decoder,
314 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
315 }
316 else {
317 output = input;
318 Py_INCREF(output);
319 }
320
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200321 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000322 return NULL;
323
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000325 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 /* Prefix output with CR */
327 int kind;
328 PyObject *modified;
329 char *out;
330
331 modified = PyUnicode_New(output_len + 1,
332 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000333 if (modified == NULL)
334 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 kind = PyUnicode_KIND(modified);
336 out = PyUnicode_DATA(modified);
337 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200338 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000339 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200340 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000341 self->pendingcr = 0;
342 output_len++;
343 }
344
345 /* retain last \r even when not translating data:
346 * then readline() is sure to get \r\n in one pass
347 */
348 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000349 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200350 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
351 {
352 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
353 if (modified == NULL)
354 goto error;
355 Py_DECREF(output);
356 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000357 self->pendingcr = 1;
358 }
359 }
360
361 /* Record which newlines are read and do newline translation if desired,
362 all in one pass. */
363 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 Py_ssize_t len;
366 int seennl = self->seennl;
367 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 in_str = PyUnicode_DATA(output);
371 len = PyUnicode_GET_LENGTH(output);
372 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373
374 if (len == 0)
375 return output;
376
377 /* If, up to now, newlines are consistently \n, do a quick check
378 for the \r *byte* with the libc's optimized memchr.
379 */
380 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200381 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382 }
383
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 if (only_lf) {
385 /* If not already seen, quick scan for a possible "\n" character.
386 (there's nothing else to be done, even when in translation mode)
387 */
388 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200389 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100390 if (kind == PyUnicode_1BYTE_KIND)
391 seennl |= SEEN_LF;
392 else {
393 Py_ssize_t i = 0;
394 for (;;) {
395 Py_UCS4 c;
396 /* Fast loop for non-control characters */
397 while (PyUnicode_READ(kind, in_str, i) > '\n')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
400 if (c == '\n') {
401 seennl |= SEEN_LF;
402 break;
403 }
404 if (i >= len)
405 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000406 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000407 }
408 }
409 /* Finished: we have scanned for newlines, and none of them
410 need translating */
411 }
412 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000415 if (seennl == SEEN_ALL)
416 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000419 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 while (PyUnicode_READ(kind, in_str, i) > '\r')
421 i++;
422 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000423 if (c == '\n')
424 seennl |= SEEN_LF;
425 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000427 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000429 }
430 else
431 seennl |= SEEN_CR;
432 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 break;
435 if (seennl == SEEN_ALL)
436 break;
437 }
438 endscan:
439 ;
440 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000441 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 void *translated;
443 int kind = PyUnicode_KIND(output);
444 void *in_str = PyUnicode_DATA(output);
445 Py_ssize_t in, out;
446 /* XXX: Previous in-place translation here is disabled as
447 resizing is not possible anymore */
448 /* We could try to optimize this so that we only do a copy
449 when there is something to translate. On the other hand,
450 we already know there is a \r byte, so chances are high
451 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200452 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 if (translated == NULL) {
454 PyErr_NoMemory();
455 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000460 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
462 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 seennl |= SEEN_LF;
466 continue;
467 }
468 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 in++;
471 seennl |= SEEN_CRLF;
472 }
473 else
474 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200475 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 continue;
477 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200480 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000481 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 Py_DECREF(output);
483 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100484 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200486 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 }
488 self->seennl |= seennl;
489 }
490
491 return output;
492
493 error:
494 Py_DECREF(output);
495 return NULL;
496}
497
498static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000499incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000500 PyObject *args, PyObject *kwds)
501{
502 char *kwlist[] = {"input", "final", NULL};
503 PyObject *input;
504 int final = 0;
505
506 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
507 kwlist, &input, &final))
508 return NULL;
509 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
510}
511
512static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000513incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000514{
515 PyObject *buffer;
516 unsigned PY_LONG_LONG flag;
517
518 if (self->decoder != Py_None) {
519 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
520 _PyIO_str_getstate, NULL);
521 if (state == NULL)
522 return NULL;
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
524 Py_DECREF(state);
525 return NULL;
526 }
527 Py_INCREF(buffer);
528 Py_DECREF(state);
529 }
530 else {
531 buffer = PyBytes_FromString("");
532 flag = 0;
533 }
534 flag <<= 1;
535 if (self->pendingcr)
536 flag |= 1;
537 return Py_BuildValue("NK", buffer, flag);
538}
539
540static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000541incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542{
543 PyObject *buffer;
544 unsigned PY_LONG_LONG flag;
545
546 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
547 return NULL;
548
Victor Stinner7d7e7752014-06-17 23:31:25 +0200549 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000550 flag >>= 1;
551
552 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200553 return _PyObject_CallMethodId(self->decoder,
554 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000555 else
556 Py_RETURN_NONE;
557}
558
559static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000560incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000561{
562 self->seennl = 0;
563 self->pendingcr = 0;
564 if (self->decoder != Py_None)
565 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
566 else
567 Py_RETURN_NONE;
568}
569
570static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000571incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 switch (self->seennl) {
574 case SEEN_CR:
575 return PyUnicode_FromString("\r");
576 case SEEN_LF:
577 return PyUnicode_FromString("\n");
578 case SEEN_CRLF:
579 return PyUnicode_FromString("\r\n");
580 case SEEN_CR | SEEN_LF:
581 return Py_BuildValue("ss", "\r", "\n");
582 case SEEN_CR | SEEN_CRLF:
583 return Py_BuildValue("ss", "\r", "\r\n");
584 case SEEN_LF | SEEN_CRLF:
585 return Py_BuildValue("ss", "\n", "\r\n");
586 case SEEN_CR | SEEN_LF | SEEN_CRLF:
587 return Py_BuildValue("sss", "\r", "\n", "\r\n");
588 default:
589 Py_RETURN_NONE;
590 }
591
592}
593
594
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000595static PyMethodDef incrementalnewlinedecoder_methods[] = {
596 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
597 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
598 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
599 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000600 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601};
602
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000603static PyGetSetDef incrementalnewlinedecoder_getset[] = {
604 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000605 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606};
607
608PyTypeObject PyIncrementalNewlineDecoder_Type = {
609 PyVarObject_HEAD_INIT(NULL, 0)
610 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /*tp_print*/
615 0, /*tp_getattr*/
616 0, /*tp_setattr*/
617 0, /*tp_compare */
618 0, /*tp_repr*/
619 0, /*tp_as_number*/
620 0, /*tp_as_sequence*/
621 0, /*tp_as_mapping*/
622 0, /*tp_hash */
623 0, /*tp_call*/
624 0, /*tp_str*/
625 0, /*tp_getattro*/
626 0, /*tp_setattro*/
627 0, /*tp_as_buffer*/
628 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 0, /* tp_traverse */
631 0, /* tp_clear */
632 0, /* tp_richcompare */
633 0, /*tp_weaklistoffset*/
634 0, /* tp_iter */
635 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000636 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000638 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000639 0, /* tp_base */
640 0, /* tp_dict */
641 0, /* tp_descr_get */
642 0, /* tp_descr_set */
643 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000644 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 0, /* tp_alloc */
646 PyType_GenericNew, /* tp_new */
647};
648
649
650/* TextIOWrapper */
651
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000652PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 "Character and line based layer over a BufferedIOBase object, buffer.\n"
654 "\n"
655 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200656 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657 "\n"
Andrew Kuchlingc7b6c502013-06-16 12:58:48 -0400658 "errors determines the strictness of encoding and decoding (see\n"
659 "help(codecs.Codec) or the documentation for codecs.register) and\n"
660 "defaults to \"strict\".\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200662 "newline controls how line endings are handled. It can be None, '',\n"
663 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
664 "\n"
665 "* On input, if newline is None, universal newlines mode is\n"
666 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
667 " these are translated into '\\n' before being returned to the\n"
668 " caller. If it is '', universal newline mode is enabled, but line\n"
669 " endings are returned to the caller untranslated. If it has any of\n"
670 " the other legal values, input lines are only terminated by the given\n"
671 " string, and the line ending is returned to the caller untranslated.\n"
672 "\n"
673 "* On output, if newline is None, any '\\n' characters written are\n"
674 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300675 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200676 " of the other legal values, any '\\n' characters written are translated\n"
677 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 "\n"
679 "If line_buffering is True, a call to flush is implied when a call to\n"
680 "write contains a newline character."
681 );
682
683typedef PyObject *
684 (*encodefunc_t)(PyObject *, PyObject *);
685
686typedef struct
687{
688 PyObject_HEAD
689 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000690 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000691 Py_ssize_t chunk_size;
692 PyObject *buffer;
693 PyObject *encoding;
694 PyObject *encoder;
695 PyObject *decoder;
696 PyObject *readnl;
697 PyObject *errors;
698 const char *writenl; /* utf-8 encoded, NULL stands for \n */
699 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200700 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701 char readuniversal;
702 char readtranslate;
703 char writetranslate;
704 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200705 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200707 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708 /* Specialized encoding func (see below) */
709 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000710 /* Whether or not it's the start of the stream */
711 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712
713 /* Reads and writes are internally buffered in order to speed things up.
714 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000715
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716 Please also note that text to be written is first encoded before being
717 buffered. This is necessary so that encoding errors are immediately
718 reported to the caller, but it unfortunately means that the
719 IncrementalEncoder (whose encode() method is always written in Python)
720 becomes a bottleneck for small writes.
721 */
722 PyObject *decoded_chars; /* buffer for text returned from decoder */
723 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
724 PyObject *pending_bytes; /* list of bytes objects waiting to be
725 written, or NULL */
726 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000727
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728 /* snapshot is either None, or a tuple (dec_flags, next_input) where
729 * dec_flags is the second (integer) item of the decoder state and
730 * next_input is the chunk of input bytes that comes next after the
731 * snapshot point. We use this to reconstruct decoder states in tell().
732 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000733 PyObject *snapshot;
734 /* Bytes-to-characters ratio for the current chunk. Serves as input for
735 the heuristic in tell(). */
736 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737
738 /* Cache raw object if it's a FileIO object */
739 PyObject *raw;
740
741 PyObject *weakreflist;
742 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000743} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744
745
746/* A couple of specialized cases in order to bypass the slow incremental
747 encoding methods for the most popular encodings. */
748
749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000750ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200752 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000757{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100758 return _PyUnicode_EncodeUTF16(text,
759 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760}
761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100765 return _PyUnicode_EncodeUTF16(text,
766 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767}
768
769static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000770utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771{
Antoine Pitroue4501852009-05-14 18:55:55 +0000772 if (!self->encoding_start_of_stream) {
773 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200774#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000775 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000777 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000779 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100780 return _PyUnicode_EncodeUTF16(text,
781 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782}
783
Antoine Pitroue4501852009-05-14 18:55:55 +0000784static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000785utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000786{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100787 return _PyUnicode_EncodeUTF32(text,
788 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000789}
790
791static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000792utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000793{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100794 return _PyUnicode_EncodeUTF32(text,
795 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000796}
797
798static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000799utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000800{
801 if (!self->encoding_start_of_stream) {
802 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200803#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000804 return utf32be_encode(self, text);
805#else
806 return utf32le_encode(self, text);
807#endif
808 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100809 return _PyUnicode_EncodeUTF32(text,
810 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000811}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812
813static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000814utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000815{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000817}
818
819static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000820latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000821{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200822 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000823}
824
825/* Map normalized encoding names onto the specialized encoding funcs */
826
827typedef struct {
828 const char *name;
829 encodefunc_t encodefunc;
830} encodefuncentry;
831
Antoine Pitrou24f36292009-03-28 22:16:42 +0000832static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 {"ascii", (encodefunc_t) ascii_encode},
834 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000835 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 {"utf-16-be", (encodefunc_t) utf16be_encode},
837 {"utf-16-le", (encodefunc_t) utf16le_encode},
838 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000839 {"utf-32-be", (encodefunc_t) utf32be_encode},
840 {"utf-32-le", (encodefunc_t) utf32le_encode},
841 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000842 {NULL, NULL}
843};
844
845
846static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000847textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000848{
849 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200850 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000851 NULL};
Nick Coghlana9b15242014-02-04 22:11:18 +1000852 PyObject *buffer, *raw, *codec_info = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000853 char *encoding = NULL;
854 char *errors = NULL;
855 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200856 int line_buffering = 0, write_through = 0;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100857 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000858
859 PyObject *res;
860 int r;
861
862 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000863 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200864 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000865 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200866 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000867 return -1;
868
869 if (newline && newline[0] != '\0'
870 && !(newline[0] == '\n' && newline[1] == '\0')
871 && !(newline[0] == '\r' && newline[1] == '\0')
872 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
873 PyErr_Format(PyExc_ValueError,
874 "illegal newline value: %s", newline);
875 return -1;
876 }
877
878 Py_CLEAR(self->buffer);
879 Py_CLEAR(self->encoding);
880 Py_CLEAR(self->encoder);
881 Py_CLEAR(self->decoder);
882 Py_CLEAR(self->readnl);
883 Py_CLEAR(self->decoded_chars);
884 Py_CLEAR(self->pending_bytes);
885 Py_CLEAR(self->snapshot);
886 Py_CLEAR(self->errors);
887 Py_CLEAR(self->raw);
888 self->decoded_chars_used = 0;
889 self->pending_bytes_count = 0;
890 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000891 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000892
893 if (encoding == NULL) {
894 /* Try os.device_encoding(fileno) */
895 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100896 state = IO_STATE();
897 if (state == NULL)
898 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200899 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000900 /* Ignore only AttributeError and UnsupportedOperation */
901 if (fileno == NULL) {
902 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
903 PyErr_ExceptionMatches(state->unsupported_operation)) {
904 PyErr_Clear();
905 }
906 else {
907 goto error;
908 }
909 }
910 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200911 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500912 Py_DECREF(fileno);
913 if (fd == -1 && PyErr_Occurred()) {
914 goto error;
915 }
916
917 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000918 if (self->encoding == NULL)
919 goto error;
920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
922 }
923 }
924 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200925 PyObject *locale_module = _PyIO_get_locale_module(state);
926 if (locale_module == NULL)
927 goto catch_ImportError;
928 self->encoding = _PyObject_CallMethodId(
929 locale_module, &PyId_getpreferredencoding, "O", Py_False);
930 Py_DECREF(locale_module);
931 if (self->encoding == NULL) {
932 catch_ImportError:
933 /*
934 Importing locale can raise a ImportError because of
935 _functools, and locale.getpreferredencoding can raise a
936 ImportError if _locale is not available. These will happen
937 during module building.
938 */
939 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
940 PyErr_Clear();
941 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000942 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200943 else
944 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000945 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200946 else if (!PyUnicode_Check(self->encoding))
947 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000948 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000949 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000950 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000951 if (encoding == NULL)
952 goto error;
953 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000954 else if (encoding != NULL) {
955 self->encoding = PyUnicode_FromString(encoding);
956 if (self->encoding == NULL)
957 goto error;
958 }
959 else {
960 PyErr_SetString(PyExc_IOError,
961 "could not determine default encoding");
962 }
963
Nick Coghlana9b15242014-02-04 22:11:18 +1000964 /* Check we have been asked for a real text encoding */
965 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
966 if (codec_info == NULL) {
967 Py_CLEAR(self->encoding);
968 goto error;
969 }
970
971 /* XXX: Failures beyond this point have the potential to leak elements
972 * of the partially constructed object (like self->encoding)
973 */
974
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000975 if (errors == NULL)
976 errors = "strict";
977 self->errors = PyBytes_FromString(errors);
978 if (self->errors == NULL)
979 goto error;
980
981 self->chunk_size = 8192;
982 self->readuniversal = (newline == NULL || newline[0] == '\0');
983 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200984 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000985 self->readtranslate = (newline == NULL);
986 if (newline) {
987 self->readnl = PyUnicode_FromString(newline);
988 if (self->readnl == NULL)
Nick Coghlana9b15242014-02-04 22:11:18 +1000989 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990 }
991 self->writetranslate = (newline == NULL || newline[0] != '\0');
992 if (!self->readuniversal && self->readnl) {
993 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000994 if (self->writenl == NULL)
995 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000996 if (!strcmp(self->writenl, "\n"))
997 self->writenl = NULL;
998 }
999#ifdef MS_WINDOWS
1000 else
1001 self->writenl = "\r\n";
1002#endif
1003
1004 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001005 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001006 if (res == NULL)
1007 goto error;
1008 r = PyObject_IsTrue(res);
1009 Py_DECREF(res);
1010 if (r == -1)
1011 goto error;
1012 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001013 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
1014 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001015 if (self->decoder == NULL)
1016 goto error;
1017
1018 if (self->readuniversal) {
1019 PyObject *incrementalDecoder = PyObject_CallFunction(
1020 (PyObject *)&PyIncrementalNewlineDecoder_Type,
1021 "Oi", self->decoder, (int)self->readtranslate);
1022 if (incrementalDecoder == NULL)
1023 goto error;
1024 Py_CLEAR(self->decoder);
1025 self->decoder = incrementalDecoder;
1026 }
1027 }
1028
1029 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001030 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001031 if (res == NULL)
1032 goto error;
1033 r = PyObject_IsTrue(res);
1034 Py_DECREF(res);
1035 if (r == -1)
1036 goto error;
1037 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001038 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1039 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 if (self->encoder == NULL)
1041 goto error;
1042 /* Get the normalized named of the codec */
Nick Coghlana9b15242014-02-04 22:11:18 +10001043 res = _PyObject_GetAttrId(codec_info, &PyId_name);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001044 if (res == NULL) {
1045 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 PyErr_Clear();
1047 else
1048 goto error;
1049 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 else if (PyUnicode_Check(res)) {
1051 encodefuncentry *e = encodefuncs;
1052 while (e->name != NULL) {
1053 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1054 self->encodefunc = e->encodefunc;
1055 break;
1056 }
1057 e++;
1058 }
1059 }
1060 Py_XDECREF(res);
1061 }
1062
Nick Coghlana9b15242014-02-04 22:11:18 +10001063 /* Finished sorting out the codec details */
Benjamin Peterson6c14f232014-11-12 10:19:46 -05001064 Py_CLEAR(codec_info);
Nick Coghlana9b15242014-02-04 22:11:18 +10001065
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001066 self->buffer = buffer;
1067 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001068
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001069 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1070 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1071 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001072 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001073 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001074 if (raw == NULL) {
1075 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1076 PyErr_Clear();
1077 else
1078 goto error;
1079 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001080 else if (Py_TYPE(raw) == &PyFileIO_Type)
1081 self->raw = raw;
1082 else
1083 Py_DECREF(raw);
1084 }
1085
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001086 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087 if (res == NULL)
1088 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001089 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001090 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001091 if (r < 0)
1092 goto error;
1093 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001094
Martin v. Löwis767046a2011-10-14 15:35:36 +02001095 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001096
Antoine Pitroue4501852009-05-14 18:55:55 +00001097 self->encoding_start_of_stream = 0;
1098 if (self->seekable && self->encoder) {
1099 PyObject *cookieObj;
1100 int cmp;
1101
1102 self->encoding_start_of_stream = 1;
1103
1104 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1105 if (cookieObj == NULL)
1106 goto error;
1107
1108 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1109 Py_DECREF(cookieObj);
1110 if (cmp < 0) {
1111 goto error;
1112 }
1113
1114 if (cmp == 0) {
1115 self->encoding_start_of_stream = 0;
1116 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1117 _PyIO_zero, NULL);
1118 if (res == NULL)
1119 goto error;
1120 Py_DECREF(res);
1121 }
1122 }
1123
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124 self->ok = 1;
1125 return 0;
1126
1127 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001128 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001129 return -1;
1130}
1131
1132static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001133_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001134{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 self->ok = 0;
1136 Py_CLEAR(self->buffer);
1137 Py_CLEAR(self->encoding);
1138 Py_CLEAR(self->encoder);
1139 Py_CLEAR(self->decoder);
1140 Py_CLEAR(self->readnl);
1141 Py_CLEAR(self->decoded_chars);
1142 Py_CLEAR(self->pending_bytes);
1143 Py_CLEAR(self->snapshot);
1144 Py_CLEAR(self->errors);
1145 Py_CLEAR(self->raw);
1146 return 0;
1147}
1148
1149static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001150textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001151{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001152 self->finalizing = 1;
1153 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001154 return;
Antoine Pitrou796564c2013-07-30 19:59:21 +02001155 _textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001156 _PyObject_GC_UNTRACK(self);
1157 if (self->weakreflist != NULL)
1158 PyObject_ClearWeakRefs((PyObject *)self);
1159 Py_CLEAR(self->dict);
1160 Py_TYPE(self)->tp_free((PyObject *)self);
1161}
1162
1163static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001164textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001165{
1166 Py_VISIT(self->buffer);
1167 Py_VISIT(self->encoding);
1168 Py_VISIT(self->encoder);
1169 Py_VISIT(self->decoder);
1170 Py_VISIT(self->readnl);
1171 Py_VISIT(self->decoded_chars);
1172 Py_VISIT(self->pending_bytes);
1173 Py_VISIT(self->snapshot);
1174 Py_VISIT(self->errors);
1175 Py_VISIT(self->raw);
1176
1177 Py_VISIT(self->dict);
1178 return 0;
1179}
1180
1181static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001182textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001184 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001185 return -1;
1186 Py_CLEAR(self->dict);
1187 return 0;
1188}
1189
1190static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001191textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001192
/* Make the enclosing function return NULL if the stream is closed.
 *
 * This macro takes some shortcuts to make the common case faster: for exact
 * TextIOWrapper instances it asks the cached raw FileIO object directly when
 * there is one, and otherwise evaluates the `closed` getter; a true result
 * raises ValueError.  Any other type goes through _PyIOBase_check_closed().
 */
#define CHECK_CLOSED(self) \
    do { \
        int _is_closed; \
        PyObject *_closed_obj; \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            if ((self)->raw != NULL) \
                _is_closed = _PyFileIO_closed((self)->raw); \
            else { \
                _closed_obj = textiowrapper_closed_get((self), NULL); \
                if (_closed_obj == NULL) \
                    return NULL; \
                _is_closed = PyObject_IsTrue(_closed_obj); \
                Py_DECREF(_closed_obj); \
                if (_is_closed < 0) \
                    return NULL; \
            } \
            if (_is_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1219
/* Make the enclosing function return NULL with ValueError set when the
   object has not been (successfully) initialized, i.e. self->ok <= 0.
   Wrapped in do/while(0) so the macro behaves as a single statement, also
   next to an `else`.  Use as `CHECK_INITIALIZED(self);`. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
1226
/* Make the enclosing function return NULL with ValueError set when the
   object is uninitialized (see CHECK_INITIALIZED) or when its underlying
   buffer has been detached.  Wrapped in do/while(0) so both checks form a
   single statement; previously an unbraced `if (c) CHECK_ATTACHED(x);`
   would have guarded only the first check.  Use as `CHECK_ATTACHED(self);`. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)
1234
/* Variant of CHECK_ATTACHED for functions returning int: makes the enclosing
   function return -1 with ValueError set when the object is uninitialized
   (self->ok <= 0) or detached.  Wrapped in do/while(0) so it is a single
   statement and cannot capture a following `else`.
   Use as `CHECK_ATTACHED_INT(self);`. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1245
1246
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001247static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001248textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001249{
1250 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001251 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001252 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1253 if (res == NULL)
1254 return NULL;
1255 Py_DECREF(res);
1256 buffer = self->buffer;
1257 self->buffer = NULL;
1258 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001259 return buffer;
1260}
1261
Antoine Pitrou24f36292009-03-28 22:16:42 +00001262/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001263 underlying buffered object, though. */
1264static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001265_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001267 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268
1269 if (self->pending_bytes == NULL)
1270 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001271
1272 pending = self->pending_bytes;
1273 Py_INCREF(pending);
1274 self->pending_bytes_count = 0;
1275 Py_CLEAR(self->pending_bytes);
1276
1277 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1278 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279 if (b == NULL)
1280 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001281 ret = NULL;
1282 do {
1283 ret = PyObject_CallMethodObjArgs(self->buffer,
1284 _PyIO_str_write, b, NULL);
1285 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286 Py_DECREF(b);
1287 if (ret == NULL)
1288 return -1;
1289 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001290 return 0;
1291}
1292
1293static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001294textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001295{
1296 PyObject *ret;
1297 PyObject *text; /* owned reference */
1298 PyObject *b;
1299 Py_ssize_t textlen;
1300 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001301 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001303 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001304
1305 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1306 return NULL;
1307 }
1308
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 if (PyUnicode_READY(text) == -1)
1310 return NULL;
1311
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 CHECK_CLOSED(self);
1313
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001314 if (self->encoder == NULL)
1315 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001316
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 Py_INCREF(text);
1318
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001320
1321 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001322 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 haslf = 1;
1324
1325 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001326 PyObject *newtext = _PyObject_CallMethodId(
1327 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001328 Py_DECREF(text);
1329 if (newtext == NULL)
1330 return NULL;
1331 text = newtext;
1332 }
1333
Antoine Pitroue96ec682011-07-23 21:46:35 +02001334 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001335 text_needflush = 1;
1336 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001337 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001339 needflush = 1;
1340
1341 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001342 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001343 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001344 self->encoding_start_of_stream = 0;
1345 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001346 else
1347 b = PyObject_CallMethodObjArgs(self->encoder,
1348 _PyIO_str_encode, text, NULL);
1349 Py_DECREF(text);
1350 if (b == NULL)
1351 return NULL;
1352
1353 if (self->pending_bytes == NULL) {
1354 self->pending_bytes = PyList_New(0);
1355 if (self->pending_bytes == NULL) {
1356 Py_DECREF(b);
1357 return NULL;
1358 }
1359 self->pending_bytes_count = 0;
1360 }
1361 if (PyList_Append(self->pending_bytes, b) < 0) {
1362 Py_DECREF(b);
1363 return NULL;
1364 }
1365 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1366 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001367 if (self->pending_bytes_count > self->chunk_size || needflush ||
1368 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001369 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001370 return NULL;
1371 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001372
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001373 if (needflush) {
1374 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1375 if (ret == NULL)
1376 return NULL;
1377 Py_DECREF(ret);
1378 }
1379
1380 Py_CLEAR(self->snapshot);
1381
1382 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001383 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001384 if (ret == NULL)
1385 return NULL;
1386 Py_DECREF(ret);
1387 }
1388
1389 return PyLong_FromSsize_t(textlen);
1390}
1391
1392/* Steal a reference to chars and store it in the decoded_char buffer;
1393 */
1394static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001395textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001396{
1397 Py_CLEAR(self->decoded_chars);
1398 self->decoded_chars = chars;
1399 self->decoded_chars_used = 0;
1400}
1401
1402static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001403textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001404{
1405 PyObject *chars;
1406 Py_ssize_t avail;
1407
1408 if (self->decoded_chars == NULL)
1409 return PyUnicode_FromStringAndSize(NULL, 0);
1410
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001411 /* decoded_chars is guaranteed to be "ready". */
1412 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001413 - self->decoded_chars_used);
1414
1415 assert(avail >= 0);
1416
1417 if (n < 0 || n > avail)
1418 n = avail;
1419
1420 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001421 chars = PyUnicode_Substring(self->decoded_chars,
1422 self->decoded_chars_used,
1423 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001424 if (chars == NULL)
1425 return NULL;
1426 }
1427 else {
1428 chars = self->decoded_chars;
1429 Py_INCREF(chars);
1430 }
1431
1432 self->decoded_chars_used += n;
1433 return chars;
1434}
1435
1436/* Read and decode the next chunk of data from the BufferedReader.
1437 */
1438static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001439textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001440{
1441 PyObject *dec_buffer = NULL;
1442 PyObject *dec_flags = NULL;
1443 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001444 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001445 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001446 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001447 int eof;
1448
1449 /* The return value is True unless EOF was reached. The decoded string is
1450 * placed in self._decoded_chars (replacing its previous value). The
1451 * entire input chunk is sent to the decoder, though some of it may remain
1452 * buffered in the decoder, yet to be converted.
1453 */
1454
1455 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001456 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001457 return -1;
1458 }
1459
1460 if (self->telling) {
1461 /* To prepare for tell(), we need to snapshot a point in the file
1462 * where the decoder's input buffer is empty.
1463 */
1464
1465 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1466 _PyIO_str_getstate, NULL);
1467 if (state == NULL)
1468 return -1;
1469 /* Given this, we know there was a valid snapshot point
1470 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1471 */
1472 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1473 Py_DECREF(state);
1474 return -1;
1475 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001476
1477 if (!PyBytes_Check(dec_buffer)) {
1478 PyErr_Format(PyExc_TypeError,
1479 "decoder getstate() should have returned a bytes "
1480 "object, not '%.200s'",
1481 Py_TYPE(dec_buffer)->tp_name);
1482 Py_DECREF(state);
1483 return -1;
1484 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001485 Py_INCREF(dec_buffer);
1486 Py_INCREF(dec_flags);
1487 Py_DECREF(state);
1488 }
1489
1490 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001491 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001492 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001493 }
1494 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001495 if (chunk_size == NULL)
1496 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001497
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001498 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001499 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1500 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501 Py_DECREF(chunk_size);
1502 if (input_chunk == NULL)
1503 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001504
1505 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001506 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001507 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001508 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1509 Py_TYPE(input_chunk)->tp_name);
1510 goto fail;
1511 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001512
Antoine Pitroub8503892014-04-29 10:14:02 +02001513 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001514 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001515 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1516 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1517 self->decoder, input_chunk, eof);
1518 }
1519 else {
1520 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1521 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1522 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001523 PyBuffer_Release(&input_chunk_buf);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001524
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001525 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001526 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001527 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001528 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001529 if (nchars > 0)
1530 self->b2cratio = (double) nbytes / nchars;
1531 else
1532 self->b2cratio = 0.0;
1533 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534 eof = 0;
1535
1536 if (self->telling) {
1537 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1538 * next input to be decoded is dec_buffer + input_chunk.
1539 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001540 PyObject *next_input = dec_buffer;
1541 PyBytes_Concat(&next_input, input_chunk);
1542 if (next_input == NULL) {
1543 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001544 goto fail;
1545 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546 Py_CLEAR(self->snapshot);
1547 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1548 }
1549 Py_DECREF(input_chunk);
1550
1551 return (eof == 0);
1552
1553 fail:
1554 Py_XDECREF(dec_buffer);
1555 Py_XDECREF(dec_flags);
1556 Py_XDECREF(input_chunk);
1557 return -1;
1558}
1559
1560static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001561textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001562{
1563 Py_ssize_t n = -1;
1564 PyObject *result = NULL, *chunks = NULL;
1565
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001566 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001567
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001568 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569 return NULL;
1570
1571 CHECK_CLOSED(self);
1572
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001573 if (self->decoder == NULL)
1574 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001575
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001576 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577 return NULL;
1578
1579 if (n < 0) {
1580 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001581 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001582 PyObject *decoded;
1583 if (bytes == NULL)
1584 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001585
1586 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1587 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1588 bytes, 1);
1589 else
1590 decoded = PyObject_CallMethodObjArgs(
1591 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001593 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001594 goto fail;
1595
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001596 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001597
1598 if (result == NULL) {
1599 Py_DECREF(decoded);
1600 return NULL;
1601 }
1602
1603 PyUnicode_AppendAndDel(&result, decoded);
1604 if (result == NULL)
1605 goto fail;
1606
1607 Py_CLEAR(self->snapshot);
1608 return result;
1609 }
1610 else {
1611 int res = 1;
1612 Py_ssize_t remaining = n;
1613
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001614 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001615 if (result == NULL)
1616 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001617 if (PyUnicode_READY(result) == -1)
1618 goto fail;
1619 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620
1621 /* Keep reading chunks until we have n characters to return */
1622 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001623 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001624 if (res < 0) {
1625 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1626 when EINTR occurs so we needn't do it ourselves. */
1627 if (_PyIO_trap_eintr()) {
1628 continue;
1629 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001631 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 if (res == 0) /* EOF */
1633 break;
1634 if (chunks == NULL) {
1635 chunks = PyList_New(0);
1636 if (chunks == NULL)
1637 goto fail;
1638 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001639 if (PyUnicode_GET_LENGTH(result) > 0 &&
1640 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 goto fail;
1642 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001643 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001644 if (result == NULL)
1645 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001646 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001647 }
1648 if (chunks != NULL) {
1649 if (result != NULL && PyList_Append(chunks, result) < 0)
1650 goto fail;
1651 Py_CLEAR(result);
1652 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1653 if (result == NULL)
1654 goto fail;
1655 Py_CLEAR(chunks);
1656 }
1657 return result;
1658 }
1659 fail:
1660 Py_XDECREF(result);
1661 Py_XDECREF(chunks);
1662 return NULL;
1663}
1664
1665
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001666/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 that is to the NUL character. Otherwise the function will produce
1668 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001669static char *
1670find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001672 if (kind == PyUnicode_1BYTE_KIND) {
1673 assert(ch < 256);
1674 return (char *) memchr((void *) s, (char) ch, end - s);
1675 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001677 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001678 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001679 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 return s;
1681 if (s == end)
1682 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001683 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001684 }
1685}
1686
1687Py_ssize_t
1688_PyIO_find_line_ending(
1689 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001690 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693
1694 if (translated) {
1695 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001696 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001698 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 else {
1700 *consumed = len;
1701 return -1;
1702 }
1703 }
1704 else if (universal) {
1705 /* Universal newline search. Find any of \r, \r\n, \n
1706 * The decoder ensures that \r\n are not split in two pieces
1707 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001708 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001710 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001712 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001713 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001714 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001715 if (s >= end) {
1716 *consumed = len;
1717 return -1;
1718 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001719 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001720 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001722 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001724 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001725 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001726 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001727 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001728 }
1729 }
1730 }
1731 else {
1732 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001733 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001734 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001735 /* Assume that readnl is an ASCII character. */
1736 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001737 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001738 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001740 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001741 *consumed = len;
1742 return -1;
1743 }
1744 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001745 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001746 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001747 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001748 if (e < s)
1749 e = s;
1750 while (s < e) {
1751 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001752 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753 if (pos == NULL || pos >= e)
1754 break;
1755 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001756 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001757 break;
1758 }
1759 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001760 return (pos - start)/kind + readnl_len;
1761 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001763 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 if (pos == NULL)
1765 *consumed = len;
1766 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001767 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001768 return -1;
1769 }
1770 }
1771}
1772
1773static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001774_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001775{
1776 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1777 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1778 int res;
1779
1780 CHECK_CLOSED(self);
1781
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001782 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001783 return NULL;
1784
1785 chunked = 0;
1786
1787 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001788 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001789 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001790 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001791 Py_ssize_t consumed = 0;
1792
1793 /* First, get some data if necessary */
1794 res = 1;
1795 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001796 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001797 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001798 if (res < 0) {
1799 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1800 when EINTR occurs so we needn't do it ourselves. */
1801 if (_PyIO_trap_eintr()) {
1802 continue;
1803 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001804 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001805 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 if (res == 0)
1807 break;
1808 }
1809 if (res == 0) {
1810 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001811 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 Py_CLEAR(self->snapshot);
1813 start = endpos = offset_to_buffer = 0;
1814 break;
1815 }
1816
1817 if (remaining == NULL) {
1818 line = self->decoded_chars;
1819 start = self->decoded_chars_used;
1820 offset_to_buffer = 0;
1821 Py_INCREF(line);
1822 }
1823 else {
1824 assert(self->decoded_chars_used == 0);
1825 line = PyUnicode_Concat(remaining, self->decoded_chars);
1826 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001827 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001828 Py_CLEAR(remaining);
1829 if (line == NULL)
1830 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001831 if (PyUnicode_READY(line) == -1)
1832 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001833 }
1834
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001835 ptr = PyUnicode_DATA(line);
1836 line_len = PyUnicode_GET_LENGTH(line);
1837 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001838
1839 endpos = _PyIO_find_line_ending(
1840 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001841 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001842 ptr + kind * start,
1843 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001844 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001845 if (endpos >= 0) {
1846 endpos += start;
1847 if (limit >= 0 && (endpos - start) + chunked >= limit)
1848 endpos = start + limit - chunked;
1849 break;
1850 }
1851
1852 /* We can put aside up to `endpos` */
1853 endpos = consumed + start;
1854 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1855 /* Didn't find line ending, but reached length limit */
1856 endpos = start + limit - chunked;
1857 break;
1858 }
1859
1860 if (endpos > start) {
1861 /* No line ending seen yet - put aside current data */
1862 PyObject *s;
1863 if (chunks == NULL) {
1864 chunks = PyList_New(0);
1865 if (chunks == NULL)
1866 goto error;
1867 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001868 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001869 if (s == NULL)
1870 goto error;
1871 if (PyList_Append(chunks, s) < 0) {
1872 Py_DECREF(s);
1873 goto error;
1874 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001875 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001876 Py_DECREF(s);
1877 }
1878 /* There may be some remaining bytes we'll have to prepend to the
1879 next chunk of data */
1880 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001881 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001882 if (remaining == NULL)
1883 goto error;
1884 }
1885 Py_CLEAR(line);
1886 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001887 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001888 }
1889
1890 if (line != NULL) {
1891 /* Our line ends in the current buffer */
1892 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001893 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1894 PyObject *s = PyUnicode_Substring(line, start, endpos);
1895 Py_CLEAR(line);
1896 if (s == NULL)
1897 goto error;
1898 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001899 }
1900 }
1901 if (remaining != NULL) {
1902 if (chunks == NULL) {
1903 chunks = PyList_New(0);
1904 if (chunks == NULL)
1905 goto error;
1906 }
1907 if (PyList_Append(chunks, remaining) < 0)
1908 goto error;
1909 Py_CLEAR(remaining);
1910 }
1911 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001912 if (line != NULL) {
1913 if (PyList_Append(chunks, line) < 0)
1914 goto error;
1915 Py_DECREF(line);
1916 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001917 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1918 if (line == NULL)
1919 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001920 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001922 if (line == NULL) {
1923 Py_INCREF(_PyIO_empty_str);
1924 line = _PyIO_empty_str;
1925 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001926
1927 return line;
1928
1929 error:
1930 Py_XDECREF(chunks);
1931 Py_XDECREF(remaining);
1932 Py_XDECREF(line);
1933 return NULL;
1934}
1935
1936static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001937textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001938{
1939 Py_ssize_t limit = -1;
1940
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001941 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001942 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1943 return NULL;
1944 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001945 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946}
1947
1948/* Seek and Tell */
1949
1950typedef struct {
1951 Py_off_t start_pos;
1952 int dec_flags;
1953 int bytes_to_feed;
1954 int chars_to_skip;
1955 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001956} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001957
1958/*
1959 To speed up cookie packing/unpacking, we store the fields in a temporary
1960 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1961 The following macros define at which offsets in the intermediary byte
1962 string the various CookieStruct fields will be stored.
1963 */
1964
1965#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1966
Christian Heimes743e0cd2012-10-17 23:52:17 +02001967#if PY_BIG_ENDIAN
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968/* We want the least significant byte of start_pos to also be the least
1969 significant byte of the cookie, which means that in big-endian mode we
1970 must copy the fields in reverse order. */
1971
1972# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1973# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1974# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1975# define OFF_CHARS_TO_SKIP (sizeof(char))
1976# define OFF_NEED_EOF 0
1977
1978#else
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001979/* Little-endian mode: the least significant byte of start_pos will
1980 naturally end up the least significant byte of the cookie. */
1981
1982# define OFF_START_POS 0
1983# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1984# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1985# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1986# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1987
1988#endif
1989
1990static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001991textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001992{
1993 unsigned char buffer[COOKIE_BUF_LEN];
1994 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1995 if (cookieLong == NULL)
1996 return -1;
1997
1998 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001999 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000 Py_DECREF(cookieLong);
2001 return -1;
2002 }
2003 Py_DECREF(cookieLong);
2004
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002005 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2006 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2007 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2008 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2009 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010
2011 return 0;
2012}
2013
2014static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002015textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016{
2017 unsigned char buffer[COOKIE_BUF_LEN];
2018
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002019 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2020 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2021 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2022 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2023 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024
Christian Heimes743e0cd2012-10-17 23:52:17 +02002025 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2026 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002028
2029static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002030_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031{
2032 PyObject *res;
2033 /* When seeking to the start of the stream, we call decoder.reset()
2034 rather than decoder.getstate().
2035 This is for a few decoders such as utf-16 for which the state value
2036 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2037 utf-16, that we are expecting a BOM).
2038 */
2039 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2040 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2041 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002042 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2043 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 if (res == NULL)
2045 return -1;
2046 Py_DECREF(res);
2047 return 0;
2048}
2049
Antoine Pitroue4501852009-05-14 18:55:55 +00002050static int
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002051_textiowrapper_encoder_reset(textio *self, int start_of_stream)
Antoine Pitroue4501852009-05-14 18:55:55 +00002052{
2053 PyObject *res;
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002054 if (start_of_stream) {
Antoine Pitroue4501852009-05-14 18:55:55 +00002055 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2056 self->encoding_start_of_stream = 1;
2057 }
2058 else {
2059 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2060 _PyIO_zero, NULL);
2061 self->encoding_start_of_stream = 0;
2062 }
2063 if (res == NULL)
2064 return -1;
2065 Py_DECREF(res);
2066 return 0;
2067}
2068
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002069static int
2070_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2071{
2072 /* Same as _textiowrapper_decoder_setstate() above. */
2073 return _textiowrapper_encoder_reset(
2074 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2075}
2076
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002078textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079{
2080 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002081 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002082 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002083 PyObject *res;
2084 int cmp;
2085
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002086 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002087
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002088 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2089 return NULL;
2090 CHECK_CLOSED(self);
2091
2092 Py_INCREF(cookieObj);
2093
2094 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002095 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002096 goto fail;
2097 }
2098
2099 if (whence == 1) {
2100 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002101 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002102 if (cmp < 0)
2103 goto fail;
2104
2105 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002106 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002107 goto fail;
2108 }
2109
2110 /* Seeking to the current position should attempt to
2111 * sync the underlying buffer with the current position.
2112 */
2113 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002114 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002115 if (cookieObj == NULL)
2116 goto fail;
2117 }
2118 else if (whence == 2) {
2119 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002120 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002121 if (cmp < 0)
2122 goto fail;
2123
2124 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002125 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002126 goto fail;
2127 }
2128
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002129 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002130 if (res == NULL)
2131 goto fail;
2132 Py_DECREF(res);
2133
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002134 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002135 Py_CLEAR(self->snapshot);
2136 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002137 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002138 if (res == NULL)
2139 goto fail;
2140 Py_DECREF(res);
2141 }
2142
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002143 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Antoine Pitrou85e3ee72015-04-13 20:01:21 +02002144 Py_CLEAR(cookieObj);
2145 if (res == NULL)
2146 goto fail;
2147 if (self->encoder) {
2148 /* If seek() == 0, we are at the start of stream, otherwise not */
2149 cmp = PyObject_RichCompareBool(res, _PyIO_zero, Py_EQ);
2150 if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2151 Py_DECREF(res);
2152 goto fail;
2153 }
2154 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002155 return res;
2156 }
2157 else if (whence != 0) {
2158 PyErr_Format(PyExc_ValueError,
2159 "invalid whence (%d, should be 0, 1 or 2)", whence);
2160 goto fail;
2161 }
2162
Antoine Pitroue4501852009-05-14 18:55:55 +00002163 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002164 if (cmp < 0)
2165 goto fail;
2166
2167 if (cmp == 1) {
2168 PyErr_Format(PyExc_ValueError,
2169 "negative seek position %R", cookieObj);
2170 goto fail;
2171 }
2172
2173 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2174 if (res == NULL)
2175 goto fail;
2176 Py_DECREF(res);
2177
2178 /* The strategy of seek() is to go back to the safe start point
2179 * and replay the effect of read(chars_to_skip) from there.
2180 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002181 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002182 goto fail;
2183
2184 /* Seek back to the safe start point. */
2185 posobj = PyLong_FromOff_t(cookie.start_pos);
2186 if (posobj == NULL)
2187 goto fail;
2188 res = PyObject_CallMethodObjArgs(self->buffer,
2189 _PyIO_str_seek, posobj, NULL);
2190 Py_DECREF(posobj);
2191 if (res == NULL)
2192 goto fail;
2193 Py_DECREF(res);
2194
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002195 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196 Py_CLEAR(self->snapshot);
2197
2198 /* Restore the decoder to its state from the safe start point. */
2199 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002200 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002201 goto fail;
2202 }
2203
2204 if (cookie.chars_to_skip) {
2205 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002206 PyObject *input_chunk = _PyObject_CallMethodId(
2207 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002208 PyObject *decoded;
2209
2210 if (input_chunk == NULL)
2211 goto fail;
2212
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002213 if (!PyBytes_Check(input_chunk)) {
2214 PyErr_Format(PyExc_TypeError,
2215 "underlying read() should have returned a bytes "
2216 "object, not '%.200s'",
2217 Py_TYPE(input_chunk)->tp_name);
2218 Py_DECREF(input_chunk);
2219 goto fail;
2220 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002221
2222 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2223 if (self->snapshot == NULL) {
2224 Py_DECREF(input_chunk);
2225 goto fail;
2226 }
2227
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002228 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2229 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002230
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002231 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 goto fail;
2233
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002234 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235
2236 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002237 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002238 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2239 goto fail;
2240 }
2241 self->decoded_chars_used = cookie.chars_to_skip;
2242 }
2243 else {
2244 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2245 if (self->snapshot == NULL)
2246 goto fail;
2247 }
2248
Antoine Pitroue4501852009-05-14 18:55:55 +00002249 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2250 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002251 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002252 goto fail;
2253 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002254 return cookieObj;
2255 fail:
2256 Py_XDECREF(cookieObj);
2257 return NULL;
2258
2259}
2260
2261static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002262textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002263{
2264 PyObject *res;
2265 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002266 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002267 PyObject *next_input;
2268 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002269 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002270 PyObject *saved_state = NULL;
2271 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002272 char *dec_buffer;
2273 Py_ssize_t dec_buffer_len;
2274 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002275
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002276 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002277 CHECK_CLOSED(self);
2278
2279 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002280 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002281 goto fail;
2282 }
2283 if (!self->telling) {
2284 PyErr_SetString(PyExc_IOError,
2285 "telling position disabled by next() call");
2286 goto fail;
2287 }
2288
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002289 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002290 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002291 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002292 if (res == NULL)
2293 goto fail;
2294 Py_DECREF(res);
2295
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002296 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002297 if (posobj == NULL)
2298 goto fail;
2299
2300 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002301 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002302 return posobj;
2303 }
2304
2305#if defined(HAVE_LARGEFILE_SUPPORT)
2306 cookie.start_pos = PyLong_AsLongLong(posobj);
2307#else
2308 cookie.start_pos = PyLong_AsLong(posobj);
2309#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002310 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002311 if (PyErr_Occurred())
2312 goto fail;
2313
2314 /* Skip backward to the snapshot point (see _read_chunk). */
2315 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2316 goto fail;
2317
2318 assert (PyBytes_Check(next_input));
2319
2320 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2321
2322 /* How many decoded characters have been used up since the snapshot? */
2323 if (self->decoded_chars_used == 0) {
2324 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002325 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002326 }
2327
2328 chars_to_skip = self->decoded_chars_used;
2329
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002330 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002331 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2332 _PyIO_str_getstate, NULL);
2333 if (saved_state == NULL)
2334 goto fail;
2335
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002336#define DECODER_GETSTATE() do { \
2337 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2338 _PyIO_str_getstate, NULL); \
2339 if (_state == NULL) \
2340 goto fail; \
2341 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2342 Py_DECREF(_state); \
2343 goto fail; \
2344 } \
2345 Py_DECREF(_state); \
2346 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002348#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002349 PyObject *_decoded = _PyObject_CallMethodId( \
2350 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002351 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002352 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002353 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002354 Py_DECREF(_decoded); \
2355 } while (0)
2356
2357 /* Fast search for an acceptable start point, close to our
2358 current pos */
2359 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2360 skip_back = 1;
2361 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2362 input = PyBytes_AS_STRING(next_input);
2363 while (skip_bytes > 0) {
2364 /* Decode up to temptative start point */
2365 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2366 goto fail;
2367 DECODER_DECODE(input, skip_bytes, chars_decoded);
2368 if (chars_decoded <= chars_to_skip) {
2369 DECODER_GETSTATE();
2370 if (dec_buffer_len == 0) {
2371 /* Before pos and no bytes buffered in decoder => OK */
2372 cookie.dec_flags = dec_flags;
2373 chars_to_skip -= chars_decoded;
2374 break;
2375 }
2376 /* Skip back by buffered amount and reset heuristic */
2377 skip_bytes -= dec_buffer_len;
2378 skip_back = 1;
2379 }
2380 else {
2381 /* We're too far ahead, skip back a bit */
2382 skip_bytes -= skip_back;
2383 skip_back *= 2;
2384 }
2385 }
2386 if (skip_bytes <= 0) {
2387 skip_bytes = 0;
2388 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2389 goto fail;
2390 }
2391
2392 /* Note our initial start point. */
2393 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002394 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002395 if (chars_to_skip == 0)
2396 goto finally;
2397
2398 /* We should be close to the desired position. Now feed the decoder one
2399 * byte at a time until we reach the `chars_to_skip` target.
2400 * As we go, note the nearest "safe start point" before the current
2401 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002402 * can safely start from there and advance to this location).
2403 */
2404 chars_decoded = 0;
2405 input = PyBytes_AS_STRING(next_input);
2406 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002407 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002408 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002409 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002410
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002411 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002412 /* We got n chars for 1 byte */
2413 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002414 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002415 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002416
2417 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2418 /* Decoder buffer is empty, so this is a safe start point. */
2419 cookie.start_pos += cookie.bytes_to_feed;
2420 chars_to_skip -= chars_decoded;
2421 cookie.dec_flags = dec_flags;
2422 cookie.bytes_to_feed = 0;
2423 chars_decoded = 0;
2424 }
2425 if (chars_decoded >= chars_to_skip)
2426 break;
2427 input++;
2428 }
2429 if (input == input_end) {
2430 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002431 PyObject *decoded = _PyObject_CallMethodId(
2432 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002433 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002434 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002435 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002436 Py_DECREF(decoded);
2437 cookie.need_eof = 1;
2438
2439 if (chars_decoded < chars_to_skip) {
2440 PyErr_SetString(PyExc_IOError,
2441 "can't reconstruct logical file position");
2442 goto fail;
2443 }
2444 }
2445
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002446finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002447 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002448 Py_DECREF(saved_state);
2449 if (res == NULL)
2450 return NULL;
2451 Py_DECREF(res);
2452
2453 /* The returned cookie corresponds to the last safe start point. */
2454 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002455 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002456
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002457fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002458 if (saved_state) {
2459 PyObject *type, *value, *traceback;
2460 PyErr_Fetch(&type, &value, &traceback);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002461 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002462 _PyErr_ChainExceptions(type, value, traceback);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463 Py_DECREF(saved_state);
Serhiy Storchaka04d09eb2015-03-30 09:58:41 +03002464 Py_XDECREF(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002465 }
2466 return NULL;
2467}
2468
2469static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002470textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471{
2472 PyObject *pos = Py_None;
2473 PyObject *res;
2474
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002475 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002476 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2477 return NULL;
2478 }
2479
2480 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2481 if (res == NULL)
2482 return NULL;
2483 Py_DECREF(res);
2484
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002485 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486}
2487
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002488static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002489textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002490{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002491 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002492
2493 CHECK_INITIALIZED(self);
2494
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002495 res = PyUnicode_FromString("<_io.TextIOWrapper");
2496 if (res == NULL)
2497 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002498
Martin v. Löwis767046a2011-10-14 15:35:36 +02002499 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002500 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002501 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002502 PyErr_Clear();
2503 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002504 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002505 }
2506 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002507 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002508 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002509 if (s == NULL)
2510 goto error;
2511 PyUnicode_AppendAndDel(&res, s);
2512 if (res == NULL)
2513 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002514 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002515 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002516 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002517 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002518 PyErr_Clear();
2519 else
2520 goto error;
2521 }
2522 else {
2523 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2524 Py_DECREF(modeobj);
2525 if (s == NULL)
2526 goto error;
2527 PyUnicode_AppendAndDel(&res, s);
2528 if (res == NULL)
2529 return NULL;
2530 }
2531 s = PyUnicode_FromFormat("%U encoding=%R>",
2532 res, self->encoding);
2533 Py_DECREF(res);
2534 return s;
2535error:
2536 Py_XDECREF(res);
2537 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002538}
2539
2540
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541/* Inquiries */
2542
2543static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002544textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002545{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002546 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002547 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002548}
2549
2550static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002551textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002552{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002553 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002554 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002555}
2556
2557static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002558textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002559{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002560 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002561 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002562}
2563
2564static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002565textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002566{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002567 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002568 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002569}
2570
2571static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002572textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002573{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002574 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002575 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576}
2577
2578static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002579textiowrapper_getstate(textio *self, PyObject *args)
2580{
2581 PyErr_Format(PyExc_TypeError,
2582 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2583 return NULL;
2584}
2585
2586static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002587textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002588{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002589 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590 CHECK_CLOSED(self);
2591 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002592 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002594 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002595}
2596
2597static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002598textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002599{
2600 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002601 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002602 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002603
Antoine Pitrou6be88762010-05-03 16:48:20 +00002604 res = textiowrapper_closed_get(self, NULL);
2605 if (res == NULL)
2606 return NULL;
2607 r = PyObject_IsTrue(res);
2608 Py_DECREF(res);
2609 if (r < 0)
2610 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002611
Antoine Pitrou6be88762010-05-03 16:48:20 +00002612 if (r > 0) {
2613 Py_RETURN_NONE; /* stream already closed */
2614 }
2615 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002616 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002617 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002618 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002619 if (res)
2620 Py_DECREF(res);
2621 else
2622 PyErr_Clear();
2623 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002624 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002625 if (res == NULL)
2626 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002627 else
2628 Py_DECREF(res);
2629
Benjamin Peterson68623612012-12-20 11:53:11 -06002630 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2631 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002632 _PyErr_ChainExceptions(exc, val, tb);
2633 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002634 }
2635 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002636 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002637}
2638
2639static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002640textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002641{
2642 PyObject *line;
2643
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002644 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002645
2646 self->telling = 0;
2647 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2648 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002649 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002650 }
2651 else {
2652 line = PyObject_CallMethodObjArgs((PyObject *)self,
2653 _PyIO_str_readline, NULL);
2654 if (line && !PyUnicode_Check(line)) {
2655 PyErr_Format(PyExc_IOError,
2656 "readline() should have returned an str object, "
2657 "not '%.200s'", Py_TYPE(line)->tp_name);
2658 Py_DECREF(line);
2659 return NULL;
2660 }
2661 }
2662
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002663 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002664 return NULL;
2665
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002666 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002667 /* Reached EOF or would have blocked */
2668 Py_DECREF(line);
2669 Py_CLEAR(self->snapshot);
2670 self->telling = self->seekable;
2671 return NULL;
2672 }
2673
2674 return line;
2675}
2676
2677static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002678textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002679{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002680 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002681 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682}
2683
2684static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002685textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002687 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002688 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2689}
2690
2691static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002692textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693{
2694 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002695 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696 if (self->decoder == NULL)
2697 Py_RETURN_NONE;
2698 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2699 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002700 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2701 PyErr_Clear();
2702 Py_RETURN_NONE;
2703 }
2704 else {
2705 return NULL;
2706 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 }
2708 return res;
2709}
2710
2711static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002712textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002713{
2714 CHECK_INITIALIZED(self);
2715 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2716}
2717
2718static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002719textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002720{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002721 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002722 return PyLong_FromSsize_t(self->chunk_size);
2723}
2724
2725static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002726textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002727{
2728 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002729 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002730 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002731 if (n == -1 && PyErr_Occurred())
2732 return -1;
2733 if (n <= 0) {
2734 PyErr_SetString(PyExc_ValueError,
2735 "a strictly positive integer is required");
2736 return -1;
2737 }
2738 self->chunk_size = n;
2739 return 0;
2740}
2741
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002742static PyMethodDef textiowrapper_methods[] = {
2743 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2744 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2745 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2746 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2747 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2748 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002749
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002750 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2751 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2752 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2753 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2754 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002755 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002756
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002757 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2758 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2759 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002760 {NULL, NULL}
2761};
2762
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002763static PyMemberDef textiowrapper_members[] = {
2764 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2765 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2766 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02002767 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002768 {NULL}
2769};
2770
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002771static PyGetSetDef textiowrapper_getset[] = {
2772 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2773 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002774/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2775*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002776 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2777 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2778 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2779 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002780 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002781};
2782
2783PyTypeObject PyTextIOWrapper_Type = {
2784 PyVarObject_HEAD_INIT(NULL, 0)
2785 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002786 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002787 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002788 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002789 0, /*tp_print*/
2790 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002791 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002792 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002793 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002794 0, /*tp_as_number*/
2795 0, /*tp_as_sequence*/
2796 0, /*tp_as_mapping*/
2797 0, /*tp_hash */
2798 0, /*tp_call*/
2799 0, /*tp_str*/
2800 0, /*tp_getattro*/
2801 0, /*tp_setattro*/
2802 0, /*tp_as_buffer*/
2803 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02002804 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002805 textiowrapper_doc, /* tp_doc */
2806 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2807 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002808 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002809 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002810 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002811 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2812 textiowrapper_methods, /* tp_methods */
2813 textiowrapper_members, /* tp_members */
2814 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002815 0, /* tp_base */
2816 0, /* tp_dict */
2817 0, /* tp_descr_get */
2818 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002819 offsetof(textio, dict), /*tp_dictoffset*/
2820 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002821 0, /* tp_alloc */
2822 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02002823 0, /* tp_free */
2824 0, /* tp_is_gc */
2825 0, /* tp_bases */
2826 0, /* tp_mro */
2827 0, /* tp_cache */
2828 0, /* tp_subclasses */
2829 0, /* tp_weaklist */
2830 0, /* tp_del */
2831 0, /* tp_version_tag */
2832 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002833};