blob: 9fb4ef92c38e314467098efedc96a1a2bf6be704 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Identifiers for the attribute and method names this file refers to
   through the PyId_* handles (for instance &PyId_setstate). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
Antoine Pitrou712cb732013-12-21 15:51:54 +010048 _PyIO_State *state = IO_STATE();
49 if (state != NULL)
50 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000051 return NULL;
52}
53
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000054PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000055 "Separate the underlying buffer from the TextIOBase and return it.\n"
56 "\n"
57 "After the underlying buffer has been detached, the TextIO is in an\n"
58 "unusable state.\n"
59 );
60
61static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063{
64 return _unsupported("detach");
65}
66
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000067PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000068 "Read at most n characters from stream.\n"
69 "\n"
70 "Read from underlying buffer until we have n characters or we hit EOF.\n"
71 "If n is negative or omitted, read until EOF.\n"
72 );
73
74static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076{
77 return _unsupported("read");
78}
79
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000080PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000081 "Read until newline or EOF.\n"
82 "\n"
83 "Returns an empty string if EOF is hit immediately.\n"
84 );
85
86static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000087textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000088{
89 return _unsupported("readline");
90}
91
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000092PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000093 "Write string to stream.\n"
94 "Returns the number of characters written (which is always equal to\n"
95 "the length of the string).\n"
96 );
97
98static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000099textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100{
101 return _unsupported("write");
102}
103
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000104PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000105 "Encoding of the text stream.\n"
106 "\n"
107 "Subclasses should override.\n"
108 );
109
110static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000111textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000112{
113 Py_RETURN_NONE;
114}
115
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000116PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000117 "Line endings translated so far.\n"
118 "\n"
119 "Only line endings translated during reading are considered.\n"
120 "\n"
121 "Subclasses should override.\n"
122 );
123
124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126{
127 Py_RETURN_NONE;
128}
129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000130PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000131 "The error setting of the decoder or encoder.\n"
132 "\n"
133 "Subclasses should override.\n"
134 );
135
136static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000137textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000138{
139 Py_RETURN_NONE;
140}
141
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000142
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000143static PyMethodDef textiobase_methods[] = {
144 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
145 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
146 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
147 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000148 {NULL, NULL}
149};
150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyGetSetDef textiobase_getset[] = {
152 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
153 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
154 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000155 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156};
157
158PyTypeObject PyTextIOBase_Type = {
159 PyVarObject_HEAD_INIT(NULL, 0)
160 "_io._TextIOBase", /*tp_name*/
161 0, /*tp_basicsize*/
162 0, /*tp_itemsize*/
163 0, /*tp_dealloc*/
164 0, /*tp_print*/
165 0, /*tp_getattr*/
166 0, /*tp_setattr*/
167 0, /*tp_compare */
168 0, /*tp_repr*/
169 0, /*tp_as_number*/
170 0, /*tp_as_sequence*/
171 0, /*tp_as_mapping*/
172 0, /*tp_hash */
173 0, /*tp_call*/
174 0, /*tp_str*/
175 0, /*tp_getattro*/
176 0, /*tp_setattro*/
177 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200178 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
179 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000180 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000181 0, /* tp_traverse */
182 0, /* tp_clear */
183 0, /* tp_richcompare */
184 0, /* tp_weaklistoffset */
185 0, /* tp_iter */
186 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000189 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000190 &PyIOBase_Type, /* tp_base */
191 0, /* tp_dict */
192 0, /* tp_descr_get */
193 0, /* tp_descr_set */
194 0, /* tp_dictoffset */
195 0, /* tp_init */
196 0, /* tp_alloc */
197 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200198 0, /* tp_free */
199 0, /* tp_is_gc */
200 0, /* tp_bases */
201 0, /* tp_mro */
202 0, /* tp_cache */
203 0, /* tp_subclasses */
204 0, /* tp_weaklist */
205 0, /* tp_del */
206 0, /* tp_version_tag */
207 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000208};
209
210
211/* IncrementalNewlineDecoder */
212
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000213PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000214 "Codec used when reading a file in universal newlines mode. It wraps\n"
215 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
216 "records the types of newlines encountered. When used with\n"
217 "translate=False, it ensures that the newline sequence is returned in\n"
218 "one piece. When used with decoder=None, it expects unicode strings as\n"
219 "decode input and translates newlines without first invoking an external\n"
220 "decoder.\n"
221 );
222
223typedef struct {
224 PyObject_HEAD
225 PyObject *decoder;
226 PyObject *errors;
Victor Stinner7d7e7752014-06-17 23:31:25 +0200227 unsigned int pendingcr: 1;
228 unsigned int translate: 1;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000229 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000230} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
232static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000233incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000234 PyObject *args, PyObject *kwds)
235{
236 PyObject *decoder;
237 int translate;
238 PyObject *errors = NULL;
239 char *kwlist[] = {"decoder", "translate", "errors", NULL};
240
241 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
242 kwlist, &decoder, &translate, &errors))
243 return -1;
244
245 self->decoder = decoder;
246 Py_INCREF(decoder);
247
248 if (errors == NULL) {
249 self->errors = PyUnicode_FromString("strict");
250 if (self->errors == NULL)
251 return -1;
252 }
253 else {
254 Py_INCREF(errors);
255 self->errors = errors;
256 }
257
258 self->translate = translate;
259 self->seennl = 0;
260 self->pendingcr = 0;
261
262 return 0;
263}
264
265static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000266incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267{
268 Py_CLEAR(self->decoder);
269 Py_CLEAR(self->errors);
270 Py_TYPE(self)->tp_free((PyObject *)self);
271}
272
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200273static int
274check_decoded(PyObject *decoded)
275{
276 if (decoded == NULL)
277 return -1;
278 if (!PyUnicode_Check(decoded)) {
279 PyErr_Format(PyExc_TypeError,
280 "decoder should return a string result, not '%.200s'",
281 Py_TYPE(decoded)->tp_name);
282 Py_DECREF(decoded);
283 return -1;
284 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200285 if (PyUnicode_READY(decoded) < 0) {
286 Py_DECREF(decoded);
287 return -1;
288 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200289 return 0;
290}
291
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in the `seennl` field. */
#define SEEN_CR   0x1   /* a lone "\r" */
#define SEEN_LF   0x2   /* a lone "\n" */
#define SEEN_CRLF 0x4   /* a "\r\n" pair */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
296
297PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200298_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000299 PyObject *input, int final)
300{
301 PyObject *output;
302 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200303 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000304
305 if (self->decoder == NULL) {
306 PyErr_SetString(PyExc_ValueError,
307 "IncrementalNewlineDecoder.__init__ not called");
308 return NULL;
309 }
310
311 /* decode input (with the eventual \r from a previous pass) */
312 if (self->decoder != Py_None) {
313 output = PyObject_CallMethodObjArgs(self->decoder,
314 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
315 }
316 else {
317 output = input;
318 Py_INCREF(output);
319 }
320
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200321 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000322 return NULL;
323
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000325 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 /* Prefix output with CR */
327 int kind;
328 PyObject *modified;
329 char *out;
330
331 modified = PyUnicode_New(output_len + 1,
332 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000333 if (modified == NULL)
334 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 kind = PyUnicode_KIND(modified);
336 out = PyUnicode_DATA(modified);
337 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200338 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000339 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200340 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000341 self->pendingcr = 0;
342 output_len++;
343 }
344
345 /* retain last \r even when not translating data:
346 * then readline() is sure to get \r\n in one pass
347 */
348 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000349 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200350 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
351 {
352 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
353 if (modified == NULL)
354 goto error;
355 Py_DECREF(output);
356 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000357 self->pendingcr = 1;
358 }
359 }
360
361 /* Record which newlines are read and do newline translation if desired,
362 all in one pass. */
363 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 Py_ssize_t len;
366 int seennl = self->seennl;
367 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 in_str = PyUnicode_DATA(output);
371 len = PyUnicode_GET_LENGTH(output);
372 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373
374 if (len == 0)
375 return output;
376
377 /* If, up to now, newlines are consistently \n, do a quick check
378 for the \r *byte* with the libc's optimized memchr.
379 */
380 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200381 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382 }
383
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 if (only_lf) {
385 /* If not already seen, quick scan for a possible "\n" character.
386 (there's nothing else to be done, even when in translation mode)
387 */
388 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200389 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100390 if (kind == PyUnicode_1BYTE_KIND)
391 seennl |= SEEN_LF;
392 else {
393 Py_ssize_t i = 0;
394 for (;;) {
395 Py_UCS4 c;
396 /* Fast loop for non-control characters */
397 while (PyUnicode_READ(kind, in_str, i) > '\n')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
400 if (c == '\n') {
401 seennl |= SEEN_LF;
402 break;
403 }
404 if (i >= len)
405 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000406 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000407 }
408 }
409 /* Finished: we have scanned for newlines, and none of them
410 need translating */
411 }
412 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000415 if (seennl == SEEN_ALL)
416 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000419 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 while (PyUnicode_READ(kind, in_str, i) > '\r')
421 i++;
422 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000423 if (c == '\n')
424 seennl |= SEEN_LF;
425 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000427 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000429 }
430 else
431 seennl |= SEEN_CR;
432 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 break;
435 if (seennl == SEEN_ALL)
436 break;
437 }
438 endscan:
439 ;
440 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000441 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 void *translated;
443 int kind = PyUnicode_KIND(output);
444 void *in_str = PyUnicode_DATA(output);
445 Py_ssize_t in, out;
446 /* XXX: Previous in-place translation here is disabled as
447 resizing is not possible anymore */
448 /* We could try to optimize this so that we only do a copy
449 when there is something to translate. On the other hand,
450 we already know there is a \r byte, so chances are high
451 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200452 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 if (translated == NULL) {
454 PyErr_NoMemory();
455 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000460 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
462 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 seennl |= SEEN_LF;
466 continue;
467 }
468 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 in++;
471 seennl |= SEEN_CRLF;
472 }
473 else
474 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200475 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 continue;
477 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200480 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000481 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 Py_DECREF(output);
483 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100484 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200486 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 }
488 self->seennl |= seennl;
489 }
490
491 return output;
492
493 error:
494 Py_DECREF(output);
495 return NULL;
496}
497
498static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000499incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000500 PyObject *args, PyObject *kwds)
501{
502 char *kwlist[] = {"input", "final", NULL};
503 PyObject *input;
504 int final = 0;
505
506 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
507 kwlist, &input, &final))
508 return NULL;
509 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
510}
511
512static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000513incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000514{
515 PyObject *buffer;
516 unsigned PY_LONG_LONG flag;
517
518 if (self->decoder != Py_None) {
519 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
520 _PyIO_str_getstate, NULL);
521 if (state == NULL)
522 return NULL;
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
524 Py_DECREF(state);
525 return NULL;
526 }
527 Py_INCREF(buffer);
528 Py_DECREF(state);
529 }
530 else {
531 buffer = PyBytes_FromString("");
532 flag = 0;
533 }
534 flag <<= 1;
535 if (self->pendingcr)
536 flag |= 1;
537 return Py_BuildValue("NK", buffer, flag);
538}
539
540static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000541incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542{
543 PyObject *buffer;
544 unsigned PY_LONG_LONG flag;
545
546 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
547 return NULL;
548
Victor Stinner7d7e7752014-06-17 23:31:25 +0200549 self->pendingcr = (int) (flag & 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000550 flag >>= 1;
551
552 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200553 return _PyObject_CallMethodId(self->decoder,
554 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000555 else
556 Py_RETURN_NONE;
557}
558
559static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000560incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000561{
562 self->seennl = 0;
563 self->pendingcr = 0;
564 if (self->decoder != Py_None)
565 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
566 else
567 Py_RETURN_NONE;
568}
569
570static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000571incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 switch (self->seennl) {
574 case SEEN_CR:
575 return PyUnicode_FromString("\r");
576 case SEEN_LF:
577 return PyUnicode_FromString("\n");
578 case SEEN_CRLF:
579 return PyUnicode_FromString("\r\n");
580 case SEEN_CR | SEEN_LF:
581 return Py_BuildValue("ss", "\r", "\n");
582 case SEEN_CR | SEEN_CRLF:
583 return Py_BuildValue("ss", "\r", "\r\n");
584 case SEEN_LF | SEEN_CRLF:
585 return Py_BuildValue("ss", "\n", "\r\n");
586 case SEEN_CR | SEEN_LF | SEEN_CRLF:
587 return Py_BuildValue("sss", "\r", "\n", "\r\n");
588 default:
589 Py_RETURN_NONE;
590 }
591
592}
593
594
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000595static PyMethodDef incrementalnewlinedecoder_methods[] = {
596 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
597 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
598 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
599 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000600 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601};
602
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000603static PyGetSetDef incrementalnewlinedecoder_getset[] = {
604 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000605 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606};
607
608PyTypeObject PyIncrementalNewlineDecoder_Type = {
609 PyVarObject_HEAD_INIT(NULL, 0)
610 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /*tp_print*/
615 0, /*tp_getattr*/
616 0, /*tp_setattr*/
617 0, /*tp_compare */
618 0, /*tp_repr*/
619 0, /*tp_as_number*/
620 0, /*tp_as_sequence*/
621 0, /*tp_as_mapping*/
622 0, /*tp_hash */
623 0, /*tp_call*/
624 0, /*tp_str*/
625 0, /*tp_getattro*/
626 0, /*tp_setattro*/
627 0, /*tp_as_buffer*/
628 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 0, /* tp_traverse */
631 0, /* tp_clear */
632 0, /* tp_richcompare */
633 0, /*tp_weaklistoffset*/
634 0, /* tp_iter */
635 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000636 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000638 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000639 0, /* tp_base */
640 0, /* tp_dict */
641 0, /* tp_descr_get */
642 0, /* tp_descr_set */
643 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000644 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 0, /* tp_alloc */
646 PyType_GenericNew, /* tp_new */
647};
648
649
650/* TextIOWrapper */
651
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000652PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 "Character and line based layer over a BufferedIOBase object, buffer.\n"
654 "\n"
655 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200656 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657 "\n"
Andrew Kuchlingc7b6c502013-06-16 12:58:48 -0400658 "errors determines the strictness of encoding and decoding (see\n"
659 "help(codecs.Codec) or the documentation for codecs.register) and\n"
660 "defaults to \"strict\".\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200662 "newline controls how line endings are handled. It can be None, '',\n"
663 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
664 "\n"
665 "* On input, if newline is None, universal newlines mode is\n"
666 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
667 " these are translated into '\\n' before being returned to the\n"
668 " caller. If it is '', universal newline mode is enabled, but line\n"
669 " endings are returned to the caller untranslated. If it has any of\n"
670 " the other legal values, input lines are only terminated by the given\n"
671 " string, and the line ending is returned to the caller untranslated.\n"
672 "\n"
673 "* On output, if newline is None, any '\\n' characters written are\n"
674 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300675 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200676 " of the other legal values, any '\\n' characters written are translated\n"
677 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 "\n"
679 "If line_buffering is True, a call to flush is implied when a call to\n"
680 "write contains a newline character."
681 );
682
683typedef PyObject *
684 (*encodefunc_t)(PyObject *, PyObject *);
685
686typedef struct
687{
688 PyObject_HEAD
689 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000690 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000691 Py_ssize_t chunk_size;
692 PyObject *buffer;
693 PyObject *encoding;
694 PyObject *encoder;
695 PyObject *decoder;
696 PyObject *readnl;
697 PyObject *errors;
698 const char *writenl; /* utf-8 encoded, NULL stands for \n */
699 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200700 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701 char readuniversal;
702 char readtranslate;
703 char writetranslate;
704 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200705 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200707 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708 /* Specialized encoding func (see below) */
709 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000710 /* Whether or not it's the start of the stream */
711 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712
713 /* Reads and writes are internally buffered in order to speed things up.
714 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000715
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716 Please also note that text to be written is first encoded before being
717 buffered. This is necessary so that encoding errors are immediately
718 reported to the caller, but it unfortunately means that the
719 IncrementalEncoder (whose encode() method is always written in Python)
720 becomes a bottleneck for small writes.
721 */
722 PyObject *decoded_chars; /* buffer for text returned from decoder */
723 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
724 PyObject *pending_bytes; /* list of bytes objects waiting to be
725 written, or NULL */
726 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000727
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728 /* snapshot is either None, or a tuple (dec_flags, next_input) where
729 * dec_flags is the second (integer) item of the decoder state and
730 * next_input is the chunk of input bytes that comes next after the
731 * snapshot point. We use this to reconstruct decoder states in tell().
732 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000733 PyObject *snapshot;
734 /* Bytes-to-characters ratio for the current chunk. Serves as input for
735 the heuristic in tell(). */
736 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737
738 /* Cache raw object if it's a FileIO object */
739 PyObject *raw;
740
741 PyObject *weakreflist;
742 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000743} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744
745
746/* A couple of specialized cases in order to bypass the slow incremental
747 encoding methods for the most popular encodings. */
748
749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000750ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200752 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000757{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100758 return _PyUnicode_EncodeUTF16(text,
759 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760}
761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100765 return _PyUnicode_EncodeUTF16(text,
766 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767}
768
769static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000770utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771{
Antoine Pitroue4501852009-05-14 18:55:55 +0000772 if (!self->encoding_start_of_stream) {
773 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200774#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000775 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000777 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000779 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100780 return _PyUnicode_EncodeUTF16(text,
781 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782}
783
Antoine Pitroue4501852009-05-14 18:55:55 +0000784static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000785utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000786{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100787 return _PyUnicode_EncodeUTF32(text,
788 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000789}
790
791static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000792utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000793{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100794 return _PyUnicode_EncodeUTF32(text,
795 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000796}
797
798static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000799utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000800{
801 if (!self->encoding_start_of_stream) {
802 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200803#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000804 return utf32be_encode(self, text);
805#else
806 return utf32le_encode(self, text);
807#endif
808 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100809 return _PyUnicode_EncodeUTF32(text,
810 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000811}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812
813static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000814utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000815{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000817}
818
819static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000820latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000821{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200822 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000823}
824
825/* Map normalized encoding names onto the specialized encoding funcs */
826
827typedef struct {
828 const char *name;
829 encodefunc_t encodefunc;
830} encodefuncentry;
831
Antoine Pitrou24f36292009-03-28 22:16:42 +0000832static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 {"ascii", (encodefunc_t) ascii_encode},
834 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000835 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 {"utf-16-be", (encodefunc_t) utf16be_encode},
837 {"utf-16-le", (encodefunc_t) utf16le_encode},
838 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000839 {"utf-32-be", (encodefunc_t) utf32be_encode},
840 {"utf-32-le", (encodefunc_t) utf32le_encode},
841 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000842 {NULL, NULL}
843};
844
845
846static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000847textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000848{
849 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200850 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000851 NULL};
Nick Coghlana9b15242014-02-04 22:11:18 +1000852 PyObject *buffer, *raw, *codec_info = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000853 char *encoding = NULL;
854 char *errors = NULL;
855 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200856 int line_buffering = 0, write_through = 0;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100857 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000858
859 PyObject *res;
860 int r;
861
862 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000863 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200864 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000865 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200866 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000867 return -1;
868
869 if (newline && newline[0] != '\0'
870 && !(newline[0] == '\n' && newline[1] == '\0')
871 && !(newline[0] == '\r' && newline[1] == '\0')
872 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
873 PyErr_Format(PyExc_ValueError,
874 "illegal newline value: %s", newline);
875 return -1;
876 }
877
878 Py_CLEAR(self->buffer);
879 Py_CLEAR(self->encoding);
880 Py_CLEAR(self->encoder);
881 Py_CLEAR(self->decoder);
882 Py_CLEAR(self->readnl);
883 Py_CLEAR(self->decoded_chars);
884 Py_CLEAR(self->pending_bytes);
885 Py_CLEAR(self->snapshot);
886 Py_CLEAR(self->errors);
887 Py_CLEAR(self->raw);
888 self->decoded_chars_used = 0;
889 self->pending_bytes_count = 0;
890 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000891 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000892
893 if (encoding == NULL) {
894 /* Try os.device_encoding(fileno) */
895 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100896 state = IO_STATE();
897 if (state == NULL)
898 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200899 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000900 /* Ignore only AttributeError and UnsupportedOperation */
901 if (fileno == NULL) {
902 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
903 PyErr_ExceptionMatches(state->unsupported_operation)) {
904 PyErr_Clear();
905 }
906 else {
907 goto error;
908 }
909 }
910 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200911 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500912 Py_DECREF(fileno);
913 if (fd == -1 && PyErr_Occurred()) {
914 goto error;
915 }
916
917 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000918 if (self->encoding == NULL)
919 goto error;
920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
922 }
923 }
924 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200925 PyObject *locale_module = _PyIO_get_locale_module(state);
926 if (locale_module == NULL)
927 goto catch_ImportError;
928 self->encoding = _PyObject_CallMethodId(
929 locale_module, &PyId_getpreferredencoding, "O", Py_False);
930 Py_DECREF(locale_module);
931 if (self->encoding == NULL) {
932 catch_ImportError:
933 /*
934 Importing locale can raise a ImportError because of
935 _functools, and locale.getpreferredencoding can raise a
936 ImportError if _locale is not available. These will happen
937 during module building.
938 */
939 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
940 PyErr_Clear();
941 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000942 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200943 else
944 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000945 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200946 else if (!PyUnicode_Check(self->encoding))
947 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000948 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000949 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000950 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000951 if (encoding == NULL)
952 goto error;
953 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000954 else if (encoding != NULL) {
955 self->encoding = PyUnicode_FromString(encoding);
956 if (self->encoding == NULL)
957 goto error;
958 }
959 else {
960 PyErr_SetString(PyExc_IOError,
961 "could not determine default encoding");
962 }
963
Nick Coghlana9b15242014-02-04 22:11:18 +1000964 /* Check we have been asked for a real text encoding */
965 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
966 if (codec_info == NULL) {
967 Py_CLEAR(self->encoding);
968 goto error;
969 }
970
971 /* XXX: Failures beyond this point have the potential to leak elements
972 * of the partially constructed object (like self->encoding)
973 */
974
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000975 if (errors == NULL)
976 errors = "strict";
977 self->errors = PyBytes_FromString(errors);
978 if (self->errors == NULL)
979 goto error;
980
981 self->chunk_size = 8192;
982 self->readuniversal = (newline == NULL || newline[0] == '\0');
983 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200984 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000985 self->readtranslate = (newline == NULL);
986 if (newline) {
987 self->readnl = PyUnicode_FromString(newline);
988 if (self->readnl == NULL)
Nick Coghlana9b15242014-02-04 22:11:18 +1000989 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990 }
991 self->writetranslate = (newline == NULL || newline[0] != '\0');
992 if (!self->readuniversal && self->readnl) {
993 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000994 if (self->writenl == NULL)
995 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000996 if (!strcmp(self->writenl, "\n"))
997 self->writenl = NULL;
998 }
999#ifdef MS_WINDOWS
1000 else
1001 self->writenl = "\r\n";
1002#endif
1003
1004 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001005 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001006 if (res == NULL)
1007 goto error;
1008 r = PyObject_IsTrue(res);
1009 Py_DECREF(res);
1010 if (r == -1)
1011 goto error;
1012 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001013 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info,
1014 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001015 if (self->decoder == NULL)
1016 goto error;
1017
1018 if (self->readuniversal) {
1019 PyObject *incrementalDecoder = PyObject_CallFunction(
1020 (PyObject *)&PyIncrementalNewlineDecoder_Type,
1021 "Oi", self->decoder, (int)self->readtranslate);
1022 if (incrementalDecoder == NULL)
1023 goto error;
1024 Py_CLEAR(self->decoder);
1025 self->decoder = incrementalDecoder;
1026 }
1027 }
1028
1029 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001030 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001031 if (res == NULL)
1032 goto error;
1033 r = PyObject_IsTrue(res);
1034 Py_DECREF(res);
1035 if (r == -1)
1036 goto error;
1037 if (r == 1) {
Nick Coghlana9b15242014-02-04 22:11:18 +10001038 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info,
1039 errors);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 if (self->encoder == NULL)
1041 goto error;
1042 /* Get the normalized named of the codec */
Nick Coghlana9b15242014-02-04 22:11:18 +10001043 res = _PyObject_GetAttrId(codec_info, &PyId_name);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001044 if (res == NULL) {
1045 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 PyErr_Clear();
1047 else
1048 goto error;
1049 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 else if (PyUnicode_Check(res)) {
1051 encodefuncentry *e = encodefuncs;
1052 while (e->name != NULL) {
1053 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1054 self->encodefunc = e->encodefunc;
1055 break;
1056 }
1057 e++;
1058 }
1059 }
1060 Py_XDECREF(res);
1061 }
1062
Nick Coghlana9b15242014-02-04 22:11:18 +10001063 /* Finished sorting out the codec details */
Benjamin Peterson6c14f232014-11-12 10:19:46 -05001064 Py_CLEAR(codec_info);
Nick Coghlana9b15242014-02-04 22:11:18 +10001065
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001066 self->buffer = buffer;
1067 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001068
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001069 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1070 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1071 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001072 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001073 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001074 if (raw == NULL) {
1075 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1076 PyErr_Clear();
1077 else
1078 goto error;
1079 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001080 else if (Py_TYPE(raw) == &PyFileIO_Type)
1081 self->raw = raw;
1082 else
1083 Py_DECREF(raw);
1084 }
1085
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001086 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087 if (res == NULL)
1088 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001089 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001090 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001091 if (r < 0)
1092 goto error;
1093 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001094
Martin v. Löwis767046a2011-10-14 15:35:36 +02001095 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001096
Antoine Pitroue4501852009-05-14 18:55:55 +00001097 self->encoding_start_of_stream = 0;
1098 if (self->seekable && self->encoder) {
1099 PyObject *cookieObj;
1100 int cmp;
1101
1102 self->encoding_start_of_stream = 1;
1103
1104 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1105 if (cookieObj == NULL)
1106 goto error;
1107
1108 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1109 Py_DECREF(cookieObj);
1110 if (cmp < 0) {
1111 goto error;
1112 }
1113
1114 if (cmp == 0) {
1115 self->encoding_start_of_stream = 0;
1116 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1117 _PyIO_zero, NULL);
1118 if (res == NULL)
1119 goto error;
1120 Py_DECREF(res);
1121 }
1122 }
1123
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124 self->ok = 1;
1125 return 0;
1126
1127 error:
Nick Coghlana9b15242014-02-04 22:11:18 +10001128 Py_XDECREF(codec_info);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001129 return -1;
1130}
1131
1132static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001133_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001134{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 self->ok = 0;
1136 Py_CLEAR(self->buffer);
1137 Py_CLEAR(self->encoding);
1138 Py_CLEAR(self->encoder);
1139 Py_CLEAR(self->decoder);
1140 Py_CLEAR(self->readnl);
1141 Py_CLEAR(self->decoded_chars);
1142 Py_CLEAR(self->pending_bytes);
1143 Py_CLEAR(self->snapshot);
1144 Py_CLEAR(self->errors);
1145 Py_CLEAR(self->raw);
1146 return 0;
1147}
1148
1149static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001150textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001151{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001152 self->finalizing = 1;
1153 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001154 return;
Antoine Pitrou796564c2013-07-30 19:59:21 +02001155 _textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001156 _PyObject_GC_UNTRACK(self);
1157 if (self->weakreflist != NULL)
1158 PyObject_ClearWeakRefs((PyObject *)self);
1159 Py_CLEAR(self->dict);
1160 Py_TYPE(self)->tp_free((PyObject *)self);
1161}
1162
1163static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001164textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001165{
1166 Py_VISIT(self->buffer);
1167 Py_VISIT(self->encoding);
1168 Py_VISIT(self->encoder);
1169 Py_VISIT(self->decoder);
1170 Py_VISIT(self->readnl);
1171 Py_VISIT(self->decoded_chars);
1172 Py_VISIT(self->pending_bytes);
1173 Py_VISIT(self->snapshot);
1174 Py_VISIT(self->errors);
1175 Py_VISIT(self->raw);
1176
1177 Py_VISIT(self->dict);
1178 return 0;
1179}
1180
1181static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001182textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001184 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001185 return -1;
1186 Py_CLEAR(self->dict);
1187 return 0;
1188}
1189
1190static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001191textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001192
1193/* This macro takes some shortcuts to make the common case faster. */
1194#define CHECK_CLOSED(self) \
1195 do { \
1196 int r; \
1197 PyObject *_res; \
1198 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1199 if (self->raw != NULL) \
1200 r = _PyFileIO_closed(self->raw); \
1201 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001202 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001203 if (_res == NULL) \
1204 return NULL; \
1205 r = PyObject_IsTrue(_res); \
1206 Py_DECREF(_res); \
1207 if (r < 0) \
1208 return NULL; \
1209 } \
1210 if (r > 0) { \
1211 PyErr_SetString(PyExc_ValueError, \
1212 "I/O operation on closed file."); \
1213 return NULL; \
1214 } \
1215 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001216 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001217 return NULL; \
1218 } while (0)
1219
/* Set ValueError and make the enclosing function return NULL when
   self->ok is not positive (it is set to 1 only at the end of a successful
   textiowrapper_init()).  Expands to a bare `if` statement. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return NULL; \
    }
1226
/* CHECK_INITIALIZED, followed by a check that the underlying buffer has not
   been detached; sets ValueError and returns NULL from the enclosing
   function otherwise.  Expands to more than one statement. */
#define CHECK_ATTACHED(self) \
    CHECK_INITIALIZED(self); \
    if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return NULL; \
    }
1234
/* Same checks and messages as CHECK_ATTACHED, for functions that report
   errors by returning -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        "I/O operation on uninitialized object"); \
        return -1; \
    } else if (self->detached) { \
        PyErr_SetString(PyExc_ValueError, \
                        "underlying buffer has been detached"); \
        return -1; \
    }
1245
1246
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001247static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001248textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001249{
1250 PyObject *buffer, *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001251 CHECK_ATTACHED(self);
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001252 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1253 if (res == NULL)
1254 return NULL;
1255 Py_DECREF(res);
1256 buffer = self->buffer;
1257 self->buffer = NULL;
1258 self->detached = 1;
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001259 return buffer;
1260}
1261
Antoine Pitrou24f36292009-03-28 22:16:42 +00001262/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001263 underlying buffered object, though. */
1264static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001265_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001267 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268
1269 if (self->pending_bytes == NULL)
1270 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001271
1272 pending = self->pending_bytes;
1273 Py_INCREF(pending);
1274 self->pending_bytes_count = 0;
1275 Py_CLEAR(self->pending_bytes);
1276
1277 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1278 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279 if (b == NULL)
1280 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001281 ret = NULL;
1282 do {
1283 ret = PyObject_CallMethodObjArgs(self->buffer,
1284 _PyIO_str_write, b, NULL);
1285 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286 Py_DECREF(b);
1287 if (ret == NULL)
1288 return -1;
1289 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001290 return 0;
1291}
1292
1293static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001294textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001295{
1296 PyObject *ret;
1297 PyObject *text; /* owned reference */
1298 PyObject *b;
1299 Py_ssize_t textlen;
1300 int haslf = 0;
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001301 int needflush = 0, text_needflush = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001303 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001304
1305 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1306 return NULL;
1307 }
1308
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001309 if (PyUnicode_READY(text) == -1)
1310 return NULL;
1311
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 CHECK_CLOSED(self);
1313
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001314 if (self->encoder == NULL)
1315 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001316
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 Py_INCREF(text);
1318
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001319 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001320
1321 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001322 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 haslf = 1;
1324
1325 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001326 PyObject *newtext = _PyObject_CallMethodId(
1327 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001328 Py_DECREF(text);
1329 if (newtext == NULL)
1330 return NULL;
1331 text = newtext;
1332 }
1333
Antoine Pitroue96ec682011-07-23 21:46:35 +02001334 if (self->write_through)
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001335 text_needflush = 1;
1336 if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001337 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001338 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001339 needflush = 1;
1340
1341 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001342 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001343 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001344 self->encoding_start_of_stream = 0;
1345 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001346 else
1347 b = PyObject_CallMethodObjArgs(self->encoder,
1348 _PyIO_str_encode, text, NULL);
1349 Py_DECREF(text);
1350 if (b == NULL)
1351 return NULL;
1352
1353 if (self->pending_bytes == NULL) {
1354 self->pending_bytes = PyList_New(0);
1355 if (self->pending_bytes == NULL) {
1356 Py_DECREF(b);
1357 return NULL;
1358 }
1359 self->pending_bytes_count = 0;
1360 }
1361 if (PyList_Append(self->pending_bytes, b) < 0) {
1362 Py_DECREF(b);
1363 return NULL;
1364 }
1365 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1366 Py_DECREF(b);
Antoine Pitrouc644e7c2014-05-09 00:24:50 +02001367 if (self->pending_bytes_count > self->chunk_size || needflush ||
1368 text_needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001369 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001370 return NULL;
1371 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001372
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001373 if (needflush) {
1374 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1375 if (ret == NULL)
1376 return NULL;
1377 Py_DECREF(ret);
1378 }
1379
1380 Py_CLEAR(self->snapshot);
1381
1382 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001383 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001384 if (ret == NULL)
1385 return NULL;
1386 Py_DECREF(ret);
1387 }
1388
1389 return PyLong_FromSsize_t(textlen);
1390}
1391
1392/* Steal a reference to chars and store it in the decoded_char buffer;
1393 */
1394static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001395textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001396{
1397 Py_CLEAR(self->decoded_chars);
1398 self->decoded_chars = chars;
1399 self->decoded_chars_used = 0;
1400}
1401
1402static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001403textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001404{
1405 PyObject *chars;
1406 Py_ssize_t avail;
1407
1408 if (self->decoded_chars == NULL)
1409 return PyUnicode_FromStringAndSize(NULL, 0);
1410
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001411 /* decoded_chars is guaranteed to be "ready". */
1412 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001413 - self->decoded_chars_used);
1414
1415 assert(avail >= 0);
1416
1417 if (n < 0 || n > avail)
1418 n = avail;
1419
1420 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001421 chars = PyUnicode_Substring(self->decoded_chars,
1422 self->decoded_chars_used,
1423 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001424 if (chars == NULL)
1425 return NULL;
1426 }
1427 else {
1428 chars = self->decoded_chars;
1429 Py_INCREF(chars);
1430 }
1431
1432 self->decoded_chars_used += n;
1433 return chars;
1434}
1435
1436/* Read and decode the next chunk of data from the BufferedReader.
1437 */
1438static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001439textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001440{
1441 PyObject *dec_buffer = NULL;
1442 PyObject *dec_flags = NULL;
1443 PyObject *input_chunk = NULL;
Antoine Pitroub8503892014-04-29 10:14:02 +02001444 Py_buffer input_chunk_buf;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001445 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001446 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001447 int eof;
1448
1449 /* The return value is True unless EOF was reached. The decoded string is
1450 * placed in self._decoded_chars (replacing its previous value). The
1451 * entire input chunk is sent to the decoder, though some of it may remain
1452 * buffered in the decoder, yet to be converted.
1453 */
1454
1455 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001456 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001457 return -1;
1458 }
1459
1460 if (self->telling) {
1461 /* To prepare for tell(), we need to snapshot a point in the file
1462 * where the decoder's input buffer is empty.
1463 */
1464
1465 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1466 _PyIO_str_getstate, NULL);
1467 if (state == NULL)
1468 return -1;
1469 /* Given this, we know there was a valid snapshot point
1470 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1471 */
1472 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1473 Py_DECREF(state);
1474 return -1;
1475 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001476
1477 if (!PyBytes_Check(dec_buffer)) {
1478 PyErr_Format(PyExc_TypeError,
1479 "decoder getstate() should have returned a bytes "
1480 "object, not '%.200s'",
1481 Py_TYPE(dec_buffer)->tp_name);
1482 Py_DECREF(state);
1483 return -1;
1484 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001485 Py_INCREF(dec_buffer);
1486 Py_INCREF(dec_flags);
1487 Py_DECREF(state);
1488 }
1489
1490 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001491 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001492 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001493 }
1494 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001495 if (chunk_size == NULL)
1496 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001497
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001498 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001499 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1500 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501 Py_DECREF(chunk_size);
1502 if (input_chunk == NULL)
1503 goto fail;
Antoine Pitroub8503892014-04-29 10:14:02 +02001504
1505 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001506 PyErr_Format(PyExc_TypeError,
Antoine Pitroub8503892014-04-29 10:14:02 +02001507 "underlying %s() should have returned a bytes-like object, "
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001508 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1509 Py_TYPE(input_chunk)->tp_name);
1510 goto fail;
1511 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001512
Antoine Pitroub8503892014-04-29 10:14:02 +02001513 nbytes = input_chunk_buf.len;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001514 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001515 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1516 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1517 self->decoder, input_chunk, eof);
1518 }
1519 else {
1520 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1521 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1522 }
Antoine Pitroub8503892014-04-29 10:14:02 +02001523 PyBuffer_Release(&input_chunk_buf);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001524
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001525 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001526 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001527 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001528 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001529 if (nchars > 0)
1530 self->b2cratio = (double) nbytes / nchars;
1531 else
1532 self->b2cratio = 0.0;
1533 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534 eof = 0;
1535
1536 if (self->telling) {
1537 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1538 * next input to be decoded is dec_buffer + input_chunk.
1539 */
Antoine Pitroub8503892014-04-29 10:14:02 +02001540 PyObject *next_input = dec_buffer;
1541 PyBytes_Concat(&next_input, input_chunk);
1542 if (next_input == NULL) {
1543 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001544 goto fail;
1545 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546 Py_CLEAR(self->snapshot);
1547 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1548 }
1549 Py_DECREF(input_chunk);
1550
1551 return (eof == 0);
1552
1553 fail:
1554 Py_XDECREF(dec_buffer);
1555 Py_XDECREF(dec_flags);
1556 Py_XDECREF(input_chunk);
1557 return -1;
1558}
1559
1560static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001561textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001562{
1563 Py_ssize_t n = -1;
1564 PyObject *result = NULL, *chunks = NULL;
1565
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001566 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001567
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001568 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569 return NULL;
1570
1571 CHECK_CLOSED(self);
1572
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001573 if (self->decoder == NULL)
1574 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001575
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001576 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577 return NULL;
1578
1579 if (n < 0) {
1580 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001581 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001582 PyObject *decoded;
1583 if (bytes == NULL)
1584 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001585
1586 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1587 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1588 bytes, 1);
1589 else
1590 decoded = PyObject_CallMethodObjArgs(
1591 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001593 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001594 goto fail;
1595
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001596 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001597
1598 if (result == NULL) {
1599 Py_DECREF(decoded);
1600 return NULL;
1601 }
1602
1603 PyUnicode_AppendAndDel(&result, decoded);
1604 if (result == NULL)
1605 goto fail;
1606
1607 Py_CLEAR(self->snapshot);
1608 return result;
1609 }
1610 else {
1611 int res = 1;
1612 Py_ssize_t remaining = n;
1613
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001614 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001615 if (result == NULL)
1616 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001617 if (PyUnicode_READY(result) == -1)
1618 goto fail;
1619 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620
1621 /* Keep reading chunks until we have n characters to return */
1622 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001623 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001624 if (res < 0) {
1625 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1626 when EINTR occurs so we needn't do it ourselves. */
1627 if (_PyIO_trap_eintr()) {
1628 continue;
1629 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001631 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 if (res == 0) /* EOF */
1633 break;
1634 if (chunks == NULL) {
1635 chunks = PyList_New(0);
1636 if (chunks == NULL)
1637 goto fail;
1638 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001639 if (PyUnicode_GET_LENGTH(result) > 0 &&
1640 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 goto fail;
1642 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001643 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001644 if (result == NULL)
1645 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001646 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001647 }
1648 if (chunks != NULL) {
1649 if (result != NULL && PyList_Append(chunks, result) < 0)
1650 goto fail;
1651 Py_CLEAR(result);
1652 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1653 if (result == NULL)
1654 goto fail;
1655 Py_CLEAR(chunks);
1656 }
1657 return result;
1658 }
1659 fail:
1660 Py_XDECREF(result);
1661 Py_XDECREF(chunks);
1662 return NULL;
1663}
1664
1665
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001666/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 that is to the NUL character. Otherwise the function will produce
1668 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001669static char *
1670find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001672 if (kind == PyUnicode_1BYTE_KIND) {
1673 assert(ch < 256);
1674 return (char *) memchr((void *) s, (char) ch, end - s);
1675 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001677 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001678 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001679 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 return s;
1681 if (s == end)
1682 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001683 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001684 }
1685}
1686
1687Py_ssize_t
1688_PyIO_find_line_ending(
1689 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001690 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693
1694 if (translated) {
1695 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001696 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001698 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 else {
1700 *consumed = len;
1701 return -1;
1702 }
1703 }
1704 else if (universal) {
1705 /* Universal newline search. Find any of \r, \r\n, \n
1706 * The decoder ensures that \r\n are not split in two pieces
1707 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001708 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001710 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001712 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001713 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001714 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001715 if (s >= end) {
1716 *consumed = len;
1717 return -1;
1718 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001719 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001720 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001722 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001724 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001725 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001726 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001727 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001728 }
1729 }
1730 }
1731 else {
1732 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001733 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
Victor Stinner706768c2014-08-16 01:03:39 +02001734 Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001735 /* Assume that readnl is an ASCII character. */
1736 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001737 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001738 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001740 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001741 *consumed = len;
1742 return -1;
1743 }
1744 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001745 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001746 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001747 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001748 if (e < s)
1749 e = s;
1750 while (s < e) {
1751 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001752 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753 if (pos == NULL || pos >= e)
1754 break;
1755 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001756 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001757 break;
1758 }
1759 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001760 return (pos - start)/kind + readnl_len;
1761 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001763 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 if (pos == NULL)
1765 *consumed = len;
1766 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001767 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001768 return -1;
1769 }
1770 }
1771}
1772
1773static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001774_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001775{
1776 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1777 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1778 int res;
1779
1780 CHECK_CLOSED(self);
1781
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001782 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001783 return NULL;
1784
1785 chunked = 0;
1786
1787 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001788 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001789 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001790 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001791 Py_ssize_t consumed = 0;
1792
1793 /* First, get some data if necessary */
1794 res = 1;
1795 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001796 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001797 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001798 if (res < 0) {
1799 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1800 when EINTR occurs so we needn't do it ourselves. */
1801 if (_PyIO_trap_eintr()) {
1802 continue;
1803 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001804 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001805 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 if (res == 0)
1807 break;
1808 }
1809 if (res == 0) {
1810 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001811 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 Py_CLEAR(self->snapshot);
1813 start = endpos = offset_to_buffer = 0;
1814 break;
1815 }
1816
1817 if (remaining == NULL) {
1818 line = self->decoded_chars;
1819 start = self->decoded_chars_used;
1820 offset_to_buffer = 0;
1821 Py_INCREF(line);
1822 }
1823 else {
1824 assert(self->decoded_chars_used == 0);
1825 line = PyUnicode_Concat(remaining, self->decoded_chars);
1826 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001827 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001828 Py_CLEAR(remaining);
1829 if (line == NULL)
1830 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001831 if (PyUnicode_READY(line) == -1)
1832 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001833 }
1834
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001835 ptr = PyUnicode_DATA(line);
1836 line_len = PyUnicode_GET_LENGTH(line);
1837 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001838
1839 endpos = _PyIO_find_line_ending(
1840 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001841 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001842 ptr + kind * start,
1843 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001844 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001845 if (endpos >= 0) {
1846 endpos += start;
1847 if (limit >= 0 && (endpos - start) + chunked >= limit)
1848 endpos = start + limit - chunked;
1849 break;
1850 }
1851
1852 /* We can put aside up to `endpos` */
1853 endpos = consumed + start;
1854 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1855 /* Didn't find line ending, but reached length limit */
1856 endpos = start + limit - chunked;
1857 break;
1858 }
1859
1860 if (endpos > start) {
1861 /* No line ending seen yet - put aside current data */
1862 PyObject *s;
1863 if (chunks == NULL) {
1864 chunks = PyList_New(0);
1865 if (chunks == NULL)
1866 goto error;
1867 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001868 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001869 if (s == NULL)
1870 goto error;
1871 if (PyList_Append(chunks, s) < 0) {
1872 Py_DECREF(s);
1873 goto error;
1874 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001875 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001876 Py_DECREF(s);
1877 }
1878 /* There may be some remaining bytes we'll have to prepend to the
1879 next chunk of data */
1880 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001881 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001882 if (remaining == NULL)
1883 goto error;
1884 }
1885 Py_CLEAR(line);
1886 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001887 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001888 }
1889
1890 if (line != NULL) {
1891 /* Our line ends in the current buffer */
1892 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001893 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1894 PyObject *s = PyUnicode_Substring(line, start, endpos);
1895 Py_CLEAR(line);
1896 if (s == NULL)
1897 goto error;
1898 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001899 }
1900 }
1901 if (remaining != NULL) {
1902 if (chunks == NULL) {
1903 chunks = PyList_New(0);
1904 if (chunks == NULL)
1905 goto error;
1906 }
1907 if (PyList_Append(chunks, remaining) < 0)
1908 goto error;
1909 Py_CLEAR(remaining);
1910 }
1911 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001912 if (line != NULL) {
1913 if (PyList_Append(chunks, line) < 0)
1914 goto error;
1915 Py_DECREF(line);
1916 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001917 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1918 if (line == NULL)
1919 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001920 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001922 if (line == NULL) {
1923 Py_INCREF(_PyIO_empty_str);
1924 line = _PyIO_empty_str;
1925 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001926
1927 return line;
1928
1929 error:
1930 Py_XDECREF(chunks);
1931 Py_XDECREF(remaining);
1932 Py_XDECREF(line);
1933 return NULL;
1934}
1935
1936static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001937textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001938{
1939 Py_ssize_t limit = -1;
1940
Benjamin Peterson10e76b62014-12-21 20:51:50 -06001941 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001942 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1943 return NULL;
1944 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001945 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946}
1947
1948/* Seek and Tell */
1949
1950typedef struct {
1951 Py_off_t start_pos;
1952 int dec_flags;
1953 int bytes_to_feed;
1954 int chars_to_skip;
1955 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001956} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001957
1958/*
1959 To speed up cookie packing/unpacking, we store the fields in a temporary
1960 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1961 The following macros define at which offsets in the intermediary byte
1962 string the various CookieStruct fields will be stored.
1963 */
1964
1965#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1966
Christian Heimes743e0cd2012-10-17 23:52:17 +02001967#if PY_BIG_ENDIAN
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968/* We want the least significant byte of start_pos to also be the least
1969 significant byte of the cookie, which means that in big-endian mode we
1970 must copy the fields in reverse order. */
1971
1972# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1973# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1974# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1975# define OFF_CHARS_TO_SKIP (sizeof(char))
1976# define OFF_NEED_EOF 0
1977
1978#else
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001979/* Little-endian mode: the least significant byte of start_pos will
1980 naturally end up the least significant byte of the cookie. */
1981
1982# define OFF_START_POS 0
1983# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1984# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1985# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1986# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1987
1988#endif
1989
1990static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001991textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001992{
1993 unsigned char buffer[COOKIE_BUF_LEN];
1994 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1995 if (cookieLong == NULL)
1996 return -1;
1997
1998 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001999 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000 Py_DECREF(cookieLong);
2001 return -1;
2002 }
2003 Py_DECREF(cookieLong);
2004
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002005 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2006 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2007 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2008 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2009 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010
2011 return 0;
2012}
2013
2014static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002015textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016{
2017 unsigned char buffer[COOKIE_BUF_LEN];
2018
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002019 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2020 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2021 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2022 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2023 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024
Christian Heimes743e0cd2012-10-17 23:52:17 +02002025 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2026 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002028
2029static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002030_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031{
2032 PyObject *res;
2033 /* When seeking to the start of the stream, we call decoder.reset()
2034 rather than decoder.getstate().
2035 This is for a few decoders such as utf-16 for which the state value
2036 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2037 utf-16, that we are expecting a BOM).
2038 */
2039 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2040 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2041 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002042 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2043 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 if (res == NULL)
2045 return -1;
2046 Py_DECREF(res);
2047 return 0;
2048}
2049
Antoine Pitroue4501852009-05-14 18:55:55 +00002050static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002051_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002052{
2053 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002054 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002055 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2056 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2057 self->encoding_start_of_stream = 1;
2058 }
2059 else {
2060 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2061 _PyIO_zero, NULL);
2062 self->encoding_start_of_stream = 0;
2063 }
2064 if (res == NULL)
2065 return -1;
2066 Py_DECREF(res);
2067 return 0;
2068}
2069
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002070static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002071textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002072{
2073 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002074 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002075 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 PyObject *res;
2077 int cmp;
2078
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002079 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2082 return NULL;
2083 CHECK_CLOSED(self);
2084
2085 Py_INCREF(cookieObj);
2086
2087 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002088 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002089 goto fail;
2090 }
2091
2092 if (whence == 1) {
2093 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002094 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002095 if (cmp < 0)
2096 goto fail;
2097
2098 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002099 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002100 goto fail;
2101 }
2102
2103 /* Seeking to the current position should attempt to
2104 * sync the underlying buffer with the current position.
2105 */
2106 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002107 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002108 if (cookieObj == NULL)
2109 goto fail;
2110 }
2111 else if (whence == 2) {
2112 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002113 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 if (cmp < 0)
2115 goto fail;
2116
2117 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002118 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119 goto fail;
2120 }
2121
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002122 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123 if (res == NULL)
2124 goto fail;
2125 Py_DECREF(res);
2126
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002127 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002128 Py_CLEAR(self->snapshot);
2129 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002130 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002131 if (res == NULL)
2132 goto fail;
2133 Py_DECREF(res);
2134 }
2135
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002136 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002137 Py_XDECREF(cookieObj);
2138 return res;
2139 }
2140 else if (whence != 0) {
2141 PyErr_Format(PyExc_ValueError,
2142 "invalid whence (%d, should be 0, 1 or 2)", whence);
2143 goto fail;
2144 }
2145
Antoine Pitroue4501852009-05-14 18:55:55 +00002146 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002147 if (cmp < 0)
2148 goto fail;
2149
2150 if (cmp == 1) {
2151 PyErr_Format(PyExc_ValueError,
2152 "negative seek position %R", cookieObj);
2153 goto fail;
2154 }
2155
2156 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2157 if (res == NULL)
2158 goto fail;
2159 Py_DECREF(res);
2160
2161 /* The strategy of seek() is to go back to the safe start point
2162 * and replay the effect of read(chars_to_skip) from there.
2163 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002164 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002165 goto fail;
2166
2167 /* Seek back to the safe start point. */
2168 posobj = PyLong_FromOff_t(cookie.start_pos);
2169 if (posobj == NULL)
2170 goto fail;
2171 res = PyObject_CallMethodObjArgs(self->buffer,
2172 _PyIO_str_seek, posobj, NULL);
2173 Py_DECREF(posobj);
2174 if (res == NULL)
2175 goto fail;
2176 Py_DECREF(res);
2177
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002178 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002179 Py_CLEAR(self->snapshot);
2180
2181 /* Restore the decoder to its state from the safe start point. */
2182 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002183 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002184 goto fail;
2185 }
2186
2187 if (cookie.chars_to_skip) {
2188 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002189 PyObject *input_chunk = _PyObject_CallMethodId(
2190 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002191 PyObject *decoded;
2192
2193 if (input_chunk == NULL)
2194 goto fail;
2195
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002196 if (!PyBytes_Check(input_chunk)) {
2197 PyErr_Format(PyExc_TypeError,
2198 "underlying read() should have returned a bytes "
2199 "object, not '%.200s'",
2200 Py_TYPE(input_chunk)->tp_name);
2201 Py_DECREF(input_chunk);
2202 goto fail;
2203 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002204
2205 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2206 if (self->snapshot == NULL) {
2207 Py_DECREF(input_chunk);
2208 goto fail;
2209 }
2210
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002211 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2212 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002213
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002214 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002215 goto fail;
2216
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002217 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002218
2219 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002220 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002221 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2222 goto fail;
2223 }
2224 self->decoded_chars_used = cookie.chars_to_skip;
2225 }
2226 else {
2227 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2228 if (self->snapshot == NULL)
2229 goto fail;
2230 }
2231
Antoine Pitroue4501852009-05-14 18:55:55 +00002232 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2233 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002234 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002235 goto fail;
2236 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002237 return cookieObj;
2238 fail:
2239 Py_XDECREF(cookieObj);
2240 return NULL;
2241
2242}
2243
2244static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002245textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002246{
2247 PyObject *res;
2248 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002249 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002250 PyObject *next_input;
2251 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002252 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002253 PyObject *saved_state = NULL;
2254 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002255 char *dec_buffer;
2256 Py_ssize_t dec_buffer_len;
2257 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002258
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002259 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002260 CHECK_CLOSED(self);
2261
2262 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002263 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002264 goto fail;
2265 }
2266 if (!self->telling) {
2267 PyErr_SetString(PyExc_IOError,
2268 "telling position disabled by next() call");
2269 goto fail;
2270 }
2271
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002272 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002273 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002274 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002275 if (res == NULL)
2276 goto fail;
2277 Py_DECREF(res);
2278
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002279 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002280 if (posobj == NULL)
2281 goto fail;
2282
2283 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002284 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002285 return posobj;
2286 }
2287
2288#if defined(HAVE_LARGEFILE_SUPPORT)
2289 cookie.start_pos = PyLong_AsLongLong(posobj);
2290#else
2291 cookie.start_pos = PyLong_AsLong(posobj);
2292#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002293 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002294 if (PyErr_Occurred())
2295 goto fail;
2296
2297 /* Skip backward to the snapshot point (see _read_chunk). */
2298 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2299 goto fail;
2300
2301 assert (PyBytes_Check(next_input));
2302
2303 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2304
2305 /* How many decoded characters have been used up since the snapshot? */
2306 if (self->decoded_chars_used == 0) {
2307 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002308 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002309 }
2310
2311 chars_to_skip = self->decoded_chars_used;
2312
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002313 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002314 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2315 _PyIO_str_getstate, NULL);
2316 if (saved_state == NULL)
2317 goto fail;
2318
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002319#define DECODER_GETSTATE() do { \
2320 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2321 _PyIO_str_getstate, NULL); \
2322 if (_state == NULL) \
2323 goto fail; \
2324 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2325 Py_DECREF(_state); \
2326 goto fail; \
2327 } \
2328 Py_DECREF(_state); \
2329 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002330
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002331#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002332 PyObject *_decoded = _PyObject_CallMethodId( \
2333 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002334 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002335 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002336 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002337 Py_DECREF(_decoded); \
2338 } while (0)
2339
2340 /* Fast search for an acceptable start point, close to our
2341 current pos */
2342 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2343 skip_back = 1;
2344 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2345 input = PyBytes_AS_STRING(next_input);
2346 while (skip_bytes > 0) {
2347 /* Decode up to temptative start point */
2348 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2349 goto fail;
2350 DECODER_DECODE(input, skip_bytes, chars_decoded);
2351 if (chars_decoded <= chars_to_skip) {
2352 DECODER_GETSTATE();
2353 if (dec_buffer_len == 0) {
2354 /* Before pos and no bytes buffered in decoder => OK */
2355 cookie.dec_flags = dec_flags;
2356 chars_to_skip -= chars_decoded;
2357 break;
2358 }
2359 /* Skip back by buffered amount and reset heuristic */
2360 skip_bytes -= dec_buffer_len;
2361 skip_back = 1;
2362 }
2363 else {
2364 /* We're too far ahead, skip back a bit */
2365 skip_bytes -= skip_back;
2366 skip_back *= 2;
2367 }
2368 }
2369 if (skip_bytes <= 0) {
2370 skip_bytes = 0;
2371 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2372 goto fail;
2373 }
2374
2375 /* Note our initial start point. */
2376 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002377 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002378 if (chars_to_skip == 0)
2379 goto finally;
2380
2381 /* We should be close to the desired position. Now feed the decoder one
2382 * byte at a time until we reach the `chars_to_skip` target.
2383 * As we go, note the nearest "safe start point" before the current
2384 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002385 * can safely start from there and advance to this location).
2386 */
2387 chars_decoded = 0;
2388 input = PyBytes_AS_STRING(next_input);
2389 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002390 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002392 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002393
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002394 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002395 /* We got n chars for 1 byte */
2396 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002397 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002398 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399
2400 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2401 /* Decoder buffer is empty, so this is a safe start point. */
2402 cookie.start_pos += cookie.bytes_to_feed;
2403 chars_to_skip -= chars_decoded;
2404 cookie.dec_flags = dec_flags;
2405 cookie.bytes_to_feed = 0;
2406 chars_decoded = 0;
2407 }
2408 if (chars_decoded >= chars_to_skip)
2409 break;
2410 input++;
2411 }
2412 if (input == input_end) {
2413 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002414 PyObject *decoded = _PyObject_CallMethodId(
2415 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002416 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002417 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002418 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002419 Py_DECREF(decoded);
2420 cookie.need_eof = 1;
2421
2422 if (chars_decoded < chars_to_skip) {
2423 PyErr_SetString(PyExc_IOError,
2424 "can't reconstruct logical file position");
2425 goto fail;
2426 }
2427 }
2428
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002429finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002430 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002431 Py_DECREF(saved_state);
2432 if (res == NULL)
2433 return NULL;
2434 Py_DECREF(res);
2435
2436 /* The returned cookie corresponds to the last safe start point. */
2437 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002438 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002439
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002440fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002441 if (saved_state) {
2442 PyObject *type, *value, *traceback;
2443 PyErr_Fetch(&type, &value, &traceback);
2444
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002445 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002446 Py_DECREF(saved_state);
2447 if (res == NULL)
2448 return NULL;
2449 Py_DECREF(res);
2450
2451 PyErr_Restore(type, value, traceback);
2452 }
2453 return NULL;
2454}
2455
2456static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002457textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002458{
2459 PyObject *pos = Py_None;
2460 PyObject *res;
2461
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002462 CHECK_ATTACHED(self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2464 return NULL;
2465 }
2466
2467 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2468 if (res == NULL)
2469 return NULL;
2470 Py_DECREF(res);
2471
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002472 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002473}
2474
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002476textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002477{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002478 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002479
2480 CHECK_INITIALIZED(self);
2481
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002482 res = PyUnicode_FromString("<_io.TextIOWrapper");
2483 if (res == NULL)
2484 return NULL;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002485
Martin v. Löwis767046a2011-10-14 15:35:36 +02002486 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002487 if (nameobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002488 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitrou716c4442009-05-23 19:04:03 +00002489 PyErr_Clear();
2490 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002491 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002492 }
2493 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002494 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002495 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002496 if (s == NULL)
2497 goto error;
2498 PyUnicode_AppendAndDel(&res, s);
2499 if (res == NULL)
2500 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002501 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002502 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002503 if (modeobj == NULL) {
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002504 if (PyErr_ExceptionMatches(PyExc_Exception))
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002505 PyErr_Clear();
2506 else
2507 goto error;
2508 }
2509 else {
2510 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2511 Py_DECREF(modeobj);
2512 if (s == NULL)
2513 goto error;
2514 PyUnicode_AppendAndDel(&res, s);
2515 if (res == NULL)
2516 return NULL;
2517 }
2518 s = PyUnicode_FromFormat("%U encoding=%R>",
2519 res, self->encoding);
2520 Py_DECREF(res);
2521 return s;
2522error:
2523 Py_XDECREF(res);
2524 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002525}
2526
2527
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528/* Inquiries */
2529
2530static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002531textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002533 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002534 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002535}
2536
2537static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002538textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002539{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002540 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002541 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542}
2543
2544static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002545textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002546{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002547 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002548 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549}
2550
2551static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002552textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002553{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002554 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002555 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556}
2557
2558static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002559textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002560{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002561 CHECK_ATTACHED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002562 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563}
2564
2565static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002566textiowrapper_getstate(textio *self, PyObject *args)
2567{
2568 PyErr_Format(PyExc_TypeError,
2569 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2570 return NULL;
2571}
2572
2573static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002574textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002575{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002576 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002577 CHECK_CLOSED(self);
2578 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002579 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002580 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002581 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002582}
2583
2584static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002585textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002586{
2587 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002588 int r;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002589 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590
Antoine Pitrou6be88762010-05-03 16:48:20 +00002591 res = textiowrapper_closed_get(self, NULL);
2592 if (res == NULL)
2593 return NULL;
2594 r = PyObject_IsTrue(res);
2595 Py_DECREF(res);
2596 if (r < 0)
2597 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002598
Antoine Pitrou6be88762010-05-03 16:48:20 +00002599 if (r > 0) {
2600 Py_RETURN_NONE; /* stream already closed */
2601 }
2602 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002603 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002604 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002605 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002606 if (res)
2607 Py_DECREF(res);
2608 else
2609 PyErr_Clear();
2610 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002611 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002612 if (res == NULL)
2613 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002614 else
2615 Py_DECREF(res);
2616
Benjamin Peterson68623612012-12-20 11:53:11 -06002617 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2618 if (exc != NULL) {
Serhiy Storchakae2bd2a72014-10-08 22:31:52 +03002619 _PyErr_ChainExceptions(exc, val, tb);
2620 Py_CLEAR(res);
Benjamin Peterson68623612012-12-20 11:53:11 -06002621 }
2622 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002623 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002624}
2625
2626static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002627textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002628{
2629 PyObject *line;
2630
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002631 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002632
2633 self->telling = 0;
2634 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2635 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002636 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002637 }
2638 else {
2639 line = PyObject_CallMethodObjArgs((PyObject *)self,
2640 _PyIO_str_readline, NULL);
2641 if (line && !PyUnicode_Check(line)) {
2642 PyErr_Format(PyExc_IOError,
2643 "readline() should have returned an str object, "
2644 "not '%.200s'", Py_TYPE(line)->tp_name);
2645 Py_DECREF(line);
2646 return NULL;
2647 }
2648 }
2649
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002650 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002651 return NULL;
2652
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002653 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002654 /* Reached EOF or would have blocked */
2655 Py_DECREF(line);
2656 Py_CLEAR(self->snapshot);
2657 self->telling = self->seekable;
2658 return NULL;
2659 }
2660
2661 return line;
2662}
2663
2664static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002665textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002666{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002667 CHECK_ATTACHED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002668 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002669}
2670
2671static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002672textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002674 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002675 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2676}
2677
2678static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002679textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680{
2681 PyObject *res;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002682 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002683 if (self->decoder == NULL)
2684 Py_RETURN_NONE;
2685 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2686 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002687 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2688 PyErr_Clear();
2689 Py_RETURN_NONE;
2690 }
2691 else {
2692 return NULL;
2693 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002694 }
2695 return res;
2696}
2697
2698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002699textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002700{
2701 CHECK_INITIALIZED(self);
2702 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2703}
2704
2705static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002706textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707{
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002708 CHECK_ATTACHED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002709 return PyLong_FromSsize_t(self->chunk_size);
2710}
2711
2712static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002713textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002714{
2715 Py_ssize_t n;
Benjamin Peterson10e76b62014-12-21 20:51:50 -06002716 CHECK_ATTACHED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002717 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002718 if (n == -1 && PyErr_Occurred())
2719 return -1;
2720 if (n <= 0) {
2721 PyErr_SetString(PyExc_ValueError,
2722 "a strictly positive integer is required");
2723 return -1;
2724 }
2725 self->chunk_size = n;
2726 return 0;
2727}
2728
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002729static PyMethodDef textiowrapper_methods[] = {
2730 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2731 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2732 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2733 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2734 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2735 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002736
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002737 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2738 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2739 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2740 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2741 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002742 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002743
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002744 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2745 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2746 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002747 {NULL, NULL}
2748};
2749
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002750static PyMemberDef textiowrapper_members[] = {
2751 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2752 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2753 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02002754 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002755 {NULL}
2756};
2757
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002758static PyGetSetDef textiowrapper_getset[] = {
2759 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2760 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002761/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2762*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002763 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2764 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2765 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2766 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002767 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002768};
2769
2770PyTypeObject PyTextIOWrapper_Type = {
2771 PyVarObject_HEAD_INIT(NULL, 0)
2772 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002773 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002774 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002775 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002776 0, /*tp_print*/
2777 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002778 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002779 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002780 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002781 0, /*tp_as_number*/
2782 0, /*tp_as_sequence*/
2783 0, /*tp_as_mapping*/
2784 0, /*tp_hash */
2785 0, /*tp_call*/
2786 0, /*tp_str*/
2787 0, /*tp_getattro*/
2788 0, /*tp_setattro*/
2789 0, /*tp_as_buffer*/
2790 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02002791 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002792 textiowrapper_doc, /* tp_doc */
2793 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2794 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002795 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002796 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002797 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002798 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2799 textiowrapper_methods, /* tp_methods */
2800 textiowrapper_members, /* tp_members */
2801 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002802 0, /* tp_base */
2803 0, /* tp_dict */
2804 0, /* tp_descr_get */
2805 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002806 offsetof(textio, dict), /*tp_dictoffset*/
2807 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002808 0, /* tp_alloc */
2809 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02002810 0, /* tp_free */
2811 0, /* tp_is_gc */
2812 0, /* tp_bases */
2813 0, /* tp_mro */
2814 0, /* tp_cache */
2815 0, /* tp_subclasses */
2816 0, /* tp_weaklist */
2817 0, /* tp_del */
2818 0, /* tp_version_tag */
2819 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002820};