blob: 747f62323cbc16d3e2bcb1dfc4f1e17c48a6ce8c [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020014_Py_IDENTIFIER(close);
15_Py_IDENTIFIER(_dealloc_warn);
16_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020017_Py_IDENTIFIER(fileno);
18_Py_IDENTIFIER(flush);
19_Py_IDENTIFIER(getpreferredencoding);
20_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020021_Py_IDENTIFIER(mode);
22_Py_IDENTIFIER(name);
23_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020024_Py_IDENTIFIER(read);
Martin v. Löwis767046a2011-10-14 15:35:36 +020025_Py_IDENTIFIER(read1);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020026_Py_IDENTIFIER(readable);
27_Py_IDENTIFIER(replace);
28_Py_IDENTIFIER(reset);
29_Py_IDENTIFIER(seek);
30_Py_IDENTIFIER(seekable);
31_Py_IDENTIFIER(setstate);
32_Py_IDENTIFIER(tell);
33_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
Antoine Pitrou712cb732013-12-21 15:51:54 +010048 _PyIO_State *state = IO_STATE();
49 if (state != NULL)
50 PyErr_SetString(state->unsupported_operation, message);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000051 return NULL;
52}
53
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000054PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000055 "Separate the underlying buffer from the TextIOBase and return it.\n"
56 "\n"
57 "After the underlying buffer has been detached, the TextIO is in an\n"
58 "unusable state.\n"
59 );
60
61static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000063{
64 return _unsupported("detach");
65}
66
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000067PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000068 "Read at most n characters from stream.\n"
69 "\n"
70 "Read from underlying buffer until we have n characters or we hit EOF.\n"
71 "If n is negative or omitted, read until EOF.\n"
72 );
73
74static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076{
77 return _unsupported("read");
78}
79
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000080PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000081 "Read until newline or EOF.\n"
82 "\n"
83 "Returns an empty string if EOF is hit immediately.\n"
84 );
85
86static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000087textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000088{
89 return _unsupported("readline");
90}
91
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000092PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000093 "Write string to stream.\n"
94 "Returns the number of characters written (which is always equal to\n"
95 "the length of the string).\n"
96 );
97
98static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000099textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100{
101 return _unsupported("write");
102}
103
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000104PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000105 "Encoding of the text stream.\n"
106 "\n"
107 "Subclasses should override.\n"
108 );
109
110static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000111textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000112{
113 Py_RETURN_NONE;
114}
115
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000116PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000117 "Line endings translated so far.\n"
118 "\n"
119 "Only line endings translated during reading are considered.\n"
120 "\n"
121 "Subclasses should override.\n"
122 );
123
124static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000126{
127 Py_RETURN_NONE;
128}
129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000130PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000131 "The error setting of the decoder or encoder.\n"
132 "\n"
133 "Subclasses should override.\n"
134 );
135
136static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000137textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000138{
139 Py_RETURN_NONE;
140}
141
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000142
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000143static PyMethodDef textiobase_methods[] = {
144 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
145 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
146 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
147 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000148 {NULL, NULL}
149};
150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000151static PyGetSetDef textiobase_getset[] = {
152 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
153 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
154 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000155 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000156};
157
158PyTypeObject PyTextIOBase_Type = {
159 PyVarObject_HEAD_INIT(NULL, 0)
160 "_io._TextIOBase", /*tp_name*/
161 0, /*tp_basicsize*/
162 0, /*tp_itemsize*/
163 0, /*tp_dealloc*/
164 0, /*tp_print*/
165 0, /*tp_getattr*/
166 0, /*tp_setattr*/
167 0, /*tp_compare */
168 0, /*tp_repr*/
169 0, /*tp_as_number*/
170 0, /*tp_as_sequence*/
171 0, /*tp_as_mapping*/
172 0, /*tp_hash */
173 0, /*tp_call*/
174 0, /*tp_str*/
175 0, /*tp_getattro*/
176 0, /*tp_setattro*/
177 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200178 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
179 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000180 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000181 0, /* tp_traverse */
182 0, /* tp_clear */
183 0, /* tp_richcompare */
184 0, /* tp_weaklistoffset */
185 0, /* tp_iter */
186 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000189 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000190 &PyIOBase_Type, /* tp_base */
191 0, /* tp_dict */
192 0, /* tp_descr_get */
193 0, /* tp_descr_set */
194 0, /* tp_dictoffset */
195 0, /* tp_init */
196 0, /* tp_alloc */
197 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200198 0, /* tp_free */
199 0, /* tp_is_gc */
200 0, /* tp_bases */
201 0, /* tp_mro */
202 0, /* tp_cache */
203 0, /* tp_subclasses */
204 0, /* tp_weaklist */
205 0, /* tp_del */
206 0, /* tp_version_tag */
207 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000208};
209
210
211/* IncrementalNewlineDecoder */
212
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000213PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000214 "Codec used when reading a file in universal newlines mode. It wraps\n"
215 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
216 "records the types of newlines encountered. When used with\n"
217 "translate=False, it ensures that the newline sequence is returned in\n"
218 "one piece. When used with decoder=None, it expects unicode strings as\n"
219 "decode input and translates newlines without first invoking an external\n"
220 "decoder.\n"
221 );
222
223typedef struct {
224 PyObject_HEAD
225 PyObject *decoder;
226 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000227 signed int pendingcr: 1;
228 signed int translate: 1;
229 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000230} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000231
232static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000233incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000234 PyObject *args, PyObject *kwds)
235{
236 PyObject *decoder;
237 int translate;
238 PyObject *errors = NULL;
239 char *kwlist[] = {"decoder", "translate", "errors", NULL};
240
241 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
242 kwlist, &decoder, &translate, &errors))
243 return -1;
244
245 self->decoder = decoder;
246 Py_INCREF(decoder);
247
248 if (errors == NULL) {
249 self->errors = PyUnicode_FromString("strict");
250 if (self->errors == NULL)
251 return -1;
252 }
253 else {
254 Py_INCREF(errors);
255 self->errors = errors;
256 }
257
258 self->translate = translate;
259 self->seennl = 0;
260 self->pendingcr = 0;
261
262 return 0;
263}
264
265static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000266incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267{
268 Py_CLEAR(self->decoder);
269 Py_CLEAR(self->errors);
270 Py_TYPE(self)->tp_free((PyObject *)self);
271}
272
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200273static int
274check_decoded(PyObject *decoded)
275{
276 if (decoded == NULL)
277 return -1;
278 if (!PyUnicode_Check(decoded)) {
279 PyErr_Format(PyExc_TypeError,
280 "decoder should return a string result, not '%.200s'",
281 Py_TYPE(decoded)->tp_name);
282 Py_DECREF(decoded);
283 return -1;
284 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200285 if (PyUnicode_READY(decoded) < 0) {
286 Py_DECREF(decoded);
287 return -1;
288 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200289 return 0;
290}
291
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000292#define SEEN_CR 1
293#define SEEN_LF 2
294#define SEEN_CRLF 4
295#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
296
297PyObject *
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200298_PyIncrementalNewlineDecoder_decode(PyObject *myself,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000299 PyObject *input, int final)
300{
301 PyObject *output;
302 Py_ssize_t output_len;
Antoine Pitrou09fcb722013-10-23 19:20:21 +0200303 nldecoder_object *self = (nldecoder_object *) myself;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000304
305 if (self->decoder == NULL) {
306 PyErr_SetString(PyExc_ValueError,
307 "IncrementalNewlineDecoder.__init__ not called");
308 return NULL;
309 }
310
311 /* decode input (with the eventual \r from a previous pass) */
312 if (self->decoder != Py_None) {
313 output = PyObject_CallMethodObjArgs(self->decoder,
314 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
315 }
316 else {
317 output = input;
318 Py_INCREF(output);
319 }
320
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200321 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000322 return NULL;
323
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000325 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 /* Prefix output with CR */
327 int kind;
328 PyObject *modified;
329 char *out;
330
331 modified = PyUnicode_New(output_len + 1,
332 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000333 if (modified == NULL)
334 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200335 kind = PyUnicode_KIND(modified);
336 out = PyUnicode_DATA(modified);
337 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200338 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000339 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200340 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000341 self->pendingcr = 0;
342 output_len++;
343 }
344
345 /* retain last \r even when not translating data:
346 * then readline() is sure to get \r\n in one pass
347 */
348 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000349 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200350 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
351 {
352 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
353 if (modified == NULL)
354 goto error;
355 Py_DECREF(output);
356 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000357 self->pendingcr = 1;
358 }
359 }
360
361 /* Record which newlines are read and do newline translation if desired,
362 all in one pass. */
363 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 Py_ssize_t len;
366 int seennl = self->seennl;
367 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 in_str = PyUnicode_DATA(output);
371 len = PyUnicode_GET_LENGTH(output);
372 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373
374 if (len == 0)
375 return output;
376
377 /* If, up to now, newlines are consistently \n, do a quick check
378 for the \r *byte* with the libc's optimized memchr.
379 */
380 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200381 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000382 }
383
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 if (only_lf) {
385 /* If not already seen, quick scan for a possible "\n" character.
386 (there's nothing else to be done, even when in translation mode)
387 */
388 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200389 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100390 if (kind == PyUnicode_1BYTE_KIND)
391 seennl |= SEEN_LF;
392 else {
393 Py_ssize_t i = 0;
394 for (;;) {
395 Py_UCS4 c;
396 /* Fast loop for non-control characters */
397 while (PyUnicode_READ(kind, in_str, i) > '\n')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
400 if (c == '\n') {
401 seennl |= SEEN_LF;
402 break;
403 }
404 if (i >= len)
405 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000406 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000407 }
408 }
409 /* Finished: we have scanned for newlines, and none of them
410 need translating */
411 }
412 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000414 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000415 if (seennl == SEEN_ALL)
416 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000419 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 while (PyUnicode_READ(kind, in_str, i) > '\r')
421 i++;
422 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000423 if (c == '\n')
424 seennl |= SEEN_LF;
425 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000427 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200428 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000429 }
430 else
431 seennl |= SEEN_CR;
432 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 break;
435 if (seennl == SEEN_ALL)
436 break;
437 }
438 endscan:
439 ;
440 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000441 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 void *translated;
443 int kind = PyUnicode_KIND(output);
444 void *in_str = PyUnicode_DATA(output);
445 Py_ssize_t in, out;
446 /* XXX: Previous in-place translation here is disabled as
447 resizing is not possible anymore */
448 /* We could try to optimize this so that we only do a copy
449 when there is something to translate. On the other hand,
450 we already know there is a \r byte, so chances are high
451 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200452 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 if (translated == NULL) {
454 PyErr_NoMemory();
455 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000460 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200461 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
462 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200464 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 seennl |= SEEN_LF;
466 continue;
467 }
468 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000470 in++;
471 seennl |= SEEN_CRLF;
472 }
473 else
474 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200475 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000476 continue;
477 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200480 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000481 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200482 Py_DECREF(output);
483 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100484 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200485 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200486 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 }
488 self->seennl |= seennl;
489 }
490
491 return output;
492
493 error:
494 Py_DECREF(output);
495 return NULL;
496}
497
498static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000499incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000500 PyObject *args, PyObject *kwds)
501{
502 char *kwlist[] = {"input", "final", NULL};
503 PyObject *input;
504 int final = 0;
505
506 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
507 kwlist, &input, &final))
508 return NULL;
509 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
510}
511
512static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000513incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000514{
515 PyObject *buffer;
516 unsigned PY_LONG_LONG flag;
517
518 if (self->decoder != Py_None) {
519 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
520 _PyIO_str_getstate, NULL);
521 if (state == NULL)
522 return NULL;
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
524 Py_DECREF(state);
525 return NULL;
526 }
527 Py_INCREF(buffer);
528 Py_DECREF(state);
529 }
530 else {
531 buffer = PyBytes_FromString("");
532 flag = 0;
533 }
534 flag <<= 1;
535 if (self->pendingcr)
536 flag |= 1;
537 return Py_BuildValue("NK", buffer, flag);
538}
539
540static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000541incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542{
543 PyObject *buffer;
544 unsigned PY_LONG_LONG flag;
545
546 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
547 return NULL;
548
549 self->pendingcr = (int) flag & 1;
550 flag >>= 1;
551
552 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200553 return _PyObject_CallMethodId(self->decoder,
554 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000555 else
556 Py_RETURN_NONE;
557}
558
559static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000560incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000561{
562 self->seennl = 0;
563 self->pendingcr = 0;
564 if (self->decoder != Py_None)
565 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
566 else
567 Py_RETURN_NONE;
568}
569
570static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000571incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000572{
573 switch (self->seennl) {
574 case SEEN_CR:
575 return PyUnicode_FromString("\r");
576 case SEEN_LF:
577 return PyUnicode_FromString("\n");
578 case SEEN_CRLF:
579 return PyUnicode_FromString("\r\n");
580 case SEEN_CR | SEEN_LF:
581 return Py_BuildValue("ss", "\r", "\n");
582 case SEEN_CR | SEEN_CRLF:
583 return Py_BuildValue("ss", "\r", "\r\n");
584 case SEEN_LF | SEEN_CRLF:
585 return Py_BuildValue("ss", "\n", "\r\n");
586 case SEEN_CR | SEEN_LF | SEEN_CRLF:
587 return Py_BuildValue("sss", "\r", "\n", "\r\n");
588 default:
589 Py_RETURN_NONE;
590 }
591
592}
593
594
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000595static PyMethodDef incrementalnewlinedecoder_methods[] = {
596 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
597 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
598 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
599 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000600 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601};
602
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000603static PyGetSetDef incrementalnewlinedecoder_getset[] = {
604 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000605 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606};
607
608PyTypeObject PyIncrementalNewlineDecoder_Type = {
609 PyVarObject_HEAD_INIT(NULL, 0)
610 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /*tp_print*/
615 0, /*tp_getattr*/
616 0, /*tp_setattr*/
617 0, /*tp_compare */
618 0, /*tp_repr*/
619 0, /*tp_as_number*/
620 0, /*tp_as_sequence*/
621 0, /*tp_as_mapping*/
622 0, /*tp_hash */
623 0, /*tp_call*/
624 0, /*tp_str*/
625 0, /*tp_getattro*/
626 0, /*tp_setattro*/
627 0, /*tp_as_buffer*/
628 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 0, /* tp_traverse */
631 0, /* tp_clear */
632 0, /* tp_richcompare */
633 0, /*tp_weaklistoffset*/
634 0, /* tp_iter */
635 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000636 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000638 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000639 0, /* tp_base */
640 0, /* tp_dict */
641 0, /* tp_descr_get */
642 0, /* tp_descr_set */
643 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000644 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 0, /* tp_alloc */
646 PyType_GenericNew, /* tp_new */
647};
648
649
650/* TextIOWrapper */
651
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000652PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 "Character and line based layer over a BufferedIOBase object, buffer.\n"
654 "\n"
655 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200656 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657 "\n"
Andrew Kuchlingc7b6c502013-06-16 12:58:48 -0400658 "errors determines the strictness of encoding and decoding (see\n"
659 "help(codecs.Codec) or the documentation for codecs.register) and\n"
660 "defaults to \"strict\".\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200662 "newline controls how line endings are handled. It can be None, '',\n"
663 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
664 "\n"
665 "* On input, if newline is None, universal newlines mode is\n"
666 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
667 " these are translated into '\\n' before being returned to the\n"
668 " caller. If it is '', universal newline mode is enabled, but line\n"
669 " endings are returned to the caller untranslated. If it has any of\n"
670 " the other legal values, input lines are only terminated by the given\n"
671 " string, and the line ending is returned to the caller untranslated.\n"
672 "\n"
673 "* On output, if newline is None, any '\\n' characters written are\n"
674 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300675 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200676 " of the other legal values, any '\\n' characters written are translated\n"
677 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 "\n"
679 "If line_buffering is True, a call to flush is implied when a call to\n"
680 "write contains a newline character."
681 );
682
683typedef PyObject *
684 (*encodefunc_t)(PyObject *, PyObject *);
685
686typedef struct
687{
688 PyObject_HEAD
689 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000690 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000691 Py_ssize_t chunk_size;
692 PyObject *buffer;
693 PyObject *encoding;
694 PyObject *encoder;
695 PyObject *decoder;
696 PyObject *readnl;
697 PyObject *errors;
698 const char *writenl; /* utf-8 encoded, NULL stands for \n */
699 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200700 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701 char readuniversal;
702 char readtranslate;
703 char writetranslate;
704 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200705 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200707 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708 /* Specialized encoding func (see below) */
709 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000710 /* Whether or not it's the start of the stream */
711 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712
713 /* Reads and writes are internally buffered in order to speed things up.
714 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000715
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716 Please also note that text to be written is first encoded before being
717 buffered. This is necessary so that encoding errors are immediately
718 reported to the caller, but it unfortunately means that the
719 IncrementalEncoder (whose encode() method is always written in Python)
720 becomes a bottleneck for small writes.
721 */
722 PyObject *decoded_chars; /* buffer for text returned from decoder */
723 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
724 PyObject *pending_bytes; /* list of bytes objects waiting to be
725 written, or NULL */
726 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000727
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728 /* snapshot is either None, or a tuple (dec_flags, next_input) where
729 * dec_flags is the second (integer) item of the decoder state and
730 * next_input is the chunk of input bytes that comes next after the
731 * snapshot point. We use this to reconstruct decoder states in tell().
732 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000733 PyObject *snapshot;
734 /* Bytes-to-characters ratio for the current chunk. Serves as input for
735 the heuristic in tell(). */
736 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737
738 /* Cache raw object if it's a FileIO object */
739 PyObject *raw;
740
741 PyObject *weakreflist;
742 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000743} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744
745
746/* A couple of specialized cases in order to bypass the slow incremental
747 encoding methods for the most popular encodings. */
748
749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000750ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200752 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000757{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100758 return _PyUnicode_EncodeUTF16(text,
759 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760}
761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100765 return _PyUnicode_EncodeUTF16(text,
766 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000767}
768
769static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000770utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000771{
Antoine Pitroue4501852009-05-14 18:55:55 +0000772 if (!self->encoding_start_of_stream) {
773 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200774#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000775 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000777 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000779 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100780 return _PyUnicode_EncodeUTF16(text,
781 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782}
783
Antoine Pitroue4501852009-05-14 18:55:55 +0000784static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000785utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000786{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100787 return _PyUnicode_EncodeUTF32(text,
788 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000789}
790
791static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000792utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000793{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100794 return _PyUnicode_EncodeUTF32(text,
795 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000796}
797
798static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000799utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000800{
801 if (!self->encoding_start_of_stream) {
802 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200803#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000804 return utf32be_encode(self, text);
805#else
806 return utf32le_encode(self, text);
807#endif
808 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100809 return _PyUnicode_EncodeUTF32(text,
810 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000811}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812
813static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000814utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000815{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200816 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000817}
818
819static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000820latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000821{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200822 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000823}
824
825/* Map normalized encoding names onto the specialized encoding funcs */
826
827typedef struct {
828 const char *name;
829 encodefunc_t encodefunc;
830} encodefuncentry;
831
Antoine Pitrou24f36292009-03-28 22:16:42 +0000832static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 {"ascii", (encodefunc_t) ascii_encode},
834 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000835 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 {"utf-16-be", (encodefunc_t) utf16be_encode},
837 {"utf-16-le", (encodefunc_t) utf16le_encode},
838 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000839 {"utf-32-be", (encodefunc_t) utf32be_encode},
840 {"utf-32-le", (encodefunc_t) utf32le_encode},
841 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000842 {NULL, NULL}
843};
844
845
846static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000847textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000848{
849 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200850 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000851 NULL};
852 PyObject *buffer, *raw;
853 char *encoding = NULL;
854 char *errors = NULL;
855 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200856 int line_buffering = 0, write_through = 0;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100857 _PyIO_State *state = NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000858
859 PyObject *res;
860 int r;
861
862 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000863 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200864 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000865 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200866 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000867 return -1;
868
869 if (newline && newline[0] != '\0'
870 && !(newline[0] == '\n' && newline[1] == '\0')
871 && !(newline[0] == '\r' && newline[1] == '\0')
872 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
873 PyErr_Format(PyExc_ValueError,
874 "illegal newline value: %s", newline);
875 return -1;
876 }
877
878 Py_CLEAR(self->buffer);
879 Py_CLEAR(self->encoding);
880 Py_CLEAR(self->encoder);
881 Py_CLEAR(self->decoder);
882 Py_CLEAR(self->readnl);
883 Py_CLEAR(self->decoded_chars);
884 Py_CLEAR(self->pending_bytes);
885 Py_CLEAR(self->snapshot);
886 Py_CLEAR(self->errors);
887 Py_CLEAR(self->raw);
888 self->decoded_chars_used = 0;
889 self->pending_bytes_count = 0;
890 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000891 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000892
893 if (encoding == NULL) {
894 /* Try os.device_encoding(fileno) */
895 PyObject *fileno;
Antoine Pitrou712cb732013-12-21 15:51:54 +0100896 state = IO_STATE();
897 if (state == NULL)
898 goto error;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200899 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000900 /* Ignore only AttributeError and UnsupportedOperation */
901 if (fileno == NULL) {
902 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
903 PyErr_ExceptionMatches(state->unsupported_operation)) {
904 PyErr_Clear();
905 }
906 else {
907 goto error;
908 }
909 }
910 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200911 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500912 Py_DECREF(fileno);
913 if (fd == -1 && PyErr_Occurred()) {
914 goto error;
915 }
916
917 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000918 if (self->encoding == NULL)
919 goto error;
920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
922 }
923 }
924 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200925 PyObject *locale_module = _PyIO_get_locale_module(state);
926 if (locale_module == NULL)
927 goto catch_ImportError;
928 self->encoding = _PyObject_CallMethodId(
929 locale_module, &PyId_getpreferredencoding, "O", Py_False);
930 Py_DECREF(locale_module);
931 if (self->encoding == NULL) {
932 catch_ImportError:
933 /*
934 Importing locale can raise a ImportError because of
935 _functools, and locale.getpreferredencoding can raise a
936 ImportError if _locale is not available. These will happen
937 during module building.
938 */
939 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
940 PyErr_Clear();
941 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000942 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200943 else
944 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000945 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200946 else if (!PyUnicode_Check(self->encoding))
947 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000948 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000949 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000950 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000951 if (encoding == NULL)
952 goto error;
953 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000954 else if (encoding != NULL) {
955 self->encoding = PyUnicode_FromString(encoding);
956 if (self->encoding == NULL)
957 goto error;
958 }
959 else {
960 PyErr_SetString(PyExc_IOError,
961 "could not determine default encoding");
962 }
963
964 if (errors == NULL)
965 errors = "strict";
966 self->errors = PyBytes_FromString(errors);
967 if (self->errors == NULL)
968 goto error;
969
970 self->chunk_size = 8192;
971 self->readuniversal = (newline == NULL || newline[0] == '\0');
972 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200973 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000974 self->readtranslate = (newline == NULL);
975 if (newline) {
976 self->readnl = PyUnicode_FromString(newline);
977 if (self->readnl == NULL)
978 return -1;
979 }
980 self->writetranslate = (newline == NULL || newline[0] != '\0');
981 if (!self->readuniversal && self->readnl) {
982 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000983 if (self->writenl == NULL)
984 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000985 if (!strcmp(self->writenl, "\n"))
986 self->writenl = NULL;
987 }
988#ifdef MS_WINDOWS
989 else
990 self->writenl = "\r\n";
991#endif
992
993 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200994 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000995 if (res == NULL)
996 goto error;
997 r = PyObject_IsTrue(res);
998 Py_DECREF(res);
999 if (r == -1)
1000 goto error;
1001 if (r == 1) {
1002 self->decoder = PyCodec_IncrementalDecoder(
1003 encoding, errors);
1004 if (self->decoder == NULL)
1005 goto error;
1006
1007 if (self->readuniversal) {
1008 PyObject *incrementalDecoder = PyObject_CallFunction(
1009 (PyObject *)&PyIncrementalNewlineDecoder_Type,
1010 "Oi", self->decoder, (int)self->readtranslate);
1011 if (incrementalDecoder == NULL)
1012 goto error;
1013 Py_CLEAR(self->decoder);
1014 self->decoder = incrementalDecoder;
1015 }
1016 }
1017
1018 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001019 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001020 if (res == NULL)
1021 goto error;
1022 r = PyObject_IsTrue(res);
1023 Py_DECREF(res);
1024 if (r == -1)
1025 goto error;
1026 if (r == 1) {
1027 PyObject *ci;
1028 self->encoder = PyCodec_IncrementalEncoder(
1029 encoding, errors);
1030 if (self->encoder == NULL)
1031 goto error;
1032 /* Get the normalized named of the codec */
1033 ci = _PyCodec_Lookup(encoding);
1034 if (ci == NULL)
1035 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001036 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001037 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001038 if (res == NULL) {
1039 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1040 PyErr_Clear();
1041 else
1042 goto error;
1043 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001044 else if (PyUnicode_Check(res)) {
1045 encodefuncentry *e = encodefuncs;
1046 while (e->name != NULL) {
1047 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1048 self->encodefunc = e->encodefunc;
1049 break;
1050 }
1051 e++;
1052 }
1053 }
1054 Py_XDECREF(res);
1055 }
1056
1057 self->buffer = buffer;
1058 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001059
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1061 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1062 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001063 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001064 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001065 if (raw == NULL) {
1066 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1067 PyErr_Clear();
1068 else
1069 goto error;
1070 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001071 else if (Py_TYPE(raw) == &PyFileIO_Type)
1072 self->raw = raw;
1073 else
1074 Py_DECREF(raw);
1075 }
1076
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001077 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001078 if (res == NULL)
1079 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001080 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001081 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001082 if (r < 0)
1083 goto error;
1084 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001085
Martin v. Löwis767046a2011-10-14 15:35:36 +02001086 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001087
Antoine Pitroue4501852009-05-14 18:55:55 +00001088 self->encoding_start_of_stream = 0;
1089 if (self->seekable && self->encoder) {
1090 PyObject *cookieObj;
1091 int cmp;
1092
1093 self->encoding_start_of_stream = 1;
1094
1095 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1096 if (cookieObj == NULL)
1097 goto error;
1098
1099 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1100 Py_DECREF(cookieObj);
1101 if (cmp < 0) {
1102 goto error;
1103 }
1104
1105 if (cmp == 0) {
1106 self->encoding_start_of_stream = 0;
1107 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1108 _PyIO_zero, NULL);
1109 if (res == NULL)
1110 goto error;
1111 Py_DECREF(res);
1112 }
1113 }
1114
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001115 self->ok = 1;
1116 return 0;
1117
1118 error:
1119 return -1;
1120}
1121
1122static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001123_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125 self->ok = 0;
1126 Py_CLEAR(self->buffer);
1127 Py_CLEAR(self->encoding);
1128 Py_CLEAR(self->encoder);
1129 Py_CLEAR(self->decoder);
1130 Py_CLEAR(self->readnl);
1131 Py_CLEAR(self->decoded_chars);
1132 Py_CLEAR(self->pending_bytes);
1133 Py_CLEAR(self->snapshot);
1134 Py_CLEAR(self->errors);
1135 Py_CLEAR(self->raw);
1136 return 0;
1137}
1138
1139static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001140textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001141{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001142 self->finalizing = 1;
1143 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001144 return;
Antoine Pitrou796564c2013-07-30 19:59:21 +02001145 _textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001146 _PyObject_GC_UNTRACK(self);
1147 if (self->weakreflist != NULL)
1148 PyObject_ClearWeakRefs((PyObject *)self);
1149 Py_CLEAR(self->dict);
1150 Py_TYPE(self)->tp_free((PyObject *)self);
1151}
1152
1153static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001154textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001155{
1156 Py_VISIT(self->buffer);
1157 Py_VISIT(self->encoding);
1158 Py_VISIT(self->encoder);
1159 Py_VISIT(self->decoder);
1160 Py_VISIT(self->readnl);
1161 Py_VISIT(self->decoded_chars);
1162 Py_VISIT(self->pending_bytes);
1163 Py_VISIT(self->snapshot);
1164 Py_VISIT(self->errors);
1165 Py_VISIT(self->raw);
1166
1167 Py_VISIT(self->dict);
1168 return 0;
1169}
1170
1171static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001172textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001173{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001174 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001175 return -1;
1176 Py_CLEAR(self->dict);
1177 return 0;
1178}
1179
1180static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001181textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001182
1183/* This macro takes some shortcuts to make the common case faster. */
1184#define CHECK_CLOSED(self) \
1185 do { \
1186 int r; \
1187 PyObject *_res; \
1188 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1189 if (self->raw != NULL) \
1190 r = _PyFileIO_closed(self->raw); \
1191 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001192 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001193 if (_res == NULL) \
1194 return NULL; \
1195 r = PyObject_IsTrue(_res); \
1196 Py_DECREF(_res); \
1197 if (r < 0) \
1198 return NULL; \
1199 } \
1200 if (r > 0) { \
1201 PyErr_SetString(PyExc_ValueError, \
1202 "I/O operation on closed file."); \
1203 return NULL; \
1204 } \
1205 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001206 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001207 return NULL; \
1208 } while (0)
1209
/* Return NULL from the enclosing function, with ValueError set, unless the
 * wrapper is initialized (self->ok > 0).  The message distinguishes a
 * detached buffer from an object that was never initialized.
 *
 * Note: this expands to a bare `if` statement, so always follow it with a
 * semicolon and do not use it as the body of an unbraced if/else.
 */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1221
/* Return -1 from the enclosing function, with ValueError set, unless the
 * wrapper is initialized (self->ok > 0).  The message distinguishes a
 * detached buffer from an object that was never initialized.
 *
 * Note: this expands to a bare `if` statement, so always follow it with a
 * semicolon and do not use it as the body of an unbraced if/else.
 */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1233
1234
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001235static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001236textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001237{
1238 PyObject *buffer, *res;
1239 CHECK_INITIALIZED(self);
1240 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1241 if (res == NULL)
1242 return NULL;
1243 Py_DECREF(res);
1244 buffer = self->buffer;
1245 self->buffer = NULL;
1246 self->detached = 1;
1247 self->ok = 0;
1248 return buffer;
1249}
1250
Antoine Pitrou24f36292009-03-28 22:16:42 +00001251/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001252 underlying buffered object, though. */
1253static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001254_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001255{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001256 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001257
1258 if (self->pending_bytes == NULL)
1259 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001260
1261 pending = self->pending_bytes;
1262 Py_INCREF(pending);
1263 self->pending_bytes_count = 0;
1264 Py_CLEAR(self->pending_bytes);
1265
1266 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1267 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268 if (b == NULL)
1269 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001270 ret = NULL;
1271 do {
1272 ret = PyObject_CallMethodObjArgs(self->buffer,
1273 _PyIO_str_write, b, NULL);
1274 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275 Py_DECREF(b);
1276 if (ret == NULL)
1277 return -1;
1278 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279 return 0;
1280}
1281
1282static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001283textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001284{
1285 PyObject *ret;
1286 PyObject *text; /* owned reference */
1287 PyObject *b;
1288 Py_ssize_t textlen;
1289 int haslf = 0;
1290 int needflush = 0;
1291
1292 CHECK_INITIALIZED(self);
1293
1294 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1295 return NULL;
1296 }
1297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 if (PyUnicode_READY(text) == -1)
1299 return NULL;
1300
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001301 CHECK_CLOSED(self);
1302
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001303 if (self->encoder == NULL)
1304 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001305
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001306 Py_INCREF(text);
1307
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001308 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001309
1310 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001311 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 haslf = 1;
1313
1314 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001315 PyObject *newtext = _PyObject_CallMethodId(
1316 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 Py_DECREF(text);
1318 if (newtext == NULL)
1319 return NULL;
1320 text = newtext;
1321 }
1322
Antoine Pitroue96ec682011-07-23 21:46:35 +02001323 if (self->write_through)
1324 needflush = 1;
1325 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001326 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001327 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001328 needflush = 1;
1329
1330 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001331 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001332 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001333 self->encoding_start_of_stream = 0;
1334 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001335 else
1336 b = PyObject_CallMethodObjArgs(self->encoder,
1337 _PyIO_str_encode, text, NULL);
1338 Py_DECREF(text);
1339 if (b == NULL)
1340 return NULL;
1341
1342 if (self->pending_bytes == NULL) {
1343 self->pending_bytes = PyList_New(0);
1344 if (self->pending_bytes == NULL) {
1345 Py_DECREF(b);
1346 return NULL;
1347 }
1348 self->pending_bytes_count = 0;
1349 }
1350 if (PyList_Append(self->pending_bytes, b) < 0) {
1351 Py_DECREF(b);
1352 return NULL;
1353 }
1354 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1355 Py_DECREF(b);
1356 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001357 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001358 return NULL;
1359 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001360
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001361 if (needflush) {
1362 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1363 if (ret == NULL)
1364 return NULL;
1365 Py_DECREF(ret);
1366 }
1367
1368 Py_CLEAR(self->snapshot);
1369
1370 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001371 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001372 if (ret == NULL)
1373 return NULL;
1374 Py_DECREF(ret);
1375 }
1376
1377 return PyLong_FromSsize_t(textlen);
1378}
1379
1380/* Steal a reference to chars and store it in the decoded_char buffer;
1381 */
1382static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001383textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001384{
1385 Py_CLEAR(self->decoded_chars);
1386 self->decoded_chars = chars;
1387 self->decoded_chars_used = 0;
1388}
1389
1390static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001391textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001392{
1393 PyObject *chars;
1394 Py_ssize_t avail;
1395
1396 if (self->decoded_chars == NULL)
1397 return PyUnicode_FromStringAndSize(NULL, 0);
1398
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001399 /* decoded_chars is guaranteed to be "ready". */
1400 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001401 - self->decoded_chars_used);
1402
1403 assert(avail >= 0);
1404
1405 if (n < 0 || n > avail)
1406 n = avail;
1407
1408 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001409 chars = PyUnicode_Substring(self->decoded_chars,
1410 self->decoded_chars_used,
1411 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001412 if (chars == NULL)
1413 return NULL;
1414 }
1415 else {
1416 chars = self->decoded_chars;
1417 Py_INCREF(chars);
1418 }
1419
1420 self->decoded_chars_used += n;
1421 return chars;
1422}
1423
1424/* Read and decode the next chunk of data from the BufferedReader.
1425 */
1426static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001427textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001428{
1429 PyObject *dec_buffer = NULL;
1430 PyObject *dec_flags = NULL;
1431 PyObject *input_chunk = NULL;
1432 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001433 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001434 int eof;
1435
1436 /* The return value is True unless EOF was reached. The decoded string is
1437 * placed in self._decoded_chars (replacing its previous value). The
1438 * entire input chunk is sent to the decoder, though some of it may remain
1439 * buffered in the decoder, yet to be converted.
1440 */
1441
1442 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001443 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001444 return -1;
1445 }
1446
1447 if (self->telling) {
1448 /* To prepare for tell(), we need to snapshot a point in the file
1449 * where the decoder's input buffer is empty.
1450 */
1451
1452 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1453 _PyIO_str_getstate, NULL);
1454 if (state == NULL)
1455 return -1;
1456 /* Given this, we know there was a valid snapshot point
1457 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1458 */
1459 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1460 Py_DECREF(state);
1461 return -1;
1462 }
1463 Py_INCREF(dec_buffer);
1464 Py_INCREF(dec_flags);
1465 Py_DECREF(state);
1466 }
1467
1468 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001469 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001470 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001471 }
1472 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001473 if (chunk_size == NULL)
1474 goto fail;
1475 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001476 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1477 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001478 Py_DECREF(chunk_size);
1479 if (input_chunk == NULL)
1480 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001481 if (!PyBytes_Check(input_chunk)) {
1482 PyErr_Format(PyExc_TypeError,
1483 "underlying %s() should have returned a bytes object, "
1484 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1485 Py_TYPE(input_chunk)->tp_name);
1486 goto fail;
1487 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001488
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001489 nbytes = PyBytes_Size(input_chunk);
1490 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001491
1492 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1493 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1494 self->decoder, input_chunk, eof);
1495 }
1496 else {
1497 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1498 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1499 }
1500
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001501 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001502 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001503 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001504 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001505 if (nchars > 0)
1506 self->b2cratio = (double) nbytes / nchars;
1507 else
1508 self->b2cratio = 0.0;
1509 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001510 eof = 0;
1511
1512 if (self->telling) {
1513 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1514 * next input to be decoded is dec_buffer + input_chunk.
1515 */
1516 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1517 if (next_input == NULL)
1518 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001519 if (!PyBytes_Check(next_input)) {
1520 PyErr_Format(PyExc_TypeError,
1521 "decoder getstate() should have returned a bytes "
1522 "object, not '%.200s'",
1523 Py_TYPE(next_input)->tp_name);
1524 Py_DECREF(next_input);
1525 goto fail;
1526 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001527 Py_DECREF(dec_buffer);
1528 Py_CLEAR(self->snapshot);
1529 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1530 }
1531 Py_DECREF(input_chunk);
1532
1533 return (eof == 0);
1534
1535 fail:
1536 Py_XDECREF(dec_buffer);
1537 Py_XDECREF(dec_flags);
1538 Py_XDECREF(input_chunk);
1539 return -1;
1540}
1541
1542static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001543textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001544{
1545 Py_ssize_t n = -1;
1546 PyObject *result = NULL, *chunks = NULL;
1547
1548 CHECK_INITIALIZED(self);
1549
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001550 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001551 return NULL;
1552
1553 CHECK_CLOSED(self);
1554
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001555 if (self->decoder == NULL)
1556 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001557
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001558 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559 return NULL;
1560
1561 if (n < 0) {
1562 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001563 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 PyObject *decoded;
1565 if (bytes == NULL)
1566 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001567
1568 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1569 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1570 bytes, 1);
1571 else
1572 decoded = PyObject_CallMethodObjArgs(
1573 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001575 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576 goto fail;
1577
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001578 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001579
1580 if (result == NULL) {
1581 Py_DECREF(decoded);
1582 return NULL;
1583 }
1584
1585 PyUnicode_AppendAndDel(&result, decoded);
1586 if (result == NULL)
1587 goto fail;
1588
1589 Py_CLEAR(self->snapshot);
1590 return result;
1591 }
1592 else {
1593 int res = 1;
1594 Py_ssize_t remaining = n;
1595
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001596 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001597 if (result == NULL)
1598 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001599 if (PyUnicode_READY(result) == -1)
1600 goto fail;
1601 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602
1603 /* Keep reading chunks until we have n characters to return */
1604 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001605 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001606 if (res < 0) {
1607 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1608 when EINTR occurs so we needn't do it ourselves. */
1609 if (_PyIO_trap_eintr()) {
1610 continue;
1611 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001612 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001613 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 if (res == 0) /* EOF */
1615 break;
1616 if (chunks == NULL) {
1617 chunks = PyList_New(0);
1618 if (chunks == NULL)
1619 goto fail;
1620 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001621 if (PyUnicode_GET_LENGTH(result) > 0 &&
1622 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623 goto fail;
1624 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001625 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001626 if (result == NULL)
1627 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001628 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 }
1630 if (chunks != NULL) {
1631 if (result != NULL && PyList_Append(chunks, result) < 0)
1632 goto fail;
1633 Py_CLEAR(result);
1634 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1635 if (result == NULL)
1636 goto fail;
1637 Py_CLEAR(chunks);
1638 }
1639 return result;
1640 }
1641 fail:
1642 Py_XDECREF(result);
1643 Py_XDECREF(chunks);
1644 return NULL;
1645}
1646
1647
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001648/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649 that is to the NUL character. Otherwise the function will produce
1650 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001651static char *
1652find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001653{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001654 if (kind == PyUnicode_1BYTE_KIND) {
1655 assert(ch < 256);
1656 return (char *) memchr((void *) s, (char) ch, end - s);
1657 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001659 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001660 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001661 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001662 return s;
1663 if (s == end)
1664 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001665 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001666 }
1667}
1668
1669Py_ssize_t
1670_PyIO_find_line_ending(
1671 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001672 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001674 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675
1676 if (translated) {
1677 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001678 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001679 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001680 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001681 else {
1682 *consumed = len;
1683 return -1;
1684 }
1685 }
1686 else if (universal) {
1687 /* Universal newline search. Find any of \r, \r\n, \n
1688 * The decoder ensures that \r\n are not split in two pieces
1689 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001690 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001692 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001694 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001695 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001696 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 if (s >= end) {
1698 *consumed = len;
1699 return -1;
1700 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001701 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001702 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001704 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001706 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001707 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001708 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001709 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 }
1711 }
1712 }
1713 else {
1714 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001715 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1716 char *nl = PyUnicode_DATA(readnl);
1717 /* Assume that readnl is an ASCII character. */
1718 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001719 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001720 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001722 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 *consumed = len;
1724 return -1;
1725 }
1726 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001727 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001728 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001729 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 if (e < s)
1731 e = s;
1732 while (s < e) {
1733 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001734 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 if (pos == NULL || pos >= e)
1736 break;
1737 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001738 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 break;
1740 }
1741 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001742 return (pos - start)/kind + readnl_len;
1743 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001744 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001745 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746 if (pos == NULL)
1747 *consumed = len;
1748 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001749 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001750 return -1;
1751 }
1752 }
1753}
1754
1755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001756_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001757{
1758 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1759 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1760 int res;
1761
1762 CHECK_CLOSED(self);
1763
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001764 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001765 return NULL;
1766
1767 chunked = 0;
1768
1769 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001770 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001771 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001772 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001773 Py_ssize_t consumed = 0;
1774
1775 /* First, get some data if necessary */
1776 res = 1;
1777 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001778 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001779 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001780 if (res < 0) {
1781 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1782 when EINTR occurs so we needn't do it ourselves. */
1783 if (_PyIO_trap_eintr()) {
1784 continue;
1785 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001786 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001787 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001788 if (res == 0)
1789 break;
1790 }
1791 if (res == 0) {
1792 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001793 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 Py_CLEAR(self->snapshot);
1795 start = endpos = offset_to_buffer = 0;
1796 break;
1797 }
1798
1799 if (remaining == NULL) {
1800 line = self->decoded_chars;
1801 start = self->decoded_chars_used;
1802 offset_to_buffer = 0;
1803 Py_INCREF(line);
1804 }
1805 else {
1806 assert(self->decoded_chars_used == 0);
1807 line = PyUnicode_Concat(remaining, self->decoded_chars);
1808 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001809 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001810 Py_CLEAR(remaining);
1811 if (line == NULL)
1812 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001813 if (PyUnicode_READY(line) == -1)
1814 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815 }
1816
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001817 ptr = PyUnicode_DATA(line);
1818 line_len = PyUnicode_GET_LENGTH(line);
1819 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001820
1821 endpos = _PyIO_find_line_ending(
1822 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001823 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001824 ptr + kind * start,
1825 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001826 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827 if (endpos >= 0) {
1828 endpos += start;
1829 if (limit >= 0 && (endpos - start) + chunked >= limit)
1830 endpos = start + limit - chunked;
1831 break;
1832 }
1833
1834 /* We can put aside up to `endpos` */
1835 endpos = consumed + start;
1836 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1837 /* Didn't find line ending, but reached length limit */
1838 endpos = start + limit - chunked;
1839 break;
1840 }
1841
1842 if (endpos > start) {
1843 /* No line ending seen yet - put aside current data */
1844 PyObject *s;
1845 if (chunks == NULL) {
1846 chunks = PyList_New(0);
1847 if (chunks == NULL)
1848 goto error;
1849 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001850 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001851 if (s == NULL)
1852 goto error;
1853 if (PyList_Append(chunks, s) < 0) {
1854 Py_DECREF(s);
1855 goto error;
1856 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001857 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001858 Py_DECREF(s);
1859 }
1860 /* There may be some remaining bytes we'll have to prepend to the
1861 next chunk of data */
1862 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001863 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001864 if (remaining == NULL)
1865 goto error;
1866 }
1867 Py_CLEAR(line);
1868 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001869 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870 }
1871
1872 if (line != NULL) {
1873 /* Our line ends in the current buffer */
1874 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001875 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1876 PyObject *s = PyUnicode_Substring(line, start, endpos);
1877 Py_CLEAR(line);
1878 if (s == NULL)
1879 goto error;
1880 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001881 }
1882 }
1883 if (remaining != NULL) {
1884 if (chunks == NULL) {
1885 chunks = PyList_New(0);
1886 if (chunks == NULL)
1887 goto error;
1888 }
1889 if (PyList_Append(chunks, remaining) < 0)
1890 goto error;
1891 Py_CLEAR(remaining);
1892 }
1893 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001894 if (line != NULL) {
1895 if (PyList_Append(chunks, line) < 0)
1896 goto error;
1897 Py_DECREF(line);
1898 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001899 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1900 if (line == NULL)
1901 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001902 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001903 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001904 if (line == NULL) {
1905 Py_INCREF(_PyIO_empty_str);
1906 line = _PyIO_empty_str;
1907 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001908
1909 return line;
1910
1911 error:
1912 Py_XDECREF(chunks);
1913 Py_XDECREF(remaining);
1914 Py_XDECREF(line);
1915 return NULL;
1916}
1917
1918static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001919textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001920{
1921 Py_ssize_t limit = -1;
1922
1923 CHECK_INITIALIZED(self);
1924 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1925 return NULL;
1926 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001927 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001928}
1929
1930/* Seek and Tell */
1931
1932typedef struct {
1933 Py_off_t start_pos;
1934 int dec_flags;
1935 int bytes_to_feed;
1936 int chars_to_skip;
1937 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001938} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001939
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
 */

/* Total size of the intermediary byte string: one slot per field. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1971
1972static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001973textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974{
1975 unsigned char buffer[COOKIE_BUF_LEN];
1976 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1977 if (cookieLong == NULL)
1978 return -1;
1979
1980 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001981 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001982 Py_DECREF(cookieLong);
1983 return -1;
1984 }
1985 Py_DECREF(cookieLong);
1986
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001987 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1988 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1989 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1990 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1991 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001992
1993 return 0;
1994}
1995
1996static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001997textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998{
1999 unsigned char buffer[COOKIE_BUF_LEN];
2000
Antoine Pitrou2db74c22009-03-06 21:49:02 +00002001 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2002 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2003 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2004 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2005 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002006
Christian Heimes743e0cd2012-10-17 23:52:17 +02002007 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2008 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010
2011static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002012_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002013{
2014 PyObject *res;
2015 /* When seeking to the start of the stream, we call decoder.reset()
2016 rather than decoder.getstate().
2017 This is for a few decoders such as utf-16 for which the state value
2018 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2019 utf-16, that we are expecting a BOM).
2020 */
2021 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2022 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2023 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002024 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2025 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002026 if (res == NULL)
2027 return -1;
2028 Py_DECREF(res);
2029 return 0;
2030}
2031
Antoine Pitroue4501852009-05-14 18:55:55 +00002032static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002033_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002034{
2035 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002036 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002037 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2038 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2039 self->encoding_start_of_stream = 1;
2040 }
2041 else {
2042 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2043 _PyIO_zero, NULL);
2044 self->encoding_start_of_stream = 0;
2045 }
2046 if (res == NULL)
2047 return -1;
2048 Py_DECREF(res);
2049 return 0;
2050}
2051
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002053textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054{
2055 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002056 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002057 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002058 PyObject *res;
2059 int cmp;
2060
2061 CHECK_INITIALIZED(self);
2062
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2064 return NULL;
2065 CHECK_CLOSED(self);
2066
2067 Py_INCREF(cookieObj);
2068
2069 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002070 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071 goto fail;
2072 }
2073
2074 if (whence == 1) {
2075 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002076 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 if (cmp < 0)
2078 goto fail;
2079
2080 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002081 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002082 goto fail;
2083 }
2084
2085 /* Seeking to the current position should attempt to
2086 * sync the underlying buffer with the current position.
2087 */
2088 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002089 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002090 if (cookieObj == NULL)
2091 goto fail;
2092 }
2093 else if (whence == 2) {
2094 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002095 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002096 if (cmp < 0)
2097 goto fail;
2098
2099 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002100 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002101 goto fail;
2102 }
2103
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002104 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 if (res == NULL)
2106 goto fail;
2107 Py_DECREF(res);
2108
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002109 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 Py_CLEAR(self->snapshot);
2111 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002112 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002113 if (res == NULL)
2114 goto fail;
2115 Py_DECREF(res);
2116 }
2117
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002118 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119 Py_XDECREF(cookieObj);
2120 return res;
2121 }
2122 else if (whence != 0) {
2123 PyErr_Format(PyExc_ValueError,
2124 "invalid whence (%d, should be 0, 1 or 2)", whence);
2125 goto fail;
2126 }
2127
Antoine Pitroue4501852009-05-14 18:55:55 +00002128 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002129 if (cmp < 0)
2130 goto fail;
2131
2132 if (cmp == 1) {
2133 PyErr_Format(PyExc_ValueError,
2134 "negative seek position %R", cookieObj);
2135 goto fail;
2136 }
2137
2138 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2139 if (res == NULL)
2140 goto fail;
2141 Py_DECREF(res);
2142
2143 /* The strategy of seek() is to go back to the safe start point
2144 * and replay the effect of read(chars_to_skip) from there.
2145 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002146 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002147 goto fail;
2148
2149 /* Seek back to the safe start point. */
2150 posobj = PyLong_FromOff_t(cookie.start_pos);
2151 if (posobj == NULL)
2152 goto fail;
2153 res = PyObject_CallMethodObjArgs(self->buffer,
2154 _PyIO_str_seek, posobj, NULL);
2155 Py_DECREF(posobj);
2156 if (res == NULL)
2157 goto fail;
2158 Py_DECREF(res);
2159
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002160 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002161 Py_CLEAR(self->snapshot);
2162
2163 /* Restore the decoder to its state from the safe start point. */
2164 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002165 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002166 goto fail;
2167 }
2168
2169 if (cookie.chars_to_skip) {
2170 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002171 PyObject *input_chunk = _PyObject_CallMethodId(
2172 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173 PyObject *decoded;
2174
2175 if (input_chunk == NULL)
2176 goto fail;
2177
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002178 if (!PyBytes_Check(input_chunk)) {
2179 PyErr_Format(PyExc_TypeError,
2180 "underlying read() should have returned a bytes "
2181 "object, not '%.200s'",
2182 Py_TYPE(input_chunk)->tp_name);
2183 Py_DECREF(input_chunk);
2184 goto fail;
2185 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002186
2187 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2188 if (self->snapshot == NULL) {
2189 Py_DECREF(input_chunk);
2190 goto fail;
2191 }
2192
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002193 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2194 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002195
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002196 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002197 goto fail;
2198
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002199 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002200
2201 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002202 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002203 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2204 goto fail;
2205 }
2206 self->decoded_chars_used = cookie.chars_to_skip;
2207 }
2208 else {
2209 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2210 if (self->snapshot == NULL)
2211 goto fail;
2212 }
2213
Antoine Pitroue4501852009-05-14 18:55:55 +00002214 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2215 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002216 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002217 goto fail;
2218 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002219 return cookieObj;
2220 fail:
2221 Py_XDECREF(cookieObj);
2222 return NULL;
2223
2224}
2225
2226static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002227textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002228{
2229 PyObject *res;
2230 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002231 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 PyObject *next_input;
2233 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002234 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235 PyObject *saved_state = NULL;
2236 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002237 char *dec_buffer;
2238 Py_ssize_t dec_buffer_len;
2239 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002240
2241 CHECK_INITIALIZED(self);
2242 CHECK_CLOSED(self);
2243
2244 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002245 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002246 goto fail;
2247 }
2248 if (!self->telling) {
2249 PyErr_SetString(PyExc_IOError,
2250 "telling position disabled by next() call");
2251 goto fail;
2252 }
2253
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002254 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002255 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002256 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002257 if (res == NULL)
2258 goto fail;
2259 Py_DECREF(res);
2260
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002261 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002262 if (posobj == NULL)
2263 goto fail;
2264
2265 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002266 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002267 return posobj;
2268 }
2269
2270#if defined(HAVE_LARGEFILE_SUPPORT)
2271 cookie.start_pos = PyLong_AsLongLong(posobj);
2272#else
2273 cookie.start_pos = PyLong_AsLong(posobj);
2274#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002275 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002276 if (PyErr_Occurred())
2277 goto fail;
2278
2279 /* Skip backward to the snapshot point (see _read_chunk). */
2280 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2281 goto fail;
2282
2283 assert (PyBytes_Check(next_input));
2284
2285 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2286
2287 /* How many decoded characters have been used up since the snapshot? */
2288 if (self->decoded_chars_used == 0) {
2289 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002290 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002291 }
2292
2293 chars_to_skip = self->decoded_chars_used;
2294
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002295 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002296 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2297 _PyIO_str_getstate, NULL);
2298 if (saved_state == NULL)
2299 goto fail;
2300
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002301#define DECODER_GETSTATE() do { \
2302 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2303 _PyIO_str_getstate, NULL); \
2304 if (_state == NULL) \
2305 goto fail; \
2306 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2307 Py_DECREF(_state); \
2308 goto fail; \
2309 } \
2310 Py_DECREF(_state); \
2311 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002312
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002313#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002314 PyObject *_decoded = _PyObject_CallMethodId( \
2315 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002316 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002317 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002318 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002319 Py_DECREF(_decoded); \
2320 } while (0)
2321
2322 /* Fast search for an acceptable start point, close to our
2323 current pos */
2324 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2325 skip_back = 1;
2326 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2327 input = PyBytes_AS_STRING(next_input);
2328 while (skip_bytes > 0) {
2329 /* Decode up to temptative start point */
2330 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2331 goto fail;
2332 DECODER_DECODE(input, skip_bytes, chars_decoded);
2333 if (chars_decoded <= chars_to_skip) {
2334 DECODER_GETSTATE();
2335 if (dec_buffer_len == 0) {
2336 /* Before pos and no bytes buffered in decoder => OK */
2337 cookie.dec_flags = dec_flags;
2338 chars_to_skip -= chars_decoded;
2339 break;
2340 }
2341 /* Skip back by buffered amount and reset heuristic */
2342 skip_bytes -= dec_buffer_len;
2343 skip_back = 1;
2344 }
2345 else {
2346 /* We're too far ahead, skip back a bit */
2347 skip_bytes -= skip_back;
2348 skip_back *= 2;
2349 }
2350 }
2351 if (skip_bytes <= 0) {
2352 skip_bytes = 0;
2353 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2354 goto fail;
2355 }
2356
2357 /* Note our initial start point. */
2358 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002359 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002360 if (chars_to_skip == 0)
2361 goto finally;
2362
2363 /* We should be close to the desired position. Now feed the decoder one
2364 * byte at a time until we reach the `chars_to_skip` target.
2365 * As we go, note the nearest "safe start point" before the current
2366 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002367 * can safely start from there and advance to this location).
2368 */
2369 chars_decoded = 0;
2370 input = PyBytes_AS_STRING(next_input);
2371 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002372 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002373 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002374 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002375
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002376 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002377 /* We got n chars for 1 byte */
2378 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002379 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002380 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002381
2382 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2383 /* Decoder buffer is empty, so this is a safe start point. */
2384 cookie.start_pos += cookie.bytes_to_feed;
2385 chars_to_skip -= chars_decoded;
2386 cookie.dec_flags = dec_flags;
2387 cookie.bytes_to_feed = 0;
2388 chars_decoded = 0;
2389 }
2390 if (chars_decoded >= chars_to_skip)
2391 break;
2392 input++;
2393 }
2394 if (input == input_end) {
2395 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002396 PyObject *decoded = _PyObject_CallMethodId(
2397 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002398 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002400 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002401 Py_DECREF(decoded);
2402 cookie.need_eof = 1;
2403
2404 if (chars_decoded < chars_to_skip) {
2405 PyErr_SetString(PyExc_IOError,
2406 "can't reconstruct logical file position");
2407 goto fail;
2408 }
2409 }
2410
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002411finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002412 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002413 Py_DECREF(saved_state);
2414 if (res == NULL)
2415 return NULL;
2416 Py_DECREF(res);
2417
2418 /* The returned cookie corresponds to the last safe start point. */
2419 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002420 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002421
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002422fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002423 if (saved_state) {
2424 PyObject *type, *value, *traceback;
2425 PyErr_Fetch(&type, &value, &traceback);
2426
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002427 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002428 Py_DECREF(saved_state);
2429 if (res == NULL)
2430 return NULL;
2431 Py_DECREF(res);
2432
2433 PyErr_Restore(type, value, traceback);
2434 }
2435 return NULL;
2436}
2437
2438static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002439textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002440{
2441 PyObject *pos = Py_None;
2442 PyObject *res;
2443
2444 CHECK_INITIALIZED(self)
2445 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2446 return NULL;
2447 }
2448
2449 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2450 if (res == NULL)
2451 return NULL;
2452 Py_DECREF(res);
2453
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002454 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002455}
2456
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002457static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002458textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002459{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002460 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002461
2462 CHECK_INITIALIZED(self);
2463
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002464 res = PyUnicode_FromString("<_io.TextIOWrapper");
2465 if (res == NULL)
2466 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002467 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002468 if (nameobj == NULL) {
2469 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2470 PyErr_Clear();
2471 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002472 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002473 }
2474 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002475 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002476 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002477 if (s == NULL)
2478 goto error;
2479 PyUnicode_AppendAndDel(&res, s);
2480 if (res == NULL)
2481 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002482 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002483 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002484 if (modeobj == NULL) {
2485 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2486 PyErr_Clear();
2487 else
2488 goto error;
2489 }
2490 else {
2491 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2492 Py_DECREF(modeobj);
2493 if (s == NULL)
2494 goto error;
2495 PyUnicode_AppendAndDel(&res, s);
2496 if (res == NULL)
2497 return NULL;
2498 }
2499 s = PyUnicode_FromFormat("%U encoding=%R>",
2500 res, self->encoding);
2501 Py_DECREF(res);
2502 return s;
2503error:
2504 Py_XDECREF(res);
2505 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002506}
2507
2508
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002509/* Inquiries */
2510
2511static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002512textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513{
2514 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002515 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002516}
2517
2518static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002519textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520{
2521 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002522 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002523}
2524
2525static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002526textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002527{
2528 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002529 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530}
2531
2532static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002533textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534{
2535 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002536 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537}
2538
2539static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002540textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541{
2542 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002543 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002544}
2545
2546static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002547textiowrapper_getstate(textio *self, PyObject *args)
2548{
2549 PyErr_Format(PyExc_TypeError,
2550 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2551 return NULL;
2552}
2553
2554static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002555textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556{
2557 CHECK_INITIALIZED(self);
2558 CHECK_CLOSED(self);
2559 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002560 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002561 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002562 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563}
2564
2565static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002566textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567{
2568 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002569 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002570 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002571
Antoine Pitrou6be88762010-05-03 16:48:20 +00002572 res = textiowrapper_closed_get(self, NULL);
2573 if (res == NULL)
2574 return NULL;
2575 r = PyObject_IsTrue(res);
2576 Py_DECREF(res);
2577 if (r < 0)
2578 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002579
Antoine Pitrou6be88762010-05-03 16:48:20 +00002580 if (r > 0) {
2581 Py_RETURN_NONE; /* stream already closed */
2582 }
2583 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002584 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002585 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002586 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002587 if (res)
2588 Py_DECREF(res);
2589 else
2590 PyErr_Clear();
2591 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002592 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002593 if (res == NULL)
2594 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002595 else
2596 Py_DECREF(res);
2597
Benjamin Peterson68623612012-12-20 11:53:11 -06002598 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2599 if (exc != NULL) {
2600 if (res != NULL) {
2601 Py_CLEAR(res);
2602 PyErr_Restore(exc, val, tb);
2603 }
2604 else {
2605 PyObject *val2;
2606 Py_DECREF(exc);
2607 Py_XDECREF(tb);
2608 PyErr_Fetch(&exc, &val2, &tb);
2609 PyErr_NormalizeException(&exc, &val2, &tb);
2610 PyException_SetContext(val2, val);
2611 PyErr_Restore(exc, val2, tb);
2612 }
2613 }
2614 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002615 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002616}
2617
2618static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002619textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002620{
2621 PyObject *line;
2622
2623 CHECK_INITIALIZED(self);
2624
2625 self->telling = 0;
2626 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2627 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002628 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002629 }
2630 else {
2631 line = PyObject_CallMethodObjArgs((PyObject *)self,
2632 _PyIO_str_readline, NULL);
2633 if (line && !PyUnicode_Check(line)) {
2634 PyErr_Format(PyExc_IOError,
2635 "readline() should have returned an str object, "
2636 "not '%.200s'", Py_TYPE(line)->tp_name);
2637 Py_DECREF(line);
2638 return NULL;
2639 }
2640 }
2641
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002642 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643 return NULL;
2644
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002645 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002646 /* Reached EOF or would have blocked */
2647 Py_DECREF(line);
2648 Py_CLEAR(self->snapshot);
2649 self->telling = self->seekable;
2650 return NULL;
2651 }
2652
2653 return line;
2654}
2655
2656static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002657textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002658{
2659 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002660 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661}
2662
2663static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002664textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002665{
2666 CHECK_INITIALIZED(self);
2667 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2668}
2669
2670static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002671textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002672{
2673 PyObject *res;
2674 CHECK_INITIALIZED(self);
2675 if (self->decoder == NULL)
2676 Py_RETURN_NONE;
2677 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2678 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002679 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2680 PyErr_Clear();
2681 Py_RETURN_NONE;
2682 }
2683 else {
2684 return NULL;
2685 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686 }
2687 return res;
2688}
2689
2690static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002691textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002692{
2693 CHECK_INITIALIZED(self);
2694 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2695}
2696
2697static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002698textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002699{
2700 CHECK_INITIALIZED(self);
2701 return PyLong_FromSsize_t(self->chunk_size);
2702}
2703
2704static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002705textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706{
2707 Py_ssize_t n;
2708 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002709 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002710 if (n == -1 && PyErr_Occurred())
2711 return -1;
2712 if (n <= 0) {
2713 PyErr_SetString(PyExc_ValueError,
2714 "a strictly positive integer is required");
2715 return -1;
2716 }
2717 self->chunk_size = n;
2718 return 0;
2719}
2720
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002721static PyMethodDef textiowrapper_methods[] = {
2722 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2723 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2724 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2725 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2726 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2727 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002728
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002729 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2730 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2731 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2732 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2733 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002734 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002735
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002736 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2737 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2738 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002739 {NULL, NULL}
2740};
2741
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002742static PyMemberDef textiowrapper_members[] = {
2743 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2744 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2745 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02002746 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002747 {NULL}
2748};
2749
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002750static PyGetSetDef textiowrapper_getset[] = {
2751 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2752 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002753/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2754*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002755 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2756 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2757 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2758 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002759 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002760};
2761
2762PyTypeObject PyTextIOWrapper_Type = {
2763 PyVarObject_HEAD_INIT(NULL, 0)
2764 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002765 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002766 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002767 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002768 0, /*tp_print*/
2769 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002770 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002771 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002772 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002773 0, /*tp_as_number*/
2774 0, /*tp_as_sequence*/
2775 0, /*tp_as_mapping*/
2776 0, /*tp_hash */
2777 0, /*tp_call*/
2778 0, /*tp_str*/
2779 0, /*tp_getattro*/
2780 0, /*tp_setattro*/
2781 0, /*tp_as_buffer*/
2782 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02002783 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002784 textiowrapper_doc, /* tp_doc */
2785 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2786 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002787 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002788 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002789 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002790 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2791 textiowrapper_methods, /* tp_methods */
2792 textiowrapper_members, /* tp_members */
2793 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002794 0, /* tp_base */
2795 0, /* tp_dict */
2796 0, /* tp_descr_get */
2797 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002798 offsetof(textio, dict), /*tp_dictoffset*/
2799 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002800 0, /* tp_alloc */
2801 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02002802 0, /* tp_free */
2803 0, /* tp_is_gc */
2804 0, /* tp_bases */
2805 0, /* tp_mro */
2806 0, /* tp_cache */
2807 0, /* tp_subclasses */
2808 0, /* tp_weaklist */
2809 0, /* tp_del */
2810 0, /* tp_version_tag */
2811 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002812};