blob: aa29ffb4136dcaa4decff3533759ea245ae860e0 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000016PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 {NULL, NULL}
126};
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000132 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000163 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in the decoder's `seennl` field. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
244PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000245_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246 PyObject *input, int final)
247{
248 PyObject *output;
249 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000250 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
256 }
257
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
262 }
263 else {
264 output = input;
265 Py_INCREF(output);
266 }
267
268 if (output == NULL)
269 return NULL;
270
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
275 }
276
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200277 if (PyUnicode_READY(output) == -1)
278 goto error;
279
280 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000281 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200282 /* Prefix output with CR */
283 int kind;
284 PyObject *modified;
285 char *out;
286
287 modified = PyUnicode_New(output_len + 1,
288 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000289 if (modified == NULL)
290 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200291 kind = PyUnicode_KIND(modified);
292 out = PyUnicode_DATA(modified);
293 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200294 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000295 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200296 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000297 self->pendingcr = 0;
298 output_len++;
299 }
300
301 /* retain last \r even when not translating data:
302 * then readline() is sure to get \r\n in one pass
303 */
304 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000305 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200306 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
307 {
308 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
309 if (modified == NULL)
310 goto error;
311 Py_DECREF(output);
312 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000313 self->pendingcr = 1;
314 }
315 }
316
317 /* Record which newlines are read and do newline translation if desired,
318 all in one pass. */
319 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200320 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000321 Py_ssize_t len;
322 int seennl = self->seennl;
323 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000325
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 in_str = PyUnicode_DATA(output);
327 len = PyUnicode_GET_LENGTH(output);
328 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000329
330 if (len == 0)
331 return output;
332
333 /* If, up to now, newlines are consistently \n, do a quick check
334 for the \r *byte* with the libc's optimized memchr.
335 */
336 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200337 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000338 }
339
Antoine Pitrou66913e22009-03-06 23:40:56 +0000340 if (only_lf) {
341 /* If not already seen, quick scan for a possible "\n" character.
342 (there's nothing else to be done, even when in translation mode)
343 */
344 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200345 memchr(in_str, '\n', kind * len) != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200346 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000347 for (;;) {
Victor Stinnerf7b8cb62011-09-29 03:28:17 +0200348 Py_UCS4 c;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000349 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200350 while (PyUnicode_READ(kind, in_str, i) > '\n')
351 i++;
352 c = PyUnicode_READ(kind, in_str, i++);
Antoine Pitrou66913e22009-03-06 23:40:56 +0000353 if (c == '\n') {
354 seennl |= SEEN_LF;
355 break;
356 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 if (i >= len)
Antoine Pitrou66913e22009-03-06 23:40:56 +0000358 break;
359 }
360 }
361 /* Finished: we have scanned for newlines, and none of them
362 need translating */
363 }
364 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200365 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000366 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000367 if (seennl == SEEN_ALL)
368 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200370 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000371 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200372 while (PyUnicode_READ(kind, in_str, i) > '\r')
373 i++;
374 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000375 if (c == '\n')
376 seennl |= SEEN_LF;
377 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200378 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000379 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200380 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000381 }
382 else
383 seennl |= SEEN_CR;
384 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200385 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000386 break;
387 if (seennl == SEEN_ALL)
388 break;
389 }
390 endscan:
391 ;
392 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000393 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200394 void *translated;
395 int kind = PyUnicode_KIND(output);
396 void *in_str = PyUnicode_DATA(output);
397 Py_ssize_t in, out;
398 /* XXX: Previous in-place translation here is disabled as
399 resizing is not possible anymore */
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 we already know there is a \r byte, so chances are high
403 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200404 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 if (translated == NULL) {
406 PyErr_NoMemory();
407 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000408 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200409 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000410 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200411 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000412 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
414 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000415 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200416 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 seennl |= SEEN_LF;
418 continue;
419 }
420 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200421 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000422 in++;
423 seennl |= SEEN_CRLF;
424 }
425 else
426 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000428 continue;
429 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000431 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200432 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 Py_DECREF(output);
435 output = PyUnicode_FromKindAndData(kind, translated, out);
436 if (!output)
437 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000438 }
439 self->seennl |= seennl;
440 }
441
442 return output;
443
444 error:
445 Py_DECREF(output);
446 return NULL;
447}
448
449static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000450incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000451 PyObject *args, PyObject *kwds)
452{
453 char *kwlist[] = {"input", "final", NULL};
454 PyObject *input;
455 int final = 0;
456
457 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
458 kwlist, &input, &final))
459 return NULL;
460 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
461}
462
463static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000464incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465{
466 PyObject *buffer;
467 unsigned PY_LONG_LONG flag;
468
469 if (self->decoder != Py_None) {
470 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
471 _PyIO_str_getstate, NULL);
472 if (state == NULL)
473 return NULL;
474 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
475 Py_DECREF(state);
476 return NULL;
477 }
478 Py_INCREF(buffer);
479 Py_DECREF(state);
480 }
481 else {
482 buffer = PyBytes_FromString("");
483 flag = 0;
484 }
485 flag <<= 1;
486 if (self->pendingcr)
487 flag |= 1;
488 return Py_BuildValue("NK", buffer, flag);
489}
490
491static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000492incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000493{
494 PyObject *buffer;
495 unsigned PY_LONG_LONG flag;
496
497 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
498 return NULL;
499
500 self->pendingcr = (int) flag & 1;
501 flag >>= 1;
502
503 if (self->decoder != Py_None)
504 return PyObject_CallMethod(self->decoder,
505 "setstate", "((OK))", buffer, flag);
506 else
507 Py_RETURN_NONE;
508}
509
510static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000511incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000512{
513 self->seennl = 0;
514 self->pendingcr = 0;
515 if (self->decoder != Py_None)
516 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
517 else
518 Py_RETURN_NONE;
519}
520
521static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000522incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000523{
524 switch (self->seennl) {
525 case SEEN_CR:
526 return PyUnicode_FromString("\r");
527 case SEEN_LF:
528 return PyUnicode_FromString("\n");
529 case SEEN_CRLF:
530 return PyUnicode_FromString("\r\n");
531 case SEEN_CR | SEEN_LF:
532 return Py_BuildValue("ss", "\r", "\n");
533 case SEEN_CR | SEEN_CRLF:
534 return Py_BuildValue("ss", "\r", "\r\n");
535 case SEEN_LF | SEEN_CRLF:
536 return Py_BuildValue("ss", "\n", "\r\n");
537 case SEEN_CR | SEEN_LF | SEEN_CRLF:
538 return Py_BuildValue("sss", "\r", "\n", "\r\n");
539 default:
540 Py_RETURN_NONE;
541 }
542
543}
544
545
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000546static PyMethodDef incrementalnewlinedecoder_methods[] = {
547 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
548 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
549 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
550 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000551 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000552};
553
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000554static PyGetSetDef incrementalnewlinedecoder_getset[] = {
555 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000556 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000557};
558
559PyTypeObject PyIncrementalNewlineDecoder_Type = {
560 PyVarObject_HEAD_INIT(NULL, 0)
561 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000562 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000563 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000564 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565 0, /*tp_print*/
566 0, /*tp_getattr*/
567 0, /*tp_setattr*/
568 0, /*tp_compare */
569 0, /*tp_repr*/
570 0, /*tp_as_number*/
571 0, /*tp_as_sequence*/
572 0, /*tp_as_mapping*/
573 0, /*tp_hash */
574 0, /*tp_call*/
575 0, /*tp_str*/
576 0, /*tp_getattro*/
577 0, /*tp_setattro*/
578 0, /*tp_as_buffer*/
579 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000580 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000581 0, /* tp_traverse */
582 0, /* tp_clear */
583 0, /* tp_richcompare */
584 0, /*tp_weaklistoffset*/
585 0, /* tp_iter */
586 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000587 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000588 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000589 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 0, /* tp_base */
591 0, /* tp_dict */
592 0, /* tp_descr_get */
593 0, /* tp_descr_set */
594 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000595 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000596 0, /* tp_alloc */
597 PyType_GenericNew, /* tp_new */
598};
599
600
601/* TextIOWrapper */
602
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000603PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000604 "Character and line based layer over a BufferedIOBase object, buffer.\n"
605 "\n"
606 "encoding gives the name of the encoding that the stream will be\n"
607 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
608 "\n"
609 "errors determines the strictness of encoding and decoding (see the\n"
610 "codecs.register) and defaults to \"strict\".\n"
611 "\n"
612 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
613 "handling of line endings. If it is None, universal newlines is\n"
614 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
615 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
616 "caller. Conversely, on output, '\\n' is translated to the system\n"
617 "default line seperator, os.linesep. If newline is any other of its\n"
618 "legal values, that newline becomes the newline when the file is read\n"
619 "and it is returned untranslated. On output, '\\n' is converted to the\n"
620 "newline.\n"
621 "\n"
622 "If line_buffering is True, a call to flush is implied when a call to\n"
623 "write contains a newline character."
624 );
625
626typedef PyObject *
627 (*encodefunc_t)(PyObject *, PyObject *);
628
629typedef struct
630{
631 PyObject_HEAD
632 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000633 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634 Py_ssize_t chunk_size;
635 PyObject *buffer;
636 PyObject *encoding;
637 PyObject *encoder;
638 PyObject *decoder;
639 PyObject *readnl;
640 PyObject *errors;
641 const char *writenl; /* utf-8 encoded, NULL stands for \n */
642 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200643 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 char readuniversal;
645 char readtranslate;
646 char writetranslate;
647 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200648 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000649 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000650 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000651 /* Specialized encoding func (see below) */
652 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000653 /* Whether or not it's the start of the stream */
654 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000655
656 /* Reads and writes are internally buffered in order to speed things up.
657 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000658
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659 Please also note that text to be written is first encoded before being
660 buffered. This is necessary so that encoding errors are immediately
661 reported to the caller, but it unfortunately means that the
662 IncrementalEncoder (whose encode() method is always written in Python)
663 becomes a bottleneck for small writes.
664 */
665 PyObject *decoded_chars; /* buffer for text returned from decoder */
666 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
667 PyObject *pending_bytes; /* list of bytes objects waiting to be
668 written, or NULL */
669 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000670
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000671 /* snapshot is either None, or a tuple (dec_flags, next_input) where
672 * dec_flags is the second (integer) item of the decoder state and
673 * next_input is the chunk of input bytes that comes next after the
674 * snapshot point. We use this to reconstruct decoder states in tell().
675 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000676 PyObject *snapshot;
677 /* Bytes-to-characters ratio for the current chunk. Serves as input for
678 the heuristic in tell(). */
679 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000680
681 /* Cache raw object if it's a FileIO object */
682 PyObject *raw;
683
684 PyObject *weakreflist;
685 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000686} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000687
688
689/* A couple of specialized cases in order to bypass the slow incremental
690 encoding methods for the most popular encodings. */
691
692static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000693ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000694{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200695 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696}
697
698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700{
701 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
702 PyUnicode_GET_SIZE(text),
703 PyBytes_AS_STRING(self->errors), 1);
704}
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
709 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
710 PyUnicode_GET_SIZE(text),
711 PyBytes_AS_STRING(self->errors), -1);
712}
713
714static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000715utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716{
Antoine Pitroue4501852009-05-14 18:55:55 +0000717 if (!self->encoding_start_of_stream) {
718 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000720 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000722 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000724 }
725 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
726 PyUnicode_GET_SIZE(text),
727 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728}
729
Antoine Pitroue4501852009-05-14 18:55:55 +0000730static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000731utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000732{
733 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), 1);
736}
737
738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000740{
741 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
742 PyUnicode_GET_SIZE(text),
743 PyBytes_AS_STRING(self->errors), -1);
744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
749 if (!self->encoding_start_of_stream) {
750 /* Skip the BOM and use native byte ordering */
751#if defined(WORDS_BIGENDIAN)
752 return utf32be_encode(self, text);
753#else
754 return utf32le_encode(self, text);
755#endif
756 }
757 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
758 PyUnicode_GET_SIZE(text),
759 PyBytes_AS_STRING(self->errors), 0);
760}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000761
762static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000763utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200765 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766}
767
768static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000769latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000770{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200771 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772}
773
774/* Map normalized encoding names onto the specialized encoding funcs */
775
776typedef struct {
777 const char *name;
778 encodefunc_t encodefunc;
779} encodefuncentry;
780
Antoine Pitrou24f36292009-03-28 22:16:42 +0000781static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782 {"ascii", (encodefunc_t) ascii_encode},
783 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000784 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000785 {"utf-16-be", (encodefunc_t) utf16be_encode},
786 {"utf-16-le", (encodefunc_t) utf16le_encode},
787 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000788 {"utf-32-be", (encodefunc_t) utf32be_encode},
789 {"utf-32-le", (encodefunc_t) utf32le_encode},
790 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791 {NULL, NULL}
792};
793
794
795static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000796textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797{
798 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200799 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000800 NULL};
801 PyObject *buffer, *raw;
802 char *encoding = NULL;
803 char *errors = NULL;
804 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200805 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000806 _PyIO_State *state = IO_STATE;
807
808 PyObject *res;
809 int r;
810
811 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000812 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200813 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000814 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200815 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000816 return -1;
817
818 if (newline && newline[0] != '\0'
819 && !(newline[0] == '\n' && newline[1] == '\0')
820 && !(newline[0] == '\r' && newline[1] == '\0')
821 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
822 PyErr_Format(PyExc_ValueError,
823 "illegal newline value: %s", newline);
824 return -1;
825 }
826
827 Py_CLEAR(self->buffer);
828 Py_CLEAR(self->encoding);
829 Py_CLEAR(self->encoder);
830 Py_CLEAR(self->decoder);
831 Py_CLEAR(self->readnl);
832 Py_CLEAR(self->decoded_chars);
833 Py_CLEAR(self->pending_bytes);
834 Py_CLEAR(self->snapshot);
835 Py_CLEAR(self->errors);
836 Py_CLEAR(self->raw);
837 self->decoded_chars_used = 0;
838 self->pending_bytes_count = 0;
839 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000840 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000841
842 if (encoding == NULL) {
843 /* Try os.device_encoding(fileno) */
844 PyObject *fileno;
845 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
846 /* Ignore only AttributeError and UnsupportedOperation */
847 if (fileno == NULL) {
848 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
849 PyErr_ExceptionMatches(state->unsupported_operation)) {
850 PyErr_Clear();
851 }
852 else {
853 goto error;
854 }
855 }
856 else {
857 self->encoding = PyObject_CallMethod(state->os_module,
858 "device_encoding",
859 "N", fileno);
860 if (self->encoding == NULL)
861 goto error;
862 else if (!PyUnicode_Check(self->encoding))
863 Py_CLEAR(self->encoding);
864 }
865 }
866 if (encoding == NULL && self->encoding == NULL) {
867 if (state->locale_module == NULL) {
868 state->locale_module = PyImport_ImportModule("locale");
869 if (state->locale_module == NULL)
870 goto catch_ImportError;
871 else
872 goto use_locale;
873 }
874 else {
875 use_locale:
876 self->encoding = PyObject_CallMethod(
877 state->locale_module, "getpreferredencoding", NULL);
878 if (self->encoding == NULL) {
879 catch_ImportError:
880 /*
881 Importing locale can raise a ImportError because of
882 _functools, and locale.getpreferredencoding can raise a
883 ImportError if _locale is not available. These will happen
884 during module building.
885 */
886 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
887 PyErr_Clear();
888 self->encoding = PyUnicode_FromString("ascii");
889 }
890 else
891 goto error;
892 }
893 else if (!PyUnicode_Check(self->encoding))
894 Py_CLEAR(self->encoding);
895 }
896 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000897 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000898 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000899 if (encoding == NULL)
900 goto error;
901 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000902 else if (encoding != NULL) {
903 self->encoding = PyUnicode_FromString(encoding);
904 if (self->encoding == NULL)
905 goto error;
906 }
907 else {
908 PyErr_SetString(PyExc_IOError,
909 "could not determine default encoding");
910 }
911
912 if (errors == NULL)
913 errors = "strict";
914 self->errors = PyBytes_FromString(errors);
915 if (self->errors == NULL)
916 goto error;
917
918 self->chunk_size = 8192;
919 self->readuniversal = (newline == NULL || newline[0] == '\0');
920 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200921 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000922 self->readtranslate = (newline == NULL);
923 if (newline) {
924 self->readnl = PyUnicode_FromString(newline);
925 if (self->readnl == NULL)
926 return -1;
927 }
928 self->writetranslate = (newline == NULL || newline[0] != '\0');
929 if (!self->readuniversal && self->readnl) {
930 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000931 if (self->writenl == NULL)
932 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000933 if (!strcmp(self->writenl, "\n"))
934 self->writenl = NULL;
935 }
936#ifdef MS_WINDOWS
937 else
938 self->writenl = "\r\n";
939#endif
940
941 /* Build the decoder object */
942 res = PyObject_CallMethod(buffer, "readable", NULL);
943 if (res == NULL)
944 goto error;
945 r = PyObject_IsTrue(res);
946 Py_DECREF(res);
947 if (r == -1)
948 goto error;
949 if (r == 1) {
950 self->decoder = PyCodec_IncrementalDecoder(
951 encoding, errors);
952 if (self->decoder == NULL)
953 goto error;
954
955 if (self->readuniversal) {
956 PyObject *incrementalDecoder = PyObject_CallFunction(
957 (PyObject *)&PyIncrementalNewlineDecoder_Type,
958 "Oi", self->decoder, (int)self->readtranslate);
959 if (incrementalDecoder == NULL)
960 goto error;
961 Py_CLEAR(self->decoder);
962 self->decoder = incrementalDecoder;
963 }
964 }
965
966 /* Build the encoder object */
967 res = PyObject_CallMethod(buffer, "writable", NULL);
968 if (res == NULL)
969 goto error;
970 r = PyObject_IsTrue(res);
971 Py_DECREF(res);
972 if (r == -1)
973 goto error;
974 if (r == 1) {
975 PyObject *ci;
976 self->encoder = PyCodec_IncrementalEncoder(
977 encoding, errors);
978 if (self->encoder == NULL)
979 goto error;
980 /* Get the normalized named of the codec */
981 ci = _PyCodec_Lookup(encoding);
982 if (ci == NULL)
983 goto error;
984 res = PyObject_GetAttrString(ci, "name");
985 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +0000986 if (res == NULL) {
987 if (PyErr_ExceptionMatches(PyExc_AttributeError))
988 PyErr_Clear();
989 else
990 goto error;
991 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000992 else if (PyUnicode_Check(res)) {
993 encodefuncentry *e = encodefuncs;
994 while (e->name != NULL) {
995 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
996 self->encodefunc = e->encodefunc;
997 break;
998 }
999 e++;
1000 }
1001 }
1002 Py_XDECREF(res);
1003 }
1004
1005 self->buffer = buffer;
1006 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001007
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001008 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1009 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1010 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1011 raw = PyObject_GetAttrString(buffer, "raw");
1012 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001013 if (raw == NULL) {
1014 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1015 PyErr_Clear();
1016 else
1017 goto error;
1018 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001019 else if (Py_TYPE(raw) == &PyFileIO_Type)
1020 self->raw = raw;
1021 else
1022 Py_DECREF(raw);
1023 }
1024
1025 res = PyObject_CallMethod(buffer, "seekable", NULL);
1026 if (res == NULL)
1027 goto error;
1028 self->seekable = self->telling = PyObject_IsTrue(res);
1029 Py_DECREF(res);
1030
Antoine Pitroue96ec682011-07-23 21:46:35 +02001031 self->has_read1 = PyObject_HasAttrString(buffer, "read1");
1032
Antoine Pitroue4501852009-05-14 18:55:55 +00001033 self->encoding_start_of_stream = 0;
1034 if (self->seekable && self->encoder) {
1035 PyObject *cookieObj;
1036 int cmp;
1037
1038 self->encoding_start_of_stream = 1;
1039
1040 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1041 if (cookieObj == NULL)
1042 goto error;
1043
1044 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1045 Py_DECREF(cookieObj);
1046 if (cmp < 0) {
1047 goto error;
1048 }
1049
1050 if (cmp == 0) {
1051 self->encoding_start_of_stream = 0;
1052 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1053 _PyIO_zero, NULL);
1054 if (res == NULL)
1055 goto error;
1056 Py_DECREF(res);
1057 }
1058 }
1059
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 self->ok = 1;
1061 return 0;
1062
1063 error:
1064 return -1;
1065}
1066
1067static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001068_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001069{
1070 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1071 return -1;
1072 self->ok = 0;
1073 Py_CLEAR(self->buffer);
1074 Py_CLEAR(self->encoding);
1075 Py_CLEAR(self->encoder);
1076 Py_CLEAR(self->decoder);
1077 Py_CLEAR(self->readnl);
1078 Py_CLEAR(self->decoded_chars);
1079 Py_CLEAR(self->pending_bytes);
1080 Py_CLEAR(self->snapshot);
1081 Py_CLEAR(self->errors);
1082 Py_CLEAR(self->raw);
1083 return 0;
1084}
1085
1086static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001087textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001088{
Antoine Pitroue033e062010-10-29 10:38:18 +00001089 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001090 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001091 return;
1092 _PyObject_GC_UNTRACK(self);
1093 if (self->weakreflist != NULL)
1094 PyObject_ClearWeakRefs((PyObject *)self);
1095 Py_CLEAR(self->dict);
1096 Py_TYPE(self)->tp_free((PyObject *)self);
1097}
1098
1099static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001100textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001101{
1102 Py_VISIT(self->buffer);
1103 Py_VISIT(self->encoding);
1104 Py_VISIT(self->encoder);
1105 Py_VISIT(self->decoder);
1106 Py_VISIT(self->readnl);
1107 Py_VISIT(self->decoded_chars);
1108 Py_VISIT(self->pending_bytes);
1109 Py_VISIT(self->snapshot);
1110 Py_VISIT(self->errors);
1111 Py_VISIT(self->raw);
1112
1113 Py_VISIT(self->dict);
1114 return 0;
1115}
1116
1117static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001118textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001119{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001120 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121 return -1;
1122 Py_CLEAR(self->dict);
1123 return 0;
1124}
1125
1126static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001127textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001128
/* Make the calling function return NULL with an exception set when the
   stream is closed.  Must be used as a statement inside a function that
   returns a pointer.

   This macro takes some shortcuts to make the common case faster: for
   exact TextIOWrapper instances the cached raw FileIO object is queried
   directly when there is one, otherwise the `closed` getter is used.
   Instances of other types go through _PyIOBase_check_closed().

   The argument is parenthesized on every use so that any pointer
   expression can be passed safely. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1155
/* Make the calling function return NULL with a ValueError set when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized.

   For functions returning a pointer; always follow the invocation with a
   semicolon.  The argument is parenthesized on every use so that any
   pointer expression can be passed safely. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1167
/* Variant of the initialization check for functions returning int: makes
   the calling function return -1 with a ValueError set when the wrapper
   is not usable (self->ok <= 0).  The message distinguishes a detached
   buffer from an object that was never initialized.

   Always follow the invocation with a semicolon.  The argument is
   parenthesized on every use so that any pointer expression can be passed
   safely. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1179
1180
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001181static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001182textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001183{
1184 PyObject *buffer, *res;
1185 CHECK_INITIALIZED(self);
1186 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1187 if (res == NULL)
1188 return NULL;
1189 Py_DECREF(res);
1190 buffer = self->buffer;
1191 self->buffer = NULL;
1192 self->detached = 1;
1193 self->ok = 0;
1194 return buffer;
1195}
1196
Antoine Pitrou24f36292009-03-28 22:16:42 +00001197/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001198 underlying buffered object, though. */
1199static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001200_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001201{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001202 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001203
1204 if (self->pending_bytes == NULL)
1205 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001206
1207 pending = self->pending_bytes;
1208 Py_INCREF(pending);
1209 self->pending_bytes_count = 0;
1210 Py_CLEAR(self->pending_bytes);
1211
1212 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1213 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001214 if (b == NULL)
1215 return -1;
1216 ret = PyObject_CallMethodObjArgs(self->buffer,
1217 _PyIO_str_write, b, NULL);
1218 Py_DECREF(b);
1219 if (ret == NULL)
1220 return -1;
1221 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222 return 0;
1223}
1224
1225static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001226textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001227{
1228 PyObject *ret;
1229 PyObject *text; /* owned reference */
1230 PyObject *b;
1231 Py_ssize_t textlen;
1232 int haslf = 0;
1233 int needflush = 0;
1234
1235 CHECK_INITIALIZED(self);
1236
1237 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1238 return NULL;
1239 }
1240
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001241 if (PyUnicode_READY(text) == -1)
1242 return NULL;
1243
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001244 CHECK_CLOSED(self);
1245
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001246 if (self->encoder == NULL)
1247 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001248
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001249 Py_INCREF(text);
1250
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001251 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001252
1253 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001254 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001255 haslf = 1;
1256
1257 if (haslf && self->writetranslate && self->writenl != NULL) {
1258 PyObject *newtext = PyObject_CallMethod(
1259 text, "replace", "ss", "\n", self->writenl);
1260 Py_DECREF(text);
1261 if (newtext == NULL)
1262 return NULL;
1263 text = newtext;
1264 }
1265
Antoine Pitroue96ec682011-07-23 21:46:35 +02001266 if (self->write_through)
1267 needflush = 1;
1268 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001269 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001270 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001271 needflush = 1;
1272
1273 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001274 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001276 self->encoding_start_of_stream = 0;
1277 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001278 else
1279 b = PyObject_CallMethodObjArgs(self->encoder,
1280 _PyIO_str_encode, text, NULL);
1281 Py_DECREF(text);
1282 if (b == NULL)
1283 return NULL;
1284
1285 if (self->pending_bytes == NULL) {
1286 self->pending_bytes = PyList_New(0);
1287 if (self->pending_bytes == NULL) {
1288 Py_DECREF(b);
1289 return NULL;
1290 }
1291 self->pending_bytes_count = 0;
1292 }
1293 if (PyList_Append(self->pending_bytes, b) < 0) {
1294 Py_DECREF(b);
1295 return NULL;
1296 }
1297 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1298 Py_DECREF(b);
1299 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001300 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001301 return NULL;
1302 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001303
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001304 if (needflush) {
1305 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1306 if (ret == NULL)
1307 return NULL;
1308 Py_DECREF(ret);
1309 }
1310
1311 Py_CLEAR(self->snapshot);
1312
1313 if (self->decoder) {
1314 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1315 if (ret == NULL)
1316 return NULL;
1317 Py_DECREF(ret);
1318 }
1319
1320 return PyLong_FromSsize_t(textlen);
1321}
1322
1323/* Steal a reference to chars and store it in the decoded_char buffer;
1324 */
1325static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001326textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001327{
1328 Py_CLEAR(self->decoded_chars);
1329 self->decoded_chars = chars;
1330 self->decoded_chars_used = 0;
1331}
1332
1333static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001334textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001335{
1336 PyObject *chars;
1337 Py_ssize_t avail;
1338
1339 if (self->decoded_chars == NULL)
1340 return PyUnicode_FromStringAndSize(NULL, 0);
1341
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001342 /* decoded_chars is guaranteed to be "ready". */
1343 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001344 - self->decoded_chars_used);
1345
1346 assert(avail >= 0);
1347
1348 if (n < 0 || n > avail)
1349 n = avail;
1350
1351 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001352 chars = PyUnicode_Substring(self->decoded_chars,
1353 self->decoded_chars_used,
1354 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001355 if (chars == NULL)
1356 return NULL;
1357 }
1358 else {
1359 chars = self->decoded_chars;
1360 Py_INCREF(chars);
1361 }
1362
1363 self->decoded_chars_used += n;
1364 return chars;
1365}
1366
1367/* Read and decode the next chunk of data from the BufferedReader.
1368 */
1369static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001370textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001371{
1372 PyObject *dec_buffer = NULL;
1373 PyObject *dec_flags = NULL;
1374 PyObject *input_chunk = NULL;
1375 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001376 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001377 int eof;
1378
1379 /* The return value is True unless EOF was reached. The decoded string is
1380 * placed in self._decoded_chars (replacing its previous value). The
1381 * entire input chunk is sent to the decoder, though some of it may remain
1382 * buffered in the decoder, yet to be converted.
1383 */
1384
1385 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001386 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001387 return -1;
1388 }
1389
1390 if (self->telling) {
1391 /* To prepare for tell(), we need to snapshot a point in the file
1392 * where the decoder's input buffer is empty.
1393 */
1394
1395 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1396 _PyIO_str_getstate, NULL);
1397 if (state == NULL)
1398 return -1;
1399 /* Given this, we know there was a valid snapshot point
1400 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1401 */
1402 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1403 Py_DECREF(state);
1404 return -1;
1405 }
1406 Py_INCREF(dec_buffer);
1407 Py_INCREF(dec_flags);
1408 Py_DECREF(state);
1409 }
1410
1411 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1412 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1413 if (chunk_size == NULL)
1414 goto fail;
1415 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001416 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1417 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001418 Py_DECREF(chunk_size);
1419 if (input_chunk == NULL)
1420 goto fail;
1421 assert(PyBytes_Check(input_chunk));
1422
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001423 nbytes = PyBytes_Size(input_chunk);
1424 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001425
1426 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1427 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1428 self->decoder, input_chunk, eof);
1429 }
1430 else {
1431 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1432 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1433 }
1434
1435 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1436 if (decoded_chars == NULL)
1437 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001438 if (PyUnicode_READY(decoded_chars) == -1)
1439 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001440 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001441 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001442 if (nchars > 0)
1443 self->b2cratio = (double) nbytes / nchars;
1444 else
1445 self->b2cratio = 0.0;
1446 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001447 eof = 0;
1448
1449 if (self->telling) {
1450 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1451 * next input to be decoded is dec_buffer + input_chunk.
1452 */
1453 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1454 if (next_input == NULL)
1455 goto fail;
1456 assert (PyBytes_Check(next_input));
1457 Py_DECREF(dec_buffer);
1458 Py_CLEAR(self->snapshot);
1459 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1460 }
1461 Py_DECREF(input_chunk);
1462
1463 return (eof == 0);
1464
1465 fail:
1466 Py_XDECREF(dec_buffer);
1467 Py_XDECREF(dec_flags);
1468 Py_XDECREF(input_chunk);
1469 return -1;
1470}
1471
1472static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001473textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001474{
1475 Py_ssize_t n = -1;
1476 PyObject *result = NULL, *chunks = NULL;
1477
1478 CHECK_INITIALIZED(self);
1479
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001480 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001481 return NULL;
1482
1483 CHECK_CLOSED(self);
1484
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001485 if (self->decoder == NULL)
1486 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001487
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001488 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001489 return NULL;
1490
1491 if (n < 0) {
1492 /* Read everything */
1493 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1494 PyObject *decoded;
1495 if (bytes == NULL)
1496 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001497
1498 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1499 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1500 bytes, 1);
1501 else
1502 decoded = PyObject_CallMethodObjArgs(
1503 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001504 Py_DECREF(bytes);
1505 if (decoded == NULL)
1506 goto fail;
1507
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001508 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001509
1510 if (result == NULL) {
1511 Py_DECREF(decoded);
1512 return NULL;
1513 }
1514
1515 PyUnicode_AppendAndDel(&result, decoded);
1516 if (result == NULL)
1517 goto fail;
1518
1519 Py_CLEAR(self->snapshot);
1520 return result;
1521 }
1522 else {
1523 int res = 1;
1524 Py_ssize_t remaining = n;
1525
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001526 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001527 if (result == NULL)
1528 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001529 if (PyUnicode_READY(result) == -1)
1530 goto fail;
1531 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001532
1533 /* Keep reading chunks until we have n characters to return */
1534 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001535 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536 if (res < 0)
1537 goto fail;
1538 if (res == 0) /* EOF */
1539 break;
1540 if (chunks == NULL) {
1541 chunks = PyList_New(0);
1542 if (chunks == NULL)
1543 goto fail;
1544 }
1545 if (PyList_Append(chunks, result) < 0)
1546 goto fail;
1547 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001548 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549 if (result == NULL)
1550 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001551 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001552 }
1553 if (chunks != NULL) {
1554 if (result != NULL && PyList_Append(chunks, result) < 0)
1555 goto fail;
1556 Py_CLEAR(result);
1557 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1558 if (result == NULL)
1559 goto fail;
1560 Py_CLEAR(chunks);
1561 }
1562 return result;
1563 }
1564 fail:
1565 Py_XDECREF(result);
1566 Py_XDECREF(chunks);
1567 return NULL;
1568}
1569
1570
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001571/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001572 that is to the NUL character. Otherwise the function will produce
1573 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001574static char *
1575find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001578 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001579 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001580 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001581 return s;
1582 if (s == end)
1583 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001584 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001585 }
1586}
1587
1588Py_ssize_t
1589_PyIO_find_line_ending(
1590 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001591 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001593 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001594
1595 if (translated) {
1596 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001597 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001598 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001599 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001600 else {
1601 *consumed = len;
1602 return -1;
1603 }
1604 }
1605 else if (universal) {
1606 /* Universal newline search. Find any of \r, \r\n, \n
1607 * The decoder ensures that \r\n are not split in two pieces
1608 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001609 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001610 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001611 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001612 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001613 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001614 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001615 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 if (s >= end) {
1617 *consumed = len;
1618 return -1;
1619 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001620 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001621 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001622 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001623 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001624 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001625 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001626 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001627 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001628 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 }
1630 }
1631 }
1632 else {
1633 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001634 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1635 char *nl = PyUnicode_DATA(readnl);
1636 /* Assume that readnl is an ASCII character. */
1637 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001638 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001639 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001641 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642 *consumed = len;
1643 return -1;
1644 }
1645 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001646 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001647 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001648 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649 if (e < s)
1650 e = s;
1651 while (s < e) {
1652 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001653 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001654 if (pos == NULL || pos >= e)
1655 break;
1656 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 break;
1659 }
1660 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001661 return (pos - start)/kind + readnl_len;
1662 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001664 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001665 if (pos == NULL)
1666 *consumed = len;
1667 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001668 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 return -1;
1670 }
1671 }
1672}
1673
1674static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001675_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676{
1677 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1678 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1679 int res;
1680
1681 CHECK_CLOSED(self);
1682
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001683 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001684 return NULL;
1685
1686 chunked = 0;
1687
1688 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001689 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001690 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001691 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692 Py_ssize_t consumed = 0;
1693
1694 /* First, get some data if necessary */
1695 res = 1;
1696 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001697 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001698 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 if (res < 0)
1700 goto error;
1701 if (res == 0)
1702 break;
1703 }
1704 if (res == 0) {
1705 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001706 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001707 Py_CLEAR(self->snapshot);
1708 start = endpos = offset_to_buffer = 0;
1709 break;
1710 }
1711
1712 if (remaining == NULL) {
1713 line = self->decoded_chars;
1714 start = self->decoded_chars_used;
1715 offset_to_buffer = 0;
1716 Py_INCREF(line);
1717 }
1718 else {
1719 assert(self->decoded_chars_used == 0);
1720 line = PyUnicode_Concat(remaining, self->decoded_chars);
1721 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001722 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 Py_CLEAR(remaining);
1724 if (line == NULL)
1725 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001726 if (PyUnicode_READY(line) == -1)
1727 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001728 }
1729
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001730 ptr = PyUnicode_DATA(line);
1731 line_len = PyUnicode_GET_LENGTH(line);
1732 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001733
1734 endpos = _PyIO_find_line_ending(
1735 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001736 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001737 ptr + kind * start,
1738 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001739 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 if (endpos >= 0) {
1741 endpos += start;
1742 if (limit >= 0 && (endpos - start) + chunked >= limit)
1743 endpos = start + limit - chunked;
1744 break;
1745 }
1746
1747 /* We can put aside up to `endpos` */
1748 endpos = consumed + start;
1749 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1750 /* Didn't find line ending, but reached length limit */
1751 endpos = start + limit - chunked;
1752 break;
1753 }
1754
1755 if (endpos > start) {
1756 /* No line ending seen yet - put aside current data */
1757 PyObject *s;
1758 if (chunks == NULL) {
1759 chunks = PyList_New(0);
1760 if (chunks == NULL)
1761 goto error;
1762 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001763 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 if (s == NULL)
1765 goto error;
1766 if (PyList_Append(chunks, s) < 0) {
1767 Py_DECREF(s);
1768 goto error;
1769 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001770 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001771 Py_DECREF(s);
1772 }
1773 /* There may be some remaining bytes we'll have to prepend to the
1774 next chunk of data */
1775 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001776 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777 if (remaining == NULL)
1778 goto error;
1779 }
1780 Py_CLEAR(line);
1781 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001782 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001783 }
1784
1785 if (line != NULL) {
1786 /* Our line ends in the current buffer */
1787 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001788 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1789 PyObject *s = PyUnicode_Substring(line, start, endpos);
1790 Py_CLEAR(line);
1791 if (s == NULL)
1792 goto error;
1793 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 }
1795 }
1796 if (remaining != NULL) {
1797 if (chunks == NULL) {
1798 chunks = PyList_New(0);
1799 if (chunks == NULL)
1800 goto error;
1801 }
1802 if (PyList_Append(chunks, remaining) < 0)
1803 goto error;
1804 Py_CLEAR(remaining);
1805 }
1806 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001807 if (line != NULL) {
1808 if (PyList_Append(chunks, line) < 0)
1809 goto error;
1810 Py_DECREF(line);
1811 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1813 if (line == NULL)
1814 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001815 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001817 if (line == NULL) {
1818 Py_INCREF(_PyIO_empty_str);
1819 line = _PyIO_empty_str;
1820 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001821
1822 return line;
1823
1824 error:
1825 Py_XDECREF(chunks);
1826 Py_XDECREF(remaining);
1827 Py_XDECREF(line);
1828 return NULL;
1829}
1830
1831static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001832textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001833{
1834 Py_ssize_t limit = -1;
1835
1836 CHECK_INITIALIZED(self);
1837 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1838 return NULL;
1839 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001840 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001841}
1842
1843/* Seek and Tell */
1844
1845typedef struct {
1846 Py_off_t start_pos;
1847 int dec_flags;
1848 int bytes_to_feed;
1849 int chars_to_skip;
1850 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001851} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001852
1853/*
1854 To speed up cookie packing/unpacking, we store the fields in a temporary
1855 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1856 The following macros define at which offsets in the intermediary byte
1857 string the various CookieStruct fields will be stored.
1858 */
1859
1860#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1861
1862#if defined(WORDS_BIGENDIAN)
1863
1864# define IS_LITTLE_ENDIAN 0
1865
1866/* We want the least significant byte of start_pos to also be the least
1867 significant byte of the cookie, which means that in big-endian mode we
1868 must copy the fields in reverse order. */
1869
1870# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1871# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1872# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1873# define OFF_CHARS_TO_SKIP (sizeof(char))
1874# define OFF_NEED_EOF 0
1875
1876#else
1877
1878# define IS_LITTLE_ENDIAN 1
1879
1880/* Little-endian mode: the least significant byte of start_pos will
1881 naturally end up the least significant byte of the cookie. */
1882
1883# define OFF_START_POS 0
1884# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1885# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1886# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1887# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1888
1889#endif
1890
1891static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001892textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001893{
1894 unsigned char buffer[COOKIE_BUF_LEN];
1895 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1896 if (cookieLong == NULL)
1897 return -1;
1898
1899 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1900 IS_LITTLE_ENDIAN, 0) < 0) {
1901 Py_DECREF(cookieLong);
1902 return -1;
1903 }
1904 Py_DECREF(cookieLong);
1905
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001906 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1907 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1908 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1909 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1910 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911
1912 return 0;
1913}
1914
1915static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001916textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001917{
1918 unsigned char buffer[COOKIE_BUF_LEN];
1919
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001920 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1921 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1922 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1923 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1924 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925
1926 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1927}
1928#undef IS_LITTLE_ENDIAN
1929
1930static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001931_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001932{
1933 PyObject *res;
1934 /* When seeking to the start of the stream, we call decoder.reset()
1935 rather than decoder.getstate().
1936 This is for a few decoders such as utf-16 for which the state value
1937 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1938 utf-16, that we are expecting a BOM).
1939 */
1940 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1941 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1942 else
1943 res = PyObject_CallMethod(self->decoder, "setstate",
1944 "((yi))", "", cookie->dec_flags);
1945 if (res == NULL)
1946 return -1;
1947 Py_DECREF(res);
1948 return 0;
1949}
1950
Antoine Pitroue4501852009-05-14 18:55:55 +00001951static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001952_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001953{
1954 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001955 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001956 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1957 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1958 self->encoding_start_of_stream = 1;
1959 }
1960 else {
1961 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1962 _PyIO_zero, NULL);
1963 self->encoding_start_of_stream = 0;
1964 }
1965 if (res == NULL)
1966 return -1;
1967 Py_DECREF(res);
1968 return 0;
1969}
1970
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001971static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001972textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001973{
1974 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001975 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001976 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977 PyObject *res;
1978 int cmp;
1979
1980 CHECK_INITIALIZED(self);
1981
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001982 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1983 return NULL;
1984 CHECK_CLOSED(self);
1985
1986 Py_INCREF(cookieObj);
1987
1988 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001989 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001990 goto fail;
1991 }
1992
1993 if (whence == 1) {
1994 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00001995 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996 if (cmp < 0)
1997 goto fail;
1998
1999 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002000 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002001 goto fail;
2002 }
2003
2004 /* Seeking to the current position should attempt to
2005 * sync the underlying buffer with the current position.
2006 */
2007 Py_DECREF(cookieObj);
2008 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2009 if (cookieObj == NULL)
2010 goto fail;
2011 }
2012 else if (whence == 2) {
2013 /* seek relative to end of file */
2014
Antoine Pitroue4501852009-05-14 18:55:55 +00002015 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016 if (cmp < 0)
2017 goto fail;
2018
2019 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002020 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021 goto fail;
2022 }
2023
2024 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2025 if (res == NULL)
2026 goto fail;
2027 Py_DECREF(res);
2028
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002029 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 Py_CLEAR(self->snapshot);
2031 if (self->decoder) {
2032 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2033 if (res == NULL)
2034 goto fail;
2035 Py_DECREF(res);
2036 }
2037
2038 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2039 Py_XDECREF(cookieObj);
2040 return res;
2041 }
2042 else if (whence != 0) {
2043 PyErr_Format(PyExc_ValueError,
2044 "invalid whence (%d, should be 0, 1 or 2)", whence);
2045 goto fail;
2046 }
2047
Antoine Pitroue4501852009-05-14 18:55:55 +00002048 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049 if (cmp < 0)
2050 goto fail;
2051
2052 if (cmp == 1) {
2053 PyErr_Format(PyExc_ValueError,
2054 "negative seek position %R", cookieObj);
2055 goto fail;
2056 }
2057
2058 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2059 if (res == NULL)
2060 goto fail;
2061 Py_DECREF(res);
2062
2063 /* The strategy of seek() is to go back to the safe start point
2064 * and replay the effect of read(chars_to_skip) from there.
2065 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002066 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 goto fail;
2068
2069 /* Seek back to the safe start point. */
2070 posobj = PyLong_FromOff_t(cookie.start_pos);
2071 if (posobj == NULL)
2072 goto fail;
2073 res = PyObject_CallMethodObjArgs(self->buffer,
2074 _PyIO_str_seek, posobj, NULL);
2075 Py_DECREF(posobj);
2076 if (res == NULL)
2077 goto fail;
2078 Py_DECREF(res);
2079
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002080 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 Py_CLEAR(self->snapshot);
2082
2083 /* Restore the decoder to its state from the safe start point. */
2084 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002085 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 goto fail;
2087 }
2088
2089 if (cookie.chars_to_skip) {
2090 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2091 PyObject *input_chunk = PyObject_CallMethod(
2092 self->buffer, "read", "i", cookie.bytes_to_feed);
2093 PyObject *decoded;
2094
2095 if (input_chunk == NULL)
2096 goto fail;
2097
2098 assert (PyBytes_Check(input_chunk));
2099
2100 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2101 if (self->snapshot == NULL) {
2102 Py_DECREF(input_chunk);
2103 goto fail;
2104 }
2105
2106 decoded = PyObject_CallMethod(self->decoder, "decode",
2107 "Oi", input_chunk, (int)cookie.need_eof);
2108
2109 if (decoded == NULL)
2110 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002111 if (PyUnicode_READY(decoded) == -1) {
2112 Py_DECREF(decoded);
2113 goto fail;
2114 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002115
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002116 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117
2118 /* Skip chars_to_skip of the decoded characters. */
2119 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2120 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2121 goto fail;
2122 }
2123 self->decoded_chars_used = cookie.chars_to_skip;
2124 }
2125 else {
2126 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2127 if (self->snapshot == NULL)
2128 goto fail;
2129 }
2130
Antoine Pitroue4501852009-05-14 18:55:55 +00002131 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2132 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002133 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002134 goto fail;
2135 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136 return cookieObj;
2137 fail:
2138 Py_XDECREF(cookieObj);
2139 return NULL;
2140
2141}
2142
2143static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002144textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002145{
2146 PyObject *res;
2147 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002148 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002149 PyObject *next_input;
2150 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002151 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002152 PyObject *saved_state = NULL;
2153 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002154 char *dec_buffer;
2155 Py_ssize_t dec_buffer_len;
2156 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157
2158 CHECK_INITIALIZED(self);
2159 CHECK_CLOSED(self);
2160
2161 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002162 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002163 goto fail;
2164 }
2165 if (!self->telling) {
2166 PyErr_SetString(PyExc_IOError,
2167 "telling position disabled by next() call");
2168 goto fail;
2169 }
2170
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002171 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002172 return NULL;
2173 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2174 if (res == NULL)
2175 goto fail;
2176 Py_DECREF(res);
2177
2178 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2179 if (posobj == NULL)
2180 goto fail;
2181
2182 if (self->decoder == NULL || self->snapshot == NULL) {
2183 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2184 return posobj;
2185 }
2186
2187#if defined(HAVE_LARGEFILE_SUPPORT)
2188 cookie.start_pos = PyLong_AsLongLong(posobj);
2189#else
2190 cookie.start_pos = PyLong_AsLong(posobj);
2191#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002192 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002193 if (PyErr_Occurred())
2194 goto fail;
2195
2196 /* Skip backward to the snapshot point (see _read_chunk). */
2197 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2198 goto fail;
2199
2200 assert (PyBytes_Check(next_input));
2201
2202 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2203
2204 /* How many decoded characters have been used up since the snapshot? */
2205 if (self->decoded_chars_used == 0) {
2206 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002207 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002208 }
2209
2210 chars_to_skip = self->decoded_chars_used;
2211
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002212 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002213 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2214 _PyIO_str_getstate, NULL);
2215 if (saved_state == NULL)
2216 goto fail;
2217
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002218#define DECODER_GETSTATE() do { \
2219 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2220 _PyIO_str_getstate, NULL); \
2221 if (_state == NULL) \
2222 goto fail; \
2223 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2224 Py_DECREF(_state); \
2225 goto fail; \
2226 } \
2227 Py_DECREF(_state); \
2228 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002229
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002230 /* TODO: replace assert with exception */
2231#define DECODER_DECODE(start, len, res) do { \
2232 PyObject *_decoded = PyObject_CallMethod( \
2233 self->decoder, "decode", "y#", start, len); \
2234 if (_decoded == NULL) \
2235 goto fail; \
2236 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002237 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002238 Py_DECREF(_decoded); \
2239 } while (0)
2240
2241 /* Fast search for an acceptable start point, close to our
2242 current pos */
2243 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2244 skip_back = 1;
2245 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2246 input = PyBytes_AS_STRING(next_input);
2247 while (skip_bytes > 0) {
2248 /* Decode up to temptative start point */
2249 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2250 goto fail;
2251 DECODER_DECODE(input, skip_bytes, chars_decoded);
2252 if (chars_decoded <= chars_to_skip) {
2253 DECODER_GETSTATE();
2254 if (dec_buffer_len == 0) {
2255 /* Before pos and no bytes buffered in decoder => OK */
2256 cookie.dec_flags = dec_flags;
2257 chars_to_skip -= chars_decoded;
2258 break;
2259 }
2260 /* Skip back by buffered amount and reset heuristic */
2261 skip_bytes -= dec_buffer_len;
2262 skip_back = 1;
2263 }
2264 else {
2265 /* We're too far ahead, skip back a bit */
2266 skip_bytes -= skip_back;
2267 skip_back *= 2;
2268 }
2269 }
2270 if (skip_bytes <= 0) {
2271 skip_bytes = 0;
2272 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2273 goto fail;
2274 }
2275
2276 /* Note our initial start point. */
2277 cookie.start_pos += skip_bytes;
2278 cookie.chars_to_skip = chars_to_skip;
2279 if (chars_to_skip == 0)
2280 goto finally;
2281
2282 /* We should be close to the desired position. Now feed the decoder one
2283 * byte at a time until we reach the `chars_to_skip` target.
2284 * As we go, note the nearest "safe start point" before the current
2285 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002286 * can safely start from there and advance to this location).
2287 */
2288 chars_decoded = 0;
2289 input = PyBytes_AS_STRING(next_input);
2290 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002291 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002292 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002293 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002294
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002295 DECODER_DECODE(input, 1, n);
2296 /* We got n chars for 1 byte */
2297 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002298 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002299 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002300
2301 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2302 /* Decoder buffer is empty, so this is a safe start point. */
2303 cookie.start_pos += cookie.bytes_to_feed;
2304 chars_to_skip -= chars_decoded;
2305 cookie.dec_flags = dec_flags;
2306 cookie.bytes_to_feed = 0;
2307 chars_decoded = 0;
2308 }
2309 if (chars_decoded >= chars_to_skip)
2310 break;
2311 input++;
2312 }
2313 if (input == input_end) {
2314 /* We didn't get enough decoded data; signal EOF to get more. */
2315 PyObject *decoded = PyObject_CallMethod(
2316 self->decoder, "decode", "yi", "", /* final = */ 1);
2317 if (decoded == NULL)
2318 goto fail;
2319 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002320 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002321 Py_DECREF(decoded);
2322 cookie.need_eof = 1;
2323
2324 if (chars_decoded < chars_to_skip) {
2325 PyErr_SetString(PyExc_IOError,
2326 "can't reconstruct logical file position");
2327 goto fail;
2328 }
2329 }
2330
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002331finally:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002332 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2333 Py_DECREF(saved_state);
2334 if (res == NULL)
2335 return NULL;
2336 Py_DECREF(res);
2337
2338 /* The returned cookie corresponds to the last safe start point. */
2339 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002340 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002341
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002342fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002343 if (saved_state) {
2344 PyObject *type, *value, *traceback;
2345 PyErr_Fetch(&type, &value, &traceback);
2346
2347 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2348 Py_DECREF(saved_state);
2349 if (res == NULL)
2350 return NULL;
2351 Py_DECREF(res);
2352
2353 PyErr_Restore(type, value, traceback);
2354 }
2355 return NULL;
2356}
2357
2358static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002359textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002360{
2361 PyObject *pos = Py_None;
2362 PyObject *res;
2363
2364 CHECK_INITIALIZED(self)
2365 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2366 return NULL;
2367 }
2368
2369 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2370 if (res == NULL)
2371 return NULL;
2372 Py_DECREF(res);
2373
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002374 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002375}
2376
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002377static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002378textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002379{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002380 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002381
2382 CHECK_INITIALIZED(self);
2383
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002384 res = PyUnicode_FromString("<_io.TextIOWrapper");
2385 if (res == NULL)
2386 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002387 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2388 if (nameobj == NULL) {
2389 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2390 PyErr_Clear();
2391 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002392 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002393 }
2394 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002395 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002396 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002397 if (s == NULL)
2398 goto error;
2399 PyUnicode_AppendAndDel(&res, s);
2400 if (res == NULL)
2401 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002402 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002403 modeobj = PyObject_GetAttrString((PyObject *) self, "mode");
2404 if (modeobj == NULL) {
2405 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2406 PyErr_Clear();
2407 else
2408 goto error;
2409 }
2410 else {
2411 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2412 Py_DECREF(modeobj);
2413 if (s == NULL)
2414 goto error;
2415 PyUnicode_AppendAndDel(&res, s);
2416 if (res == NULL)
2417 return NULL;
2418 }
2419 s = PyUnicode_FromFormat("%U encoding=%R>",
2420 res, self->encoding);
2421 Py_DECREF(res);
2422 return s;
2423error:
2424 Py_XDECREF(res);
2425 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002426}
2427
2428
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002429/* Inquiries */
2430
2431static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002432textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002433{
2434 CHECK_INITIALIZED(self);
2435 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2436}
2437
2438static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002439textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002440{
2441 CHECK_INITIALIZED(self);
2442 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2443}
2444
2445static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002446textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002447{
2448 CHECK_INITIALIZED(self);
2449 return PyObject_CallMethod(self->buffer, "readable", NULL);
2450}
2451
2452static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002453textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002454{
2455 CHECK_INITIALIZED(self);
2456 return PyObject_CallMethod(self->buffer, "writable", NULL);
2457}
2458
2459static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002460textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002461{
2462 CHECK_INITIALIZED(self);
2463 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2464}
2465
2466static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002467textiowrapper_getstate(textio *self, PyObject *args)
2468{
2469 PyErr_Format(PyExc_TypeError,
2470 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2471 return NULL;
2472}
2473
2474static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002475textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002476{
2477 CHECK_INITIALIZED(self);
2478 CHECK_CLOSED(self);
2479 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002480 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002481 return NULL;
2482 return PyObject_CallMethod(self->buffer, "flush", NULL);
2483}
2484
2485static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002486textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002487{
2488 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002489 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002490 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002491
Antoine Pitrou6be88762010-05-03 16:48:20 +00002492 res = textiowrapper_closed_get(self, NULL);
2493 if (res == NULL)
2494 return NULL;
2495 r = PyObject_IsTrue(res);
2496 Py_DECREF(res);
2497 if (r < 0)
2498 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002499
Antoine Pitrou6be88762010-05-03 16:48:20 +00002500 if (r > 0) {
2501 Py_RETURN_NONE; /* stream already closed */
2502 }
2503 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002504 if (self->deallocating) {
2505 res = PyObject_CallMethod(self->buffer, "_dealloc_warn", "O", self);
2506 if (res)
2507 Py_DECREF(res);
2508 else
2509 PyErr_Clear();
2510 }
Antoine Pitrou6be88762010-05-03 16:48:20 +00002511 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2512 if (res == NULL) {
2513 return NULL;
2514 }
2515 else
2516 Py_DECREF(res);
2517
2518 return PyObject_CallMethod(self->buffer, "close", NULL);
2519 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520}
2521
2522static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002523textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002524{
2525 PyObject *line;
2526
2527 CHECK_INITIALIZED(self);
2528
2529 self->telling = 0;
2530 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2531 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002532 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002533 }
2534 else {
2535 line = PyObject_CallMethodObjArgs((PyObject *)self,
2536 _PyIO_str_readline, NULL);
2537 if (line && !PyUnicode_Check(line)) {
2538 PyErr_Format(PyExc_IOError,
2539 "readline() should have returned an str object, "
2540 "not '%.200s'", Py_TYPE(line)->tp_name);
2541 Py_DECREF(line);
2542 return NULL;
2543 }
2544 }
2545
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002546 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002547 return NULL;
2548
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002549 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002550 /* Reached EOF or would have blocked */
2551 Py_DECREF(line);
2552 Py_CLEAR(self->snapshot);
2553 self->telling = self->seekable;
2554 return NULL;
2555 }
2556
2557 return line;
2558}
2559
2560static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002561textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002562{
2563 CHECK_INITIALIZED(self);
2564 return PyObject_GetAttrString(self->buffer, "name");
2565}
2566
2567static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002568textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002569{
2570 CHECK_INITIALIZED(self);
2571 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2572}
2573
2574static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002575textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576{
2577 PyObject *res;
2578 CHECK_INITIALIZED(self);
2579 if (self->decoder == NULL)
2580 Py_RETURN_NONE;
2581 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2582 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002583 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2584 PyErr_Clear();
2585 Py_RETURN_NONE;
2586 }
2587 else {
2588 return NULL;
2589 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590 }
2591 return res;
2592}
2593
2594static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002595textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002596{
2597 CHECK_INITIALIZED(self);
2598 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2599}
2600
2601static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002602textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002603{
2604 CHECK_INITIALIZED(self);
2605 return PyLong_FromSsize_t(self->chunk_size);
2606}
2607
2608static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002609textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002610{
2611 Py_ssize_t n;
2612 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002613 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002614 if (n == -1 && PyErr_Occurred())
2615 return -1;
2616 if (n <= 0) {
2617 PyErr_SetString(PyExc_ValueError,
2618 "a strictly positive integer is required");
2619 return -1;
2620 }
2621 self->chunk_size = n;
2622 return 0;
2623}
2624
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002625static PyMethodDef textiowrapper_methods[] = {
2626 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2627 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2628 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2629 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2630 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2631 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002632
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002633 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2634 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2635 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2636 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2637 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002638 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002639
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002640 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2641 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2642 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643 {NULL, NULL}
2644};
2645
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002646static PyMemberDef textiowrapper_members[] = {
2647 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2648 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2649 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002650 {NULL}
2651};
2652
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002653static PyGetSetDef textiowrapper_getset[] = {
2654 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2655 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002656/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2657*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002658 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2659 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2660 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2661 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002662 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002663};
2664
2665PyTypeObject PyTextIOWrapper_Type = {
2666 PyVarObject_HEAD_INIT(NULL, 0)
2667 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002668 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002669 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002670 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002671 0, /*tp_print*/
2672 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002673 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002674 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002675 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002676 0, /*tp_as_number*/
2677 0, /*tp_as_sequence*/
2678 0, /*tp_as_mapping*/
2679 0, /*tp_hash */
2680 0, /*tp_call*/
2681 0, /*tp_str*/
2682 0, /*tp_getattro*/
2683 0, /*tp_setattro*/
2684 0, /*tp_as_buffer*/
2685 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2686 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002687 textiowrapper_doc, /* tp_doc */
2688 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2689 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002691 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002692 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002693 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2694 textiowrapper_methods, /* tp_methods */
2695 textiowrapper_members, /* tp_members */
2696 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002697 0, /* tp_base */
2698 0, /* tp_dict */
2699 0, /* tp_descr_get */
2700 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002701 offsetof(textio, dict), /*tp_dictoffset*/
2702 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002703 0, /* tp_alloc */
2704 PyType_GenericNew, /* tp_new */
2705};