blob: 880a5f0df558fcdf1c28ffcfa6dfc87b44c44247 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000016PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 {NULL, NULL}
126};
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000132 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000163 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags stored in nldecoder_object.seennl: one bit per newline
   convention encountered so far. */
#define SEEN_CR     1
#define SEEN_LF     2
#define SEEN_CRLF   4
#define SEEN_ALL    (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
244PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000245_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246 PyObject *input, int final)
247{
248 PyObject *output;
249 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000250 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
256 }
257
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
262 }
263 else {
264 output = input;
265 Py_INCREF(output);
266 }
267
268 if (output == NULL)
269 return NULL;
270
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
275 }
276
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200277 if (PyUnicode_READY(output) == -1)
278 goto error;
279
280 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000281 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200282 /* Prefix output with CR */
283 int kind;
284 PyObject *modified;
285 char *out;
286
287 modified = PyUnicode_New(output_len + 1,
288 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000289 if (modified == NULL)
290 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200291 kind = PyUnicode_KIND(modified);
292 out = PyUnicode_DATA(modified);
293 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
294 memcpy(out + PyUnicode_KIND_SIZE(kind, 1),
295 PyUnicode_DATA(output),
296 PyUnicode_KIND_SIZE(kind, output_len));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000297 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200298 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000299 self->pendingcr = 0;
300 output_len++;
301 }
302
303 /* retain last \r even when not translating data:
304 * then readline() is sure to get \r\n in one pass
305 */
306 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000307 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200308 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
309 {
310 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
311 if (modified == NULL)
312 goto error;
313 Py_DECREF(output);
314 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000315 self->pendingcr = 1;
316 }
317 }
318
319 /* Record which newlines are read and do newline translation if desired,
320 all in one pass. */
321 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000323 Py_ssize_t len;
324 int seennl = self->seennl;
325 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200326 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000327
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 in_str = PyUnicode_DATA(output);
329 len = PyUnicode_GET_LENGTH(output);
330 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000331
332 if (len == 0)
333 return output;
334
335 /* If, up to now, newlines are consistently \n, do a quick check
336 for the \r *byte* with the libc's optimized memchr.
337 */
338 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200339 only_lf = (memchr(in_str, '\r', PyUnicode_KIND_SIZE(kind, len)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000340 }
341
Antoine Pitrou66913e22009-03-06 23:40:56 +0000342 if (only_lf) {
343 /* If not already seen, quick scan for a possible "\n" character.
344 (there's nothing else to be done, even when in translation mode)
345 */
346 if (seennl == 0 &&
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 memchr(in_str, '\n', PyUnicode_KIND_SIZE(kind, len)) != NULL) {
348 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000349 for (;;) {
Victor Stinnerf7b8cb62011-09-29 03:28:17 +0200350 Py_UCS4 c;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000351 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200352 while (PyUnicode_READ(kind, in_str, i) > '\n')
353 i++;
354 c = PyUnicode_READ(kind, in_str, i++);
Antoine Pitrou66913e22009-03-06 23:40:56 +0000355 if (c == '\n') {
356 seennl |= SEEN_LF;
357 break;
358 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200359 if (i >= len)
Antoine Pitrou66913e22009-03-06 23:40:56 +0000360 break;
361 }
362 }
363 /* Finished: we have scanned for newlines, and none of them
364 need translating */
365 }
366 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200367 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000368 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 if (seennl == SEEN_ALL)
370 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000371 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200372 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000373 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200374 while (PyUnicode_READ(kind, in_str, i) > '\r')
375 i++;
376 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000377 if (c == '\n')
378 seennl |= SEEN_LF;
379 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200380 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000381 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200382 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000383 }
384 else
385 seennl |= SEEN_CR;
386 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000388 break;
389 if (seennl == SEEN_ALL)
390 break;
391 }
392 endscan:
393 ;
394 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000395 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200396 void *translated;
397 int kind = PyUnicode_KIND(output);
398 void *in_str = PyUnicode_DATA(output);
399 Py_ssize_t in, out;
400 /* XXX: Previous in-place translation here is disabled as
401 resizing is not possible anymore */
402 /* We could try to optimize this so that we only do a copy
403 when there is something to translate. On the other hand,
404 we already know there is a \r byte, so chances are high
405 that something needs to be done. */
406 translated = PyMem_Malloc(PyUnicode_KIND_SIZE(kind, len));
407 if (translated == NULL) {
408 PyErr_NoMemory();
409 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000410 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200411 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000412 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000414 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
416 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000419 seennl |= SEEN_LF;
420 continue;
421 }
422 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200423 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000424 in++;
425 seennl |= SEEN_CRLF;
426 }
427 else
428 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 continue;
431 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200432 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 Py_DECREF(output);
437 output = PyUnicode_FromKindAndData(kind, translated, out);
438 if (!output)
439 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 }
441 self->seennl |= seennl;
442 }
443
444 return output;
445
446 error:
447 Py_DECREF(output);
448 return NULL;
449}
450
451static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000452incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 PyObject *args, PyObject *kwds)
454{
455 char *kwlist[] = {"input", "final", NULL};
456 PyObject *input;
457 int final = 0;
458
459 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
460 kwlist, &input, &final))
461 return NULL;
462 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
463}
464
465static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000466incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000467{
468 PyObject *buffer;
469 unsigned PY_LONG_LONG flag;
470
471 if (self->decoder != Py_None) {
472 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
473 _PyIO_str_getstate, NULL);
474 if (state == NULL)
475 return NULL;
476 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
477 Py_DECREF(state);
478 return NULL;
479 }
480 Py_INCREF(buffer);
481 Py_DECREF(state);
482 }
483 else {
484 buffer = PyBytes_FromString("");
485 flag = 0;
486 }
487 flag <<= 1;
488 if (self->pendingcr)
489 flag |= 1;
490 return Py_BuildValue("NK", buffer, flag);
491}
492
493static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000494incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000495{
496 PyObject *buffer;
497 unsigned PY_LONG_LONG flag;
498
499 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
500 return NULL;
501
502 self->pendingcr = (int) flag & 1;
503 flag >>= 1;
504
505 if (self->decoder != Py_None)
506 return PyObject_CallMethod(self->decoder,
507 "setstate", "((OK))", buffer, flag);
508 else
509 Py_RETURN_NONE;
510}
511
512static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000513incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000514{
515 self->seennl = 0;
516 self->pendingcr = 0;
517 if (self->decoder != Py_None)
518 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000524incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 switch (self->seennl) {
527 case SEEN_CR:
528 return PyUnicode_FromString("\r");
529 case SEEN_LF:
530 return PyUnicode_FromString("\n");
531 case SEEN_CRLF:
532 return PyUnicode_FromString("\r\n");
533 case SEEN_CR | SEEN_LF:
534 return Py_BuildValue("ss", "\r", "\n");
535 case SEEN_CR | SEEN_CRLF:
536 return Py_BuildValue("ss", "\r", "\r\n");
537 case SEEN_LF | SEEN_CRLF:
538 return Py_BuildValue("ss", "\n", "\r\n");
539 case SEEN_CR | SEEN_LF | SEEN_CRLF:
540 return Py_BuildValue("sss", "\r", "\n", "\r\n");
541 default:
542 Py_RETURN_NONE;
543 }
544
545}
546
547
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000548static PyMethodDef incrementalnewlinedecoder_methods[] = {
549 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
550 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
551 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
552 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000553 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000554};
555
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000556static PyGetSetDef incrementalnewlinedecoder_getset[] = {
557 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000558 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000559};
560
561PyTypeObject PyIncrementalNewlineDecoder_Type = {
562 PyVarObject_HEAD_INIT(NULL, 0)
563 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000564 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000566 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000567 0, /*tp_print*/
568 0, /*tp_getattr*/
569 0, /*tp_setattr*/
570 0, /*tp_compare */
571 0, /*tp_repr*/
572 0, /*tp_as_number*/
573 0, /*tp_as_sequence*/
574 0, /*tp_as_mapping*/
575 0, /*tp_hash */
576 0, /*tp_call*/
577 0, /*tp_str*/
578 0, /*tp_getattro*/
579 0, /*tp_setattro*/
580 0, /*tp_as_buffer*/
581 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000582 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583 0, /* tp_traverse */
584 0, /* tp_clear */
585 0, /* tp_richcompare */
586 0, /*tp_weaklistoffset*/
587 0, /* tp_iter */
588 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000589 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000591 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 0, /* tp_base */
593 0, /* tp_dict */
594 0, /* tp_descr_get */
595 0, /* tp_descr_set */
596 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000597 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000598 0, /* tp_alloc */
599 PyType_GenericNew, /* tp_new */
600};
601
602
603/* TextIOWrapper */
604
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000605PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606 "Character and line based layer over a BufferedIOBase object, buffer.\n"
607 "\n"
608 "encoding gives the name of the encoding that the stream will be\n"
609 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
610 "\n"
611 "errors determines the strictness of encoding and decoding (see the\n"
612 "codecs.register) and defaults to \"strict\".\n"
613 "\n"
614 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
615 "handling of line endings. If it is None, universal newlines is\n"
616 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
617 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
618 "caller. Conversely, on output, '\\n' is translated to the system\n"
619 "default line seperator, os.linesep. If newline is any other of its\n"
620 "legal values, that newline becomes the newline when the file is read\n"
621 "and it is returned untranslated. On output, '\\n' is converted to the\n"
622 "newline.\n"
623 "\n"
624 "If line_buffering is True, a call to flush is implied when a call to\n"
625 "write contains a newline character."
626 );
627
628typedef PyObject *
629 (*encodefunc_t)(PyObject *, PyObject *);
630
631typedef struct
632{
633 PyObject_HEAD
634 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000635 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000636 Py_ssize_t chunk_size;
637 PyObject *buffer;
638 PyObject *encoding;
639 PyObject *encoder;
640 PyObject *decoder;
641 PyObject *readnl;
642 PyObject *errors;
643 const char *writenl; /* utf-8 encoded, NULL stands for \n */
644 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200645 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000646 char readuniversal;
647 char readtranslate;
648 char writetranslate;
649 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200650 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000651 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000652 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000653 /* Specialized encoding func (see below) */
654 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000655 /* Whether or not it's the start of the stream */
656 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657
658 /* Reads and writes are internally buffered in order to speed things up.
659 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000660
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 Please also note that text to be written is first encoded before being
662 buffered. This is necessary so that encoding errors are immediately
663 reported to the caller, but it unfortunately means that the
664 IncrementalEncoder (whose encode() method is always written in Python)
665 becomes a bottleneck for small writes.
666 */
667 PyObject *decoded_chars; /* buffer for text returned from decoder */
668 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
669 PyObject *pending_bytes; /* list of bytes objects waiting to be
670 written, or NULL */
671 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000672
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000673 /* snapshot is either None, or a tuple (dec_flags, next_input) where
674 * dec_flags is the second (integer) item of the decoder state and
675 * next_input is the chunk of input bytes that comes next after the
676 * snapshot point. We use this to reconstruct decoder states in tell().
677 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000678 PyObject *snapshot;
679 /* Bytes-to-characters ratio for the current chunk. Serves as input for
680 the heuristic in tell(). */
681 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682
683 /* Cache raw object if it's a FileIO object */
684 PyObject *raw;
685
686 PyObject *weakreflist;
687 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000688} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000689
690
691/* A couple of specialized cases in order to bypass the slow incremental
692 encoding methods for the most popular encodings. */
693
694static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000695ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000696{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200697 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698}
699
700static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000701utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000702{
703 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
704 PyUnicode_GET_SIZE(text),
705 PyBytes_AS_STRING(self->errors), 1);
706}
707
708static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000709utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710{
711 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
712 PyUnicode_GET_SIZE(text),
713 PyBytes_AS_STRING(self->errors), -1);
714}
715
716static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000717utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718{
Antoine Pitroue4501852009-05-14 18:55:55 +0000719 if (!self->encoding_start_of_stream) {
720 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000722 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000724 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000726 }
727 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
728 PyUnicode_GET_SIZE(text),
729 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730}
731
Antoine Pitroue4501852009-05-14 18:55:55 +0000732static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000733utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000734{
735 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
736 PyUnicode_GET_SIZE(text),
737 PyBytes_AS_STRING(self->errors), 1);
738}
739
740static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000741utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000742{
743 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
744 PyUnicode_GET_SIZE(text),
745 PyBytes_AS_STRING(self->errors), -1);
746}
747
748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000750{
751 if (!self->encoding_start_of_stream) {
752 /* Skip the BOM and use native byte ordering */
753#if defined(WORDS_BIGENDIAN)
754 return utf32be_encode(self, text);
755#else
756 return utf32le_encode(self, text);
757#endif
758 }
759 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
760 PyUnicode_GET_SIZE(text),
761 PyBytes_AS_STRING(self->errors), 0);
762}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000763
764static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000765utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000766{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200767 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000768}
769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200773 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000774}
775
776/* Map normalized encoding names onto the specialized encoding funcs */
777
778typedef struct {
779 const char *name;
780 encodefunc_t encodefunc;
781} encodefuncentry;
782
Antoine Pitrou24f36292009-03-28 22:16:42 +0000783static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784 {"ascii", (encodefunc_t) ascii_encode},
785 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000786 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000787 {"utf-16-be", (encodefunc_t) utf16be_encode},
788 {"utf-16-le", (encodefunc_t) utf16le_encode},
789 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000790 {"utf-32-be", (encodefunc_t) utf32be_encode},
791 {"utf-32-le", (encodefunc_t) utf32le_encode},
792 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793 {NULL, NULL}
794};
795
796
797static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000798textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799{
800 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200801 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802 NULL};
803 PyObject *buffer, *raw;
804 char *encoding = NULL;
805 char *errors = NULL;
806 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200807 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000808 _PyIO_State *state = IO_STATE;
809
810 PyObject *res;
811 int r;
812
813 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000814 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200815 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000816 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200817 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818 return -1;
819
820 if (newline && newline[0] != '\0'
821 && !(newline[0] == '\n' && newline[1] == '\0')
822 && !(newline[0] == '\r' && newline[1] == '\0')
823 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
824 PyErr_Format(PyExc_ValueError,
825 "illegal newline value: %s", newline);
826 return -1;
827 }
828
829 Py_CLEAR(self->buffer);
830 Py_CLEAR(self->encoding);
831 Py_CLEAR(self->encoder);
832 Py_CLEAR(self->decoder);
833 Py_CLEAR(self->readnl);
834 Py_CLEAR(self->decoded_chars);
835 Py_CLEAR(self->pending_bytes);
836 Py_CLEAR(self->snapshot);
837 Py_CLEAR(self->errors);
838 Py_CLEAR(self->raw);
839 self->decoded_chars_used = 0;
840 self->pending_bytes_count = 0;
841 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000842 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000843
844 if (encoding == NULL) {
845 /* Try os.device_encoding(fileno) */
846 PyObject *fileno;
847 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
848 /* Ignore only AttributeError and UnsupportedOperation */
849 if (fileno == NULL) {
850 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
851 PyErr_ExceptionMatches(state->unsupported_operation)) {
852 PyErr_Clear();
853 }
854 else {
855 goto error;
856 }
857 }
858 else {
859 self->encoding = PyObject_CallMethod(state->os_module,
860 "device_encoding",
861 "N", fileno);
862 if (self->encoding == NULL)
863 goto error;
864 else if (!PyUnicode_Check(self->encoding))
865 Py_CLEAR(self->encoding);
866 }
867 }
868 if (encoding == NULL && self->encoding == NULL) {
869 if (state->locale_module == NULL) {
870 state->locale_module = PyImport_ImportModule("locale");
871 if (state->locale_module == NULL)
872 goto catch_ImportError;
873 else
874 goto use_locale;
875 }
876 else {
877 use_locale:
878 self->encoding = PyObject_CallMethod(
879 state->locale_module, "getpreferredencoding", NULL);
880 if (self->encoding == NULL) {
881 catch_ImportError:
882 /*
883 Importing locale can raise a ImportError because of
884 _functools, and locale.getpreferredencoding can raise a
885 ImportError if _locale is not available. These will happen
886 during module building.
887 */
888 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
889 PyErr_Clear();
890 self->encoding = PyUnicode_FromString("ascii");
891 }
892 else
893 goto error;
894 }
895 else if (!PyUnicode_Check(self->encoding))
896 Py_CLEAR(self->encoding);
897 }
898 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000899 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000900 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000901 if (encoding == NULL)
902 goto error;
903 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000904 else if (encoding != NULL) {
905 self->encoding = PyUnicode_FromString(encoding);
906 if (self->encoding == NULL)
907 goto error;
908 }
909 else {
910 PyErr_SetString(PyExc_IOError,
911 "could not determine default encoding");
912 }
913
914 if (errors == NULL)
915 errors = "strict";
916 self->errors = PyBytes_FromString(errors);
917 if (self->errors == NULL)
918 goto error;
919
920 self->chunk_size = 8192;
921 self->readuniversal = (newline == NULL || newline[0] == '\0');
922 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200923 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000924 self->readtranslate = (newline == NULL);
925 if (newline) {
926 self->readnl = PyUnicode_FromString(newline);
927 if (self->readnl == NULL)
928 return -1;
929 }
930 self->writetranslate = (newline == NULL || newline[0] != '\0');
931 if (!self->readuniversal && self->readnl) {
932 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000933 if (self->writenl == NULL)
934 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000935 if (!strcmp(self->writenl, "\n"))
936 self->writenl = NULL;
937 }
938#ifdef MS_WINDOWS
939 else
940 self->writenl = "\r\n";
941#endif
942
943 /* Build the decoder object */
944 res = PyObject_CallMethod(buffer, "readable", NULL);
945 if (res == NULL)
946 goto error;
947 r = PyObject_IsTrue(res);
948 Py_DECREF(res);
949 if (r == -1)
950 goto error;
951 if (r == 1) {
952 self->decoder = PyCodec_IncrementalDecoder(
953 encoding, errors);
954 if (self->decoder == NULL)
955 goto error;
956
957 if (self->readuniversal) {
958 PyObject *incrementalDecoder = PyObject_CallFunction(
959 (PyObject *)&PyIncrementalNewlineDecoder_Type,
960 "Oi", self->decoder, (int)self->readtranslate);
961 if (incrementalDecoder == NULL)
962 goto error;
963 Py_CLEAR(self->decoder);
964 self->decoder = incrementalDecoder;
965 }
966 }
967
968 /* Build the encoder object */
969 res = PyObject_CallMethod(buffer, "writable", NULL);
970 if (res == NULL)
971 goto error;
972 r = PyObject_IsTrue(res);
973 Py_DECREF(res);
974 if (r == -1)
975 goto error;
976 if (r == 1) {
977 PyObject *ci;
978 self->encoder = PyCodec_IncrementalEncoder(
979 encoding, errors);
980 if (self->encoder == NULL)
981 goto error;
982 /* Get the normalized named of the codec */
983 ci = _PyCodec_Lookup(encoding);
984 if (ci == NULL)
985 goto error;
986 res = PyObject_GetAttrString(ci, "name");
987 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +0000988 if (res == NULL) {
989 if (PyErr_ExceptionMatches(PyExc_AttributeError))
990 PyErr_Clear();
991 else
992 goto error;
993 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000994 else if (PyUnicode_Check(res)) {
995 encodefuncentry *e = encodefuncs;
996 while (e->name != NULL) {
997 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
998 self->encodefunc = e->encodefunc;
999 break;
1000 }
1001 e++;
1002 }
1003 }
1004 Py_XDECREF(res);
1005 }
1006
1007 self->buffer = buffer;
1008 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001009
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001010 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1011 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1012 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1013 raw = PyObject_GetAttrString(buffer, "raw");
1014 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001015 if (raw == NULL) {
1016 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1017 PyErr_Clear();
1018 else
1019 goto error;
1020 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001021 else if (Py_TYPE(raw) == &PyFileIO_Type)
1022 self->raw = raw;
1023 else
1024 Py_DECREF(raw);
1025 }
1026
1027 res = PyObject_CallMethod(buffer, "seekable", NULL);
1028 if (res == NULL)
1029 goto error;
1030 self->seekable = self->telling = PyObject_IsTrue(res);
1031 Py_DECREF(res);
1032
Antoine Pitroue96ec682011-07-23 21:46:35 +02001033 self->has_read1 = PyObject_HasAttrString(buffer, "read1");
1034
Antoine Pitroue4501852009-05-14 18:55:55 +00001035 self->encoding_start_of_stream = 0;
1036 if (self->seekable && self->encoder) {
1037 PyObject *cookieObj;
1038 int cmp;
1039
1040 self->encoding_start_of_stream = 1;
1041
1042 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1043 if (cookieObj == NULL)
1044 goto error;
1045
1046 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1047 Py_DECREF(cookieObj);
1048 if (cmp < 0) {
1049 goto error;
1050 }
1051
1052 if (cmp == 0) {
1053 self->encoding_start_of_stream = 0;
1054 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1055 _PyIO_zero, NULL);
1056 if (res == NULL)
1057 goto error;
1058 Py_DECREF(res);
1059 }
1060 }
1061
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001062 self->ok = 1;
1063 return 0;
1064
1065 error:
1066 return -1;
1067}
1068
1069static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001070_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001071{
1072 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1073 return -1;
1074 self->ok = 0;
1075 Py_CLEAR(self->buffer);
1076 Py_CLEAR(self->encoding);
1077 Py_CLEAR(self->encoder);
1078 Py_CLEAR(self->decoder);
1079 Py_CLEAR(self->readnl);
1080 Py_CLEAR(self->decoded_chars);
1081 Py_CLEAR(self->pending_bytes);
1082 Py_CLEAR(self->snapshot);
1083 Py_CLEAR(self->errors);
1084 Py_CLEAR(self->raw);
1085 return 0;
1086}
1087
1088static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001089textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001090{
Antoine Pitroue033e062010-10-29 10:38:18 +00001091 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001092 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001093 return;
1094 _PyObject_GC_UNTRACK(self);
1095 if (self->weakreflist != NULL)
1096 PyObject_ClearWeakRefs((PyObject *)self);
1097 Py_CLEAR(self->dict);
1098 Py_TYPE(self)->tp_free((PyObject *)self);
1099}
1100
1101static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001102textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001103{
1104 Py_VISIT(self->buffer);
1105 Py_VISIT(self->encoding);
1106 Py_VISIT(self->encoder);
1107 Py_VISIT(self->decoder);
1108 Py_VISIT(self->readnl);
1109 Py_VISIT(self->decoded_chars);
1110 Py_VISIT(self->pending_bytes);
1111 Py_VISIT(self->snapshot);
1112 Py_VISIT(self->errors);
1113 Py_VISIT(self->raw);
1114
1115 Py_VISIT(self->dict);
1116 return 0;
1117}
1118
1119static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001120textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001122 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001123 return -1;
1124 Py_CLEAR(self->dict);
1125 return 0;
1126}
1127
1128static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001129textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001130
1131/* This macro takes some shortcuts to make the common case faster. */
1132#define CHECK_CLOSED(self) \
1133 do { \
1134 int r; \
1135 PyObject *_res; \
1136 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1137 if (self->raw != NULL) \
1138 r = _PyFileIO_closed(self->raw); \
1139 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001140 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001141 if (_res == NULL) \
1142 return NULL; \
1143 r = PyObject_IsTrue(_res); \
1144 Py_DECREF(_res); \
1145 if (r < 0) \
1146 return NULL; \
1147 } \
1148 if (r > 0) { \
1149 PyErr_SetString(PyExc_ValueError, \
1150 "I/O operation on closed file."); \
1151 return NULL; \
1152 } \
1153 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001154 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001155 return NULL; \
1156 } while (0)
1157
/* Make the enclosing function return NULL with a ValueError when the
   object is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1169
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1181
1182
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001183static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001184textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001185{
1186 PyObject *buffer, *res;
1187 CHECK_INITIALIZED(self);
1188 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1189 if (res == NULL)
1190 return NULL;
1191 Py_DECREF(res);
1192 buffer = self->buffer;
1193 self->buffer = NULL;
1194 self->detached = 1;
1195 self->ok = 0;
1196 return buffer;
1197}
1198
Antoine Pitrou24f36292009-03-28 22:16:42 +00001199/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001200 underlying buffered object, though. */
1201static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001202_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001203{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001204 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001205
1206 if (self->pending_bytes == NULL)
1207 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001208
1209 pending = self->pending_bytes;
1210 Py_INCREF(pending);
1211 self->pending_bytes_count = 0;
1212 Py_CLEAR(self->pending_bytes);
1213
1214 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1215 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001216 if (b == NULL)
1217 return -1;
1218 ret = PyObject_CallMethodObjArgs(self->buffer,
1219 _PyIO_str_write, b, NULL);
1220 Py_DECREF(b);
1221 if (ret == NULL)
1222 return -1;
1223 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001224 return 0;
1225}
1226
1227static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001228textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001229{
1230 PyObject *ret;
1231 PyObject *text; /* owned reference */
1232 PyObject *b;
1233 Py_ssize_t textlen;
1234 int haslf = 0;
1235 int needflush = 0;
1236
1237 CHECK_INITIALIZED(self);
1238
1239 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1240 return NULL;
1241 }
1242
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001243 if (PyUnicode_READY(text) == -1)
1244 return NULL;
1245
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246 CHECK_CLOSED(self);
1247
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001248 if (self->encoder == NULL)
1249 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001250
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001251 Py_INCREF(text);
1252
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001253 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001254
1255 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001256 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001257 haslf = 1;
1258
1259 if (haslf && self->writetranslate && self->writenl != NULL) {
1260 PyObject *newtext = PyObject_CallMethod(
1261 text, "replace", "ss", "\n", self->writenl);
1262 Py_DECREF(text);
1263 if (newtext == NULL)
1264 return NULL;
1265 text = newtext;
1266 }
1267
Antoine Pitroue96ec682011-07-23 21:46:35 +02001268 if (self->write_through)
1269 needflush = 1;
1270 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001271 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001272 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001273 needflush = 1;
1274
1275 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001276 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001277 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001278 self->encoding_start_of_stream = 0;
1279 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001280 else
1281 b = PyObject_CallMethodObjArgs(self->encoder,
1282 _PyIO_str_encode, text, NULL);
1283 Py_DECREF(text);
1284 if (b == NULL)
1285 return NULL;
1286
1287 if (self->pending_bytes == NULL) {
1288 self->pending_bytes = PyList_New(0);
1289 if (self->pending_bytes == NULL) {
1290 Py_DECREF(b);
1291 return NULL;
1292 }
1293 self->pending_bytes_count = 0;
1294 }
1295 if (PyList_Append(self->pending_bytes, b) < 0) {
1296 Py_DECREF(b);
1297 return NULL;
1298 }
1299 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1300 Py_DECREF(b);
1301 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001302 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001303 return NULL;
1304 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001305
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001306 if (needflush) {
1307 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1308 if (ret == NULL)
1309 return NULL;
1310 Py_DECREF(ret);
1311 }
1312
1313 Py_CLEAR(self->snapshot);
1314
1315 if (self->decoder) {
1316 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1317 if (ret == NULL)
1318 return NULL;
1319 Py_DECREF(ret);
1320 }
1321
1322 return PyLong_FromSsize_t(textlen);
1323}
1324
1325/* Steal a reference to chars and store it in the decoded_char buffer;
1326 */
1327static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001328textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001329{
1330 Py_CLEAR(self->decoded_chars);
1331 self->decoded_chars = chars;
1332 self->decoded_chars_used = 0;
1333}
1334
1335static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001336textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001337{
1338 PyObject *chars;
1339 Py_ssize_t avail;
1340
1341 if (self->decoded_chars == NULL)
1342 return PyUnicode_FromStringAndSize(NULL, 0);
1343
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001344 /* decoded_chars is guaranteed to be "ready". */
1345 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001346 - self->decoded_chars_used);
1347
1348 assert(avail >= 0);
1349
1350 if (n < 0 || n > avail)
1351 n = avail;
1352
1353 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001354 chars = PyUnicode_Substring(self->decoded_chars,
1355 self->decoded_chars_used,
1356 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001357 if (chars == NULL)
1358 return NULL;
1359 }
1360 else {
1361 chars = self->decoded_chars;
1362 Py_INCREF(chars);
1363 }
1364
1365 self->decoded_chars_used += n;
1366 return chars;
1367}
1368
1369/* Read and decode the next chunk of data from the BufferedReader.
1370 */
1371static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001372textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001373{
1374 PyObject *dec_buffer = NULL;
1375 PyObject *dec_flags = NULL;
1376 PyObject *input_chunk = NULL;
1377 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001378 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001379 int eof;
1380
1381 /* The return value is True unless EOF was reached. The decoded string is
1382 * placed in self._decoded_chars (replacing its previous value). The
1383 * entire input chunk is sent to the decoder, though some of it may remain
1384 * buffered in the decoder, yet to be converted.
1385 */
1386
1387 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001388 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001389 return -1;
1390 }
1391
1392 if (self->telling) {
1393 /* To prepare for tell(), we need to snapshot a point in the file
1394 * where the decoder's input buffer is empty.
1395 */
1396
1397 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1398 _PyIO_str_getstate, NULL);
1399 if (state == NULL)
1400 return -1;
1401 /* Given this, we know there was a valid snapshot point
1402 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1403 */
1404 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1405 Py_DECREF(state);
1406 return -1;
1407 }
1408 Py_INCREF(dec_buffer);
1409 Py_INCREF(dec_flags);
1410 Py_DECREF(state);
1411 }
1412
1413 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1414 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1415 if (chunk_size == NULL)
1416 goto fail;
1417 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001418 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1419 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001420 Py_DECREF(chunk_size);
1421 if (input_chunk == NULL)
1422 goto fail;
1423 assert(PyBytes_Check(input_chunk));
1424
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001425 nbytes = PyBytes_Size(input_chunk);
1426 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001427
1428 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1429 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1430 self->decoder, input_chunk, eof);
1431 }
1432 else {
1433 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1434 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1435 }
1436
1437 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1438 if (decoded_chars == NULL)
1439 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001440 if (PyUnicode_READY(decoded_chars) == -1)
1441 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001442 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001443 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001444 if (nchars > 0)
1445 self->b2cratio = (double) nbytes / nchars;
1446 else
1447 self->b2cratio = 0.0;
1448 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001449 eof = 0;
1450
1451 if (self->telling) {
1452 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1453 * next input to be decoded is dec_buffer + input_chunk.
1454 */
1455 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1456 if (next_input == NULL)
1457 goto fail;
1458 assert (PyBytes_Check(next_input));
1459 Py_DECREF(dec_buffer);
1460 Py_CLEAR(self->snapshot);
1461 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1462 }
1463 Py_DECREF(input_chunk);
1464
1465 return (eof == 0);
1466
1467 fail:
1468 Py_XDECREF(dec_buffer);
1469 Py_XDECREF(dec_flags);
1470 Py_XDECREF(input_chunk);
1471 return -1;
1472}
1473
1474static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001475textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001476{
1477 Py_ssize_t n = -1;
1478 PyObject *result = NULL, *chunks = NULL;
1479
1480 CHECK_INITIALIZED(self);
1481
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001482 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001483 return NULL;
1484
1485 CHECK_CLOSED(self);
1486
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001487 if (self->decoder == NULL)
1488 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001489
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001490 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001491 return NULL;
1492
1493 if (n < 0) {
1494 /* Read everything */
1495 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1496 PyObject *decoded;
1497 if (bytes == NULL)
1498 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001499
1500 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1501 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1502 bytes, 1);
1503 else
1504 decoded = PyObject_CallMethodObjArgs(
1505 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001506 Py_DECREF(bytes);
1507 if (decoded == NULL)
1508 goto fail;
1509
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001510 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511
1512 if (result == NULL) {
1513 Py_DECREF(decoded);
1514 return NULL;
1515 }
1516
1517 PyUnicode_AppendAndDel(&result, decoded);
1518 if (result == NULL)
1519 goto fail;
1520
1521 Py_CLEAR(self->snapshot);
1522 return result;
1523 }
1524 else {
1525 int res = 1;
1526 Py_ssize_t remaining = n;
1527
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001528 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001529 if (result == NULL)
1530 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001531 if (PyUnicode_READY(result) == -1)
1532 goto fail;
1533 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534
1535 /* Keep reading chunks until we have n characters to return */
1536 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001537 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538 if (res < 0)
1539 goto fail;
1540 if (res == 0) /* EOF */
1541 break;
1542 if (chunks == NULL) {
1543 chunks = PyList_New(0);
1544 if (chunks == NULL)
1545 goto fail;
1546 }
1547 if (PyList_Append(chunks, result) < 0)
1548 goto fail;
1549 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001550 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001551 if (result == NULL)
1552 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001553 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 }
1555 if (chunks != NULL) {
1556 if (result != NULL && PyList_Append(chunks, result) < 0)
1557 goto fail;
1558 Py_CLEAR(result);
1559 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1560 if (result == NULL)
1561 goto fail;
1562 Py_CLEAR(chunks);
1563 }
1564 return result;
1565 }
1566 fail:
1567 Py_XDECREF(result);
1568 Py_XDECREF(chunks);
1569 return NULL;
1570}
1571
1572
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001573/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574 that is to the NUL character. Otherwise the function will produce
1575 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001576static char *
1577find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001578{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001579 int size = PyUnicode_KIND_SIZE(kind, 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001580 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001581 while (PyUnicode_READ(kind, s, 0) > ch)
1582 s += size;
1583 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001584 return s;
1585 if (s == end)
1586 return NULL;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001587 s += size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001588 }
1589}
1590
1591Py_ssize_t
1592_PyIO_find_line_ending(
1593 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001594 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001595{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001596 int size = PyUnicode_KIND_SIZE(kind, 1);
1597 Py_ssize_t len = ((char*)end - (char*)start)/size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001598
1599 if (translated) {
1600 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001601 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602 if (pos != NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001603 return (pos - start)/size + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604 else {
1605 *consumed = len;
1606 return -1;
1607 }
1608 }
1609 else if (universal) {
1610 /* Universal newline search. Find any of \r, \r\n, \n
1611 * The decoder ensures that \r\n are not split in two pieces
1612 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001613 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001615 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001617 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001618 while (PyUnicode_READ(kind, s, 0) > '\r')
1619 s += size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620 if (s >= end) {
1621 *consumed = len;
1622 return -1;
1623 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001624 ch = PyUnicode_READ(kind, s, 0);
1625 s += size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001626 if (ch == '\n')
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001627 return (s - start)/size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001628 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001629 if (PyUnicode_READ(kind, s, 0) == '\n')
1630 return (s - start)/size + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001631 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001632 return (s - start)/size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001633 }
1634 }
1635 }
1636 else {
1637 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001638 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1639 char *nl = PyUnicode_DATA(readnl);
1640 /* Assume that readnl is an ASCII character. */
1641 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001643 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001644 if (pos != NULL)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001645 return (pos - start)/size + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001646 *consumed = len;
1647 return -1;
1648 }
1649 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001650 char *s = start;
1651 char *e = end - (readnl_len - 1)*size;
1652 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001653 if (e < s)
1654 e = s;
1655 while (s < e) {
1656 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 if (pos == NULL || pos >= e)
1659 break;
1660 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001661 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001662 break;
1663 }
1664 if (i == readnl_len)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001665 return (pos - start)/size + readnl_len;
1666 s = pos + size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 if (pos == NULL)
1670 *consumed = len;
1671 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001672 *consumed = (pos - start)/size;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673 return -1;
1674 }
1675 }
1676}
1677
1678static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001679_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680{
1681 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1682 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1683 int res;
1684
1685 CHECK_CLOSED(self);
1686
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001687 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 return NULL;
1689
1690 chunked = 0;
1691
1692 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001693 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001695 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001696 Py_ssize_t consumed = 0;
1697
1698 /* First, get some data if necessary */
1699 res = 1;
1700 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001701 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001702 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 if (res < 0)
1704 goto error;
1705 if (res == 0)
1706 break;
1707 }
1708 if (res == 0) {
1709 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001710 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 Py_CLEAR(self->snapshot);
1712 start = endpos = offset_to_buffer = 0;
1713 break;
1714 }
1715
1716 if (remaining == NULL) {
1717 line = self->decoded_chars;
1718 start = self->decoded_chars_used;
1719 offset_to_buffer = 0;
1720 Py_INCREF(line);
1721 }
1722 else {
1723 assert(self->decoded_chars_used == 0);
1724 line = PyUnicode_Concat(remaining, self->decoded_chars);
1725 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001726 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001727 Py_CLEAR(remaining);
1728 if (line == NULL)
1729 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001730 if (PyUnicode_READY(line) == -1)
1731 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001732 }
1733
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001734 ptr = PyUnicode_DATA(line);
1735 line_len = PyUnicode_GET_LENGTH(line);
1736 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001737
1738 endpos = _PyIO_find_line_ending(
1739 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001740 kind,
1741 ptr + PyUnicode_KIND_SIZE(kind, start),
1742 ptr + PyUnicode_KIND_SIZE(kind, line_len),
1743 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001744 if (endpos >= 0) {
1745 endpos += start;
1746 if (limit >= 0 && (endpos - start) + chunked >= limit)
1747 endpos = start + limit - chunked;
1748 break;
1749 }
1750
1751 /* We can put aside up to `endpos` */
1752 endpos = consumed + start;
1753 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1754 /* Didn't find line ending, but reached length limit */
1755 endpos = start + limit - chunked;
1756 break;
1757 }
1758
1759 if (endpos > start) {
1760 /* No line ending seen yet - put aside current data */
1761 PyObject *s;
1762 if (chunks == NULL) {
1763 chunks = PyList_New(0);
1764 if (chunks == NULL)
1765 goto error;
1766 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001767 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001768 if (s == NULL)
1769 goto error;
1770 if (PyList_Append(chunks, s) < 0) {
1771 Py_DECREF(s);
1772 goto error;
1773 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001774 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001775 Py_DECREF(s);
1776 }
1777 /* There may be some remaining bytes we'll have to prepend to the
1778 next chunk of data */
1779 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001780 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001781 if (remaining == NULL)
1782 goto error;
1783 }
1784 Py_CLEAR(line);
1785 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001786 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001787 }
1788
1789 if (line != NULL) {
1790 /* Our line ends in the current buffer */
1791 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001792 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1793 PyObject *s = PyUnicode_Substring(line, start, endpos);
1794 Py_CLEAR(line);
1795 if (s == NULL)
1796 goto error;
1797 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001798 }
1799 }
1800 if (remaining != NULL) {
1801 if (chunks == NULL) {
1802 chunks = PyList_New(0);
1803 if (chunks == NULL)
1804 goto error;
1805 }
1806 if (PyList_Append(chunks, remaining) < 0)
1807 goto error;
1808 Py_CLEAR(remaining);
1809 }
1810 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001811 if (line != NULL) {
1812 if (PyList_Append(chunks, line) < 0)
1813 goto error;
1814 Py_DECREF(line);
1815 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1817 if (line == NULL)
1818 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001819 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001820 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001821 if (line == NULL) {
1822 Py_INCREF(_PyIO_empty_str);
1823 line = _PyIO_empty_str;
1824 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001825
1826 return line;
1827
1828 error:
1829 Py_XDECREF(chunks);
1830 Py_XDECREF(remaining);
1831 Py_XDECREF(line);
1832 return NULL;
1833}
1834
1835static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001836textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001837{
1838 Py_ssize_t limit = -1;
1839
1840 CHECK_INITIALIZED(self);
1841 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1842 return NULL;
1843 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001844 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001845}
1846
1847/* Seek and Tell */
1848
1849typedef struct {
1850 Py_off_t start_pos;
1851 int dec_flags;
1852 int bytes_to_feed;
1853 int chars_to_skip;
1854 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001855} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001856
/*
   To speed up cookie packing/unpacking, the fields are stored in a
   temporary byte buffer which is converted with _PyLong_FromByteArray()
   or _PyLong_AsByteArray() (resp.).
   The following macros define at which offsets in that intermediary
   buffer the various cookie_type fields are stored.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#ifdef WORDS_BIGENDIAN

# define IS_LITTLE_ENDIAN   0

/* The least significant byte of start_pos must also be the least
   significant byte of the cookie, so in big-endian mode the fields are
   laid out in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos
   naturally ends up as the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1894
1895static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001896textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001897{
1898 unsigned char buffer[COOKIE_BUF_LEN];
1899 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1900 if (cookieLong == NULL)
1901 return -1;
1902
1903 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1904 IS_LITTLE_ENDIAN, 0) < 0) {
1905 Py_DECREF(cookieLong);
1906 return -1;
1907 }
1908 Py_DECREF(cookieLong);
1909
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001910 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1911 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1912 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1913 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1914 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001915
1916 return 0;
1917}
1918
1919static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001920textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001921{
1922 unsigned char buffer[COOKIE_BUF_LEN];
1923
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001924 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1925 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1926 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1927 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1928 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001929
1930 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1931}
1932#undef IS_LITTLE_ENDIAN
1933
1934static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001935_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001936{
1937 PyObject *res;
1938 /* When seeking to the start of the stream, we call decoder.reset()
1939 rather than decoder.getstate().
1940 This is for a few decoders such as utf-16 for which the state value
1941 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1942 utf-16, that we are expecting a BOM).
1943 */
1944 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1945 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1946 else
1947 res = PyObject_CallMethod(self->decoder, "setstate",
1948 "((yi))", "", cookie->dec_flags);
1949 if (res == NULL)
1950 return -1;
1951 Py_DECREF(res);
1952 return 0;
1953}
1954
Antoine Pitroue4501852009-05-14 18:55:55 +00001955static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001956_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001957{
1958 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001959 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001960 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1961 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1962 self->encoding_start_of_stream = 1;
1963 }
1964 else {
1965 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1966 _PyIO_zero, NULL);
1967 self->encoding_start_of_stream = 0;
1968 }
1969 if (res == NULL)
1970 return -1;
1971 Py_DECREF(res);
1972 return 0;
1973}
1974
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001975static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001976textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977{
1978 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001979 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001980 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001981 PyObject *res;
1982 int cmp;
1983
1984 CHECK_INITIALIZED(self);
1985
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001986 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1987 return NULL;
1988 CHECK_CLOSED(self);
1989
1990 Py_INCREF(cookieObj);
1991
1992 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001993 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 goto fail;
1995 }
1996
1997 if (whence == 1) {
1998 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00001999 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000 if (cmp < 0)
2001 goto fail;
2002
2003 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002004 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005 goto fail;
2006 }
2007
2008 /* Seeking to the current position should attempt to
2009 * sync the underlying buffer with the current position.
2010 */
2011 Py_DECREF(cookieObj);
2012 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2013 if (cookieObj == NULL)
2014 goto fail;
2015 }
2016 else if (whence == 2) {
2017 /* seek relative to end of file */
2018
Antoine Pitroue4501852009-05-14 18:55:55 +00002019 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002020 if (cmp < 0)
2021 goto fail;
2022
2023 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002024 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025 goto fail;
2026 }
2027
2028 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2029 if (res == NULL)
2030 goto fail;
2031 Py_DECREF(res);
2032
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002033 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002034 Py_CLEAR(self->snapshot);
2035 if (self->decoder) {
2036 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2037 if (res == NULL)
2038 goto fail;
2039 Py_DECREF(res);
2040 }
2041
2042 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2043 Py_XDECREF(cookieObj);
2044 return res;
2045 }
2046 else if (whence != 0) {
2047 PyErr_Format(PyExc_ValueError,
2048 "invalid whence (%d, should be 0, 1 or 2)", whence);
2049 goto fail;
2050 }
2051
Antoine Pitroue4501852009-05-14 18:55:55 +00002052 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 if (cmp < 0)
2054 goto fail;
2055
2056 if (cmp == 1) {
2057 PyErr_Format(PyExc_ValueError,
2058 "negative seek position %R", cookieObj);
2059 goto fail;
2060 }
2061
2062 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2063 if (res == NULL)
2064 goto fail;
2065 Py_DECREF(res);
2066
2067 /* The strategy of seek() is to go back to the safe start point
2068 * and replay the effect of read(chars_to_skip) from there.
2069 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002070 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071 goto fail;
2072
2073 /* Seek back to the safe start point. */
2074 posobj = PyLong_FromOff_t(cookie.start_pos);
2075 if (posobj == NULL)
2076 goto fail;
2077 res = PyObject_CallMethodObjArgs(self->buffer,
2078 _PyIO_str_seek, posobj, NULL);
2079 Py_DECREF(posobj);
2080 if (res == NULL)
2081 goto fail;
2082 Py_DECREF(res);
2083
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002084 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002085 Py_CLEAR(self->snapshot);
2086
2087 /* Restore the decoder to its state from the safe start point. */
2088 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002089 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002090 goto fail;
2091 }
2092
2093 if (cookie.chars_to_skip) {
2094 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2095 PyObject *input_chunk = PyObject_CallMethod(
2096 self->buffer, "read", "i", cookie.bytes_to_feed);
2097 PyObject *decoded;
2098
2099 if (input_chunk == NULL)
2100 goto fail;
2101
2102 assert (PyBytes_Check(input_chunk));
2103
2104 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2105 if (self->snapshot == NULL) {
2106 Py_DECREF(input_chunk);
2107 goto fail;
2108 }
2109
2110 decoded = PyObject_CallMethod(self->decoder, "decode",
2111 "Oi", input_chunk, (int)cookie.need_eof);
2112
2113 if (decoded == NULL)
2114 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002115 if (PyUnicode_READY(decoded) == -1) {
2116 Py_DECREF(decoded);
2117 goto fail;
2118 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002120 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002121
2122 /* Skip chars_to_skip of the decoded characters. */
2123 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2124 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2125 goto fail;
2126 }
2127 self->decoded_chars_used = cookie.chars_to_skip;
2128 }
2129 else {
2130 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2131 if (self->snapshot == NULL)
2132 goto fail;
2133 }
2134
Antoine Pitroue4501852009-05-14 18:55:55 +00002135 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2136 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002137 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002138 goto fail;
2139 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002140 return cookieObj;
2141 fail:
2142 Py_XDECREF(cookieObj);
2143 return NULL;
2144
2145}
2146
2147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002148textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002149{
2150 PyObject *res;
2151 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002152 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002153 PyObject *next_input;
2154 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002155 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002156 PyObject *saved_state = NULL;
2157 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002158 char *dec_buffer;
2159 Py_ssize_t dec_buffer_len;
2160 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002161
2162 CHECK_INITIALIZED(self);
2163 CHECK_CLOSED(self);
2164
2165 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002166 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167 goto fail;
2168 }
2169 if (!self->telling) {
2170 PyErr_SetString(PyExc_IOError,
2171 "telling position disabled by next() call");
2172 goto fail;
2173 }
2174
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002175 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002176 return NULL;
2177 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2178 if (res == NULL)
2179 goto fail;
2180 Py_DECREF(res);
2181
2182 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2183 if (posobj == NULL)
2184 goto fail;
2185
2186 if (self->decoder == NULL || self->snapshot == NULL) {
2187 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2188 return posobj;
2189 }
2190
2191#if defined(HAVE_LARGEFILE_SUPPORT)
2192 cookie.start_pos = PyLong_AsLongLong(posobj);
2193#else
2194 cookie.start_pos = PyLong_AsLong(posobj);
2195#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002196 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002197 if (PyErr_Occurred())
2198 goto fail;
2199
2200 /* Skip backward to the snapshot point (see _read_chunk). */
2201 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2202 goto fail;
2203
2204 assert (PyBytes_Check(next_input));
2205
2206 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2207
2208 /* How many decoded characters have been used up since the snapshot? */
2209 if (self->decoded_chars_used == 0) {
2210 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002211 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002212 }
2213
2214 chars_to_skip = self->decoded_chars_used;
2215
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002216 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002217 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2218 _PyIO_str_getstate, NULL);
2219 if (saved_state == NULL)
2220 goto fail;
2221
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002222#define DECODER_GETSTATE() do { \
2223 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2224 _PyIO_str_getstate, NULL); \
2225 if (_state == NULL) \
2226 goto fail; \
2227 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2228 Py_DECREF(_state); \
2229 goto fail; \
2230 } \
2231 Py_DECREF(_state); \
2232 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002233
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002234 /* TODO: replace assert with exception */
2235#define DECODER_DECODE(start, len, res) do { \
2236 PyObject *_decoded = PyObject_CallMethod( \
2237 self->decoder, "decode", "y#", start, len); \
2238 if (_decoded == NULL) \
2239 goto fail; \
2240 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002241 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002242 Py_DECREF(_decoded); \
2243 } while (0)
2244
2245 /* Fast search for an acceptable start point, close to our
2246 current pos */
2247 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2248 skip_back = 1;
2249 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2250 input = PyBytes_AS_STRING(next_input);
2251 while (skip_bytes > 0) {
2252 /* Decode up to temptative start point */
2253 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2254 goto fail;
2255 DECODER_DECODE(input, skip_bytes, chars_decoded);
2256 if (chars_decoded <= chars_to_skip) {
2257 DECODER_GETSTATE();
2258 if (dec_buffer_len == 0) {
2259 /* Before pos and no bytes buffered in decoder => OK */
2260 cookie.dec_flags = dec_flags;
2261 chars_to_skip -= chars_decoded;
2262 break;
2263 }
2264 /* Skip back by buffered amount and reset heuristic */
2265 skip_bytes -= dec_buffer_len;
2266 skip_back = 1;
2267 }
2268 else {
2269 /* We're too far ahead, skip back a bit */
2270 skip_bytes -= skip_back;
2271 skip_back *= 2;
2272 }
2273 }
2274 if (skip_bytes <= 0) {
2275 skip_bytes = 0;
2276 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2277 goto fail;
2278 }
2279
2280 /* Note our initial start point. */
2281 cookie.start_pos += skip_bytes;
2282 cookie.chars_to_skip = chars_to_skip;
2283 if (chars_to_skip == 0)
2284 goto finally;
2285
2286 /* We should be close to the desired position. Now feed the decoder one
2287 * byte at a time until we reach the `chars_to_skip` target.
2288 * As we go, note the nearest "safe start point" before the current
2289 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002290 * can safely start from there and advance to this location).
2291 */
2292 chars_decoded = 0;
2293 input = PyBytes_AS_STRING(next_input);
2294 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002295 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002296 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002297 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002298
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002299 DECODER_DECODE(input, 1, n);
2300 /* We got n chars for 1 byte */
2301 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002302 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002303 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002304
2305 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2306 /* Decoder buffer is empty, so this is a safe start point. */
2307 cookie.start_pos += cookie.bytes_to_feed;
2308 chars_to_skip -= chars_decoded;
2309 cookie.dec_flags = dec_flags;
2310 cookie.bytes_to_feed = 0;
2311 chars_decoded = 0;
2312 }
2313 if (chars_decoded >= chars_to_skip)
2314 break;
2315 input++;
2316 }
2317 if (input == input_end) {
2318 /* We didn't get enough decoded data; signal EOF to get more. */
2319 PyObject *decoded = PyObject_CallMethod(
2320 self->decoder, "decode", "yi", "", /* final = */ 1);
2321 if (decoded == NULL)
2322 goto fail;
2323 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002324 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002325 Py_DECREF(decoded);
2326 cookie.need_eof = 1;
2327
2328 if (chars_decoded < chars_to_skip) {
2329 PyErr_SetString(PyExc_IOError,
2330 "can't reconstruct logical file position");
2331 goto fail;
2332 }
2333 }
2334
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002335finally:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002336 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2337 Py_DECREF(saved_state);
2338 if (res == NULL)
2339 return NULL;
2340 Py_DECREF(res);
2341
2342 /* The returned cookie corresponds to the last safe start point. */
2343 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002344 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002346fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347 if (saved_state) {
2348 PyObject *type, *value, *traceback;
2349 PyErr_Fetch(&type, &value, &traceback);
2350
2351 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2352 Py_DECREF(saved_state);
2353 if (res == NULL)
2354 return NULL;
2355 Py_DECREF(res);
2356
2357 PyErr_Restore(type, value, traceback);
2358 }
2359 return NULL;
2360}
2361
2362static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002363textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364{
2365 PyObject *pos = Py_None;
2366 PyObject *res;
2367
2368 CHECK_INITIALIZED(self)
2369 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2370 return NULL;
2371 }
2372
2373 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2374 if (res == NULL)
2375 return NULL;
2376 Py_DECREF(res);
2377
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002378 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002379}
2380
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002381static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002382textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002383{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002384 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002385
2386 CHECK_INITIALIZED(self);
2387
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002388 res = PyUnicode_FromString("<_io.TextIOWrapper");
2389 if (res == NULL)
2390 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002391 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2392 if (nameobj == NULL) {
2393 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2394 PyErr_Clear();
2395 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002396 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002397 }
2398 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002399 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002400 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002401 if (s == NULL)
2402 goto error;
2403 PyUnicode_AppendAndDel(&res, s);
2404 if (res == NULL)
2405 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002406 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002407 modeobj = PyObject_GetAttrString((PyObject *) self, "mode");
2408 if (modeobj == NULL) {
2409 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2410 PyErr_Clear();
2411 else
2412 goto error;
2413 }
2414 else {
2415 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2416 Py_DECREF(modeobj);
2417 if (s == NULL)
2418 goto error;
2419 PyUnicode_AppendAndDel(&res, s);
2420 if (res == NULL)
2421 return NULL;
2422 }
2423 s = PyUnicode_FromFormat("%U encoding=%R>",
2424 res, self->encoding);
2425 Py_DECREF(res);
2426 return s;
2427error:
2428 Py_XDECREF(res);
2429 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002430}
2431
2432
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002433/* Inquiries */
2434
2435static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002436textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002437{
2438 CHECK_INITIALIZED(self);
2439 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2440}
2441
2442static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002443textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002444{
2445 CHECK_INITIALIZED(self);
2446 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2447}
2448
2449static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002450textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002451{
2452 CHECK_INITIALIZED(self);
2453 return PyObject_CallMethod(self->buffer, "readable", NULL);
2454}
2455
2456static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002457textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002458{
2459 CHECK_INITIALIZED(self);
2460 return PyObject_CallMethod(self->buffer, "writable", NULL);
2461}
2462
2463static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002464textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002465{
2466 CHECK_INITIALIZED(self);
2467 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2468}
2469
2470static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002471textiowrapper_getstate(textio *self, PyObject *args)
2472{
2473 PyErr_Format(PyExc_TypeError,
2474 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2475 return NULL;
2476}
2477
2478static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002479textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480{
2481 CHECK_INITIALIZED(self);
2482 CHECK_CLOSED(self);
2483 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002484 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485 return NULL;
2486 return PyObject_CallMethod(self->buffer, "flush", NULL);
2487}
2488
2489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002490textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002491{
2492 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002493 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002495
Antoine Pitrou6be88762010-05-03 16:48:20 +00002496 res = textiowrapper_closed_get(self, NULL);
2497 if (res == NULL)
2498 return NULL;
2499 r = PyObject_IsTrue(res);
2500 Py_DECREF(res);
2501 if (r < 0)
2502 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002503
Antoine Pitrou6be88762010-05-03 16:48:20 +00002504 if (r > 0) {
2505 Py_RETURN_NONE; /* stream already closed */
2506 }
2507 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002508 if (self->deallocating) {
2509 res = PyObject_CallMethod(self->buffer, "_dealloc_warn", "O", self);
2510 if (res)
2511 Py_DECREF(res);
2512 else
2513 PyErr_Clear();
2514 }
Antoine Pitrou6be88762010-05-03 16:48:20 +00002515 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2516 if (res == NULL) {
2517 return NULL;
2518 }
2519 else
2520 Py_DECREF(res);
2521
2522 return PyObject_CallMethod(self->buffer, "close", NULL);
2523 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002524}
2525
2526static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002527textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528{
2529 PyObject *line;
2530
2531 CHECK_INITIALIZED(self);
2532
2533 self->telling = 0;
2534 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2535 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002536 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537 }
2538 else {
2539 line = PyObject_CallMethodObjArgs((PyObject *)self,
2540 _PyIO_str_readline, NULL);
2541 if (line && !PyUnicode_Check(line)) {
2542 PyErr_Format(PyExc_IOError,
2543 "readline() should have returned an str object, "
2544 "not '%.200s'", Py_TYPE(line)->tp_name);
2545 Py_DECREF(line);
2546 return NULL;
2547 }
2548 }
2549
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002550 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551 return NULL;
2552
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002553 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554 /* Reached EOF or would have blocked */
2555 Py_DECREF(line);
2556 Py_CLEAR(self->snapshot);
2557 self->telling = self->seekable;
2558 return NULL;
2559 }
2560
2561 return line;
2562}
2563
2564static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002565textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002566{
2567 CHECK_INITIALIZED(self);
2568 return PyObject_GetAttrString(self->buffer, "name");
2569}
2570
2571static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002572textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002573{
2574 CHECK_INITIALIZED(self);
2575 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2576}
2577
2578static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002579textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002580{
2581 PyObject *res;
2582 CHECK_INITIALIZED(self);
2583 if (self->decoder == NULL)
2584 Py_RETURN_NONE;
2585 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2586 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002587 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2588 PyErr_Clear();
2589 Py_RETURN_NONE;
2590 }
2591 else {
2592 return NULL;
2593 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002594 }
2595 return res;
2596}
2597
2598static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002599textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002600{
2601 CHECK_INITIALIZED(self);
2602 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2603}
2604
2605static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002606textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002607{
2608 CHECK_INITIALIZED(self);
2609 return PyLong_FromSsize_t(self->chunk_size);
2610}
2611
2612static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002613textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002614{
2615 Py_ssize_t n;
2616 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002617 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002618 if (n == -1 && PyErr_Occurred())
2619 return -1;
2620 if (n <= 0) {
2621 PyErr_SetString(PyExc_ValueError,
2622 "a strictly positive integer is required");
2623 return -1;
2624 }
2625 self->chunk_size = n;
2626 return 0;
2627}
2628
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002629static PyMethodDef textiowrapper_methods[] = {
2630 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2631 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2632 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2633 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2634 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2635 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002636
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002637 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2638 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2639 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2640 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2641 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002642 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002644 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2645 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2646 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002647 {NULL, NULL}
2648};
2649
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002650static PyMemberDef textiowrapper_members[] = {
2651 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2652 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2653 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002654 {NULL}
2655};
2656
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002657static PyGetSetDef textiowrapper_getset[] = {
2658 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2659 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2661*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002662 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2663 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2664 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2665 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002666 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002667};
2668
2669PyTypeObject PyTextIOWrapper_Type = {
2670 PyVarObject_HEAD_INIT(NULL, 0)
2671 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002672 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002674 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002675 0, /*tp_print*/
2676 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002677 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002678 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002679 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680 0, /*tp_as_number*/
2681 0, /*tp_as_sequence*/
2682 0, /*tp_as_mapping*/
2683 0, /*tp_hash */
2684 0, /*tp_call*/
2685 0, /*tp_str*/
2686 0, /*tp_getattro*/
2687 0, /*tp_setattro*/
2688 0, /*tp_as_buffer*/
2689 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2690 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002691 textiowrapper_doc, /* tp_doc */
2692 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2693 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002694 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002695 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002697 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2698 textiowrapper_methods, /* tp_methods */
2699 textiowrapper_members, /* tp_members */
2700 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002701 0, /* tp_base */
2702 0, /* tp_dict */
2703 0, /* tp_descr_get */
2704 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002705 offsetof(textio, dict), /*tp_dictoffset*/
2706 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 0, /* tp_alloc */
2708 PyType_GenericNew, /* tp_new */
2709};