/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
/* Docstring installed as tp_doc of the _io._TextIOBase type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
/* Method table of _io._TextIOBase.  Every entry points at a stub in this
   file that raises the unsupported-operation error.  The table ends with
   a {NULL, NULL} sentinel. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}
};
127
/* Read-only attributes of _io._TextIOBase (the setter slot is NULL for
   each one).  The table ends with a {NULL} sentinel. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}
};
134
/* Type object for _io._TextIOBase.  It declares no instance size of its
   own (tp_basicsize is 0), derives from PyIOBase_Type, may be subclassed
   (Py_TPFLAGS_BASETYPE), and only contributes the method and getset
   tables plus the docstring. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
175
176
177/* IncrementalNewlineDecoder */
178
/* Docstring installed as tp_doc of _io.IncrementalNewlineDecoder. */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode.  It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
    "records the types of newlines encountered.  When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
188
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;  /* wrapped decoder; Py_None means "input is already
                           text"; NULL until __init__ has run */
    PyObject *errors;   /* error-handling setting passed to __init__, or the
                           string "strict" when none was given */
    /* NOTE: the two flags below are *signed* one-bit fields, so a stored 1
       does not necessarily read back as 1.  They must only ever be tested
       for zero / non-zero, which is how the code in this file uses them. */
    signed int pendingcr: 1;   /* a trailing '\r' was held back by decode() */
    signed int translate: 1;   /* translate "\r\n" and "\r" into "\n" */
    unsigned int seennl: 3;    /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags recording which kinds of line ending have been encountered. */
#define SEEN_CR   0x1
#define SEEN_LF   0x2
#define SEEN_CRLF 0x4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
/* Core of IncrementalNewlineDecoder.decode().

   _self  -- the decoder object (an nldecoder_object).
   input  -- data handed to the wrapped decoder's decode() method, or,
             when the wrapped decoder is None, the text itself.
   final  -- non-zero when no more input will follow.

   Returns a new reference to the resulting unicode string, or NULL with
   an exception set.  Raises ValueError when __init__ has not run and
   TypeError when the decoded result is not a unicode object.

   Side effects on self: `pendingcr` is updated when a trailing '\r' is
   held back or re-emitted, and `seennl` accumulates the line-ending
   kinds found in the output. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input is used as the output as-is; the
           type check below rejects anything that is not unicode. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    /* A '\r' held back by the previous call is put back in front of the
       new output, but only once there is something to attach it to or
       this is the last call. */
    output_len = PyUnicode_GET_SIZE(output);
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place only when we hold the sole reference;
               otherwise build a shortened copy. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           (A byte match inside a wider character only costs the fast
           path: the slower branches below compare whole characters.)
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        /* NOTE(review): the scanning loops below advance without a bounds
           test inside their inner `while`; they presumably rely on the
           buffer being terminated by a 0 character at index `len`, which
           stops the inner loop so that the `> end` test can run --
           confirm against the unicode object implementation. */
        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* No translation requested: only classify the line endings. */
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translation requested: rewrite "\r\n" and "\r" as "\n" while
               classifying, writing through `out` and reading through `in`.
               `out` never gets ahead of `in`, so the in-place case is safe. */
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* Any other character <= '\r': stop once `in` has moved
                   past the end of the data, otherwise copy it through. */
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* Each "\r\n" collapsed into one character: trim the tail. */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
461
462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000463incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
/* Method table of _io.IncrementalNewlineDecoder.  No per-method
   docstrings are supplied (the ml_doc slot is left out of each entry).
   The table ends with a {NULL} sentinel. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
    {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
    {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
    {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
    {NULL}
};
566
/* Read-only `newlines` attribute of _io.IncrementalNewlineDecoder (no
   setter, no docstring).  The table ends with a {NULL} sentinel. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};
571
/* Type object for _io.IncrementalNewlineDecoder.  Instances are
   nldecoder_object structs created by PyType_GenericNew and set up by
   incrementalnewlinedecoder_init; the type may be subclassed
   (Py_TPFLAGS_BASETYPE) and names no explicit base (tp_base is 0). */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
612
613
614/* TextIOWrapper */
615
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
633 "newline.\n"
634 "\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
637 );
638
/* Signature of the specialized encoders: takes two object pointers (the
   encoders in this file receive the text wrapper and the text to encode)
   and returns an object pointer. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
641
/* Instance layout of the text wrapper object. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;       /* a bytes object: the specialized encoders read
                               it with PyBytes_AS_STRING */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000693
694
695/* A couple of specialized cases in order to bypass the slow incremental
696 encoding methods for the most popular encodings. */
697
698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700{
701 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
702 PyUnicode_GET_SIZE(text),
703 PyBytes_AS_STRING(self->errors));
704}
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
709 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
710 PyUnicode_GET_SIZE(text),
711 PyBytes_AS_STRING(self->errors), 1);
712}
713
714static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000715utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716{
717 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
718 PyUnicode_GET_SIZE(text),
719 PyBytes_AS_STRING(self->errors), -1);
720}
721
722static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000723utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724{
Antoine Pitroue4501852009-05-14 18:55:55 +0000725 if (!self->encoding_start_of_stream) {
726 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000728 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 }
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
Antoine Pitroue4501852009-05-14 18:55:55 +0000738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000740{
741 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
742 PyUnicode_GET_SIZE(text),
743 PyBytes_AS_STRING(self->errors), 1);
744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
749 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
750 PyUnicode_GET_SIZE(text),
751 PyBytes_AS_STRING(self->errors), -1);
752}
753
754static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000755utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000756{
757 if (!self->encoding_start_of_stream) {
758 /* Skip the BOM and use native byte ordering */
759#if defined(WORDS_BIGENDIAN)
760 return utf32be_encode(self, text);
761#else
762 return utf32le_encode(self, text);
763#endif
764 }
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 PyUnicode_GET_SIZE(text),
767 PyBytes_AS_STRING(self->errors), 0);
768}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
773 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
774 PyUnicode_GET_SIZE(text),
775 PyBytes_AS_STRING(self->errors));
776}
777
778static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000779latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780{
781 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
782 PyUnicode_GET_SIZE(text),
783 PyBytes_AS_STRING(self->errors));
784}
785
786/* Map normalized encoding names onto the specialized encoding funcs */
787
/* One row of the encoding-name -> specialized-encoder table. */
typedef struct {
    const char *name;           /* encoding name to match */
    encodefunc_t encodefunc;    /* encoder to use for that name */
} encodefuncentry;
792
/* Table of the encodings that have a specialized encoder in this file.
   The encoders take a `textio *` first argument and are cast to
   encodefunc_t to fit the table.  The table ends with a {NULL, NULL}
   sentinel. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
805
806
807static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000808textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809{
810 char *kwlist[] = {"buffer", "encoding", "errors",
811 "newline", "line_buffering",
812 NULL};
813 PyObject *buffer, *raw;
814 char *encoding = NULL;
815 char *errors = NULL;
816 char *newline = NULL;
817 int line_buffering = 0;
818 _PyIO_State *state = IO_STATE;
819
820 PyObject *res;
821 int r;
822
823 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000824 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000825 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
826 kwlist, &buffer, &encoding, &errors,
827 &newline, &line_buffering))
828 return -1;
829
830 if (newline && newline[0] != '\0'
831 && !(newline[0] == '\n' && newline[1] == '\0')
832 && !(newline[0] == '\r' && newline[1] == '\0')
833 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
834 PyErr_Format(PyExc_ValueError,
835 "illegal newline value: %s", newline);
836 return -1;
837 }
838
839 Py_CLEAR(self->buffer);
840 Py_CLEAR(self->encoding);
841 Py_CLEAR(self->encoder);
842 Py_CLEAR(self->decoder);
843 Py_CLEAR(self->readnl);
844 Py_CLEAR(self->decoded_chars);
845 Py_CLEAR(self->pending_bytes);
846 Py_CLEAR(self->snapshot);
847 Py_CLEAR(self->errors);
848 Py_CLEAR(self->raw);
849 self->decoded_chars_used = 0;
850 self->pending_bytes_count = 0;
851 self->encodefunc = NULL;
852
853 if (encoding == NULL) {
854 /* Try os.device_encoding(fileno) */
855 PyObject *fileno;
856 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
857 /* Ignore only AttributeError and UnsupportedOperation */
858 if (fileno == NULL) {
859 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
860 PyErr_ExceptionMatches(state->unsupported_operation)) {
861 PyErr_Clear();
862 }
863 else {
864 goto error;
865 }
866 }
867 else {
868 self->encoding = PyObject_CallMethod(state->os_module,
869 "device_encoding",
870 "N", fileno);
871 if (self->encoding == NULL)
872 goto error;
873 else if (!PyUnicode_Check(self->encoding))
874 Py_CLEAR(self->encoding);
875 }
876 }
877 if (encoding == NULL && self->encoding == NULL) {
878 if (state->locale_module == NULL) {
879 state->locale_module = PyImport_ImportModule("locale");
880 if (state->locale_module == NULL)
881 goto catch_ImportError;
882 else
883 goto use_locale;
884 }
885 else {
886 use_locale:
887 self->encoding = PyObject_CallMethod(
888 state->locale_module, "getpreferredencoding", NULL);
889 if (self->encoding == NULL) {
890 catch_ImportError:
891 /*
892 Importing locale can raise a ImportError because of
893 _functools, and locale.getpreferredencoding can raise a
894 ImportError if _locale is not available. These will happen
895 during module building.
896 */
897 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
898 PyErr_Clear();
899 self->encoding = PyUnicode_FromString("ascii");
900 }
901 else
902 goto error;
903 }
904 else if (!PyUnicode_Check(self->encoding))
905 Py_CLEAR(self->encoding);
906 }
907 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000908 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000909 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000910 if (encoding == NULL)
911 goto error;
912 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000913 else if (encoding != NULL) {
914 self->encoding = PyUnicode_FromString(encoding);
915 if (self->encoding == NULL)
916 goto error;
917 }
918 else {
919 PyErr_SetString(PyExc_IOError,
920 "could not determine default encoding");
921 }
922
923 if (errors == NULL)
924 errors = "strict";
925 self->errors = PyBytes_FromString(errors);
926 if (self->errors == NULL)
927 goto error;
928
929 self->chunk_size = 8192;
930 self->readuniversal = (newline == NULL || newline[0] == '\0');
931 self->line_buffering = line_buffering;
932 self->readtranslate = (newline == NULL);
933 if (newline) {
934 self->readnl = PyUnicode_FromString(newline);
935 if (self->readnl == NULL)
936 return -1;
937 }
938 self->writetranslate = (newline == NULL || newline[0] != '\0');
939 if (!self->readuniversal && self->readnl) {
940 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000941 if (self->writenl == NULL)
942 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000943 if (!strcmp(self->writenl, "\n"))
944 self->writenl = NULL;
945 }
946#ifdef MS_WINDOWS
947 else
948 self->writenl = "\r\n";
949#endif
950
951 /* Build the decoder object */
952 res = PyObject_CallMethod(buffer, "readable", NULL);
953 if (res == NULL)
954 goto error;
955 r = PyObject_IsTrue(res);
956 Py_DECREF(res);
957 if (r == -1)
958 goto error;
959 if (r == 1) {
960 self->decoder = PyCodec_IncrementalDecoder(
961 encoding, errors);
962 if (self->decoder == NULL)
963 goto error;
964
965 if (self->readuniversal) {
966 PyObject *incrementalDecoder = PyObject_CallFunction(
967 (PyObject *)&PyIncrementalNewlineDecoder_Type,
968 "Oi", self->decoder, (int)self->readtranslate);
969 if (incrementalDecoder == NULL)
970 goto error;
971 Py_CLEAR(self->decoder);
972 self->decoder = incrementalDecoder;
973 }
974 }
975
976 /* Build the encoder object */
977 res = PyObject_CallMethod(buffer, "writable", NULL);
978 if (res == NULL)
979 goto error;
980 r = PyObject_IsTrue(res);
981 Py_DECREF(res);
982 if (r == -1)
983 goto error;
984 if (r == 1) {
985 PyObject *ci;
986 self->encoder = PyCodec_IncrementalEncoder(
987 encoding, errors);
988 if (self->encoder == NULL)
989 goto error;
990 /* Get the normalized named of the codec */
991 ci = _PyCodec_Lookup(encoding);
992 if (ci == NULL)
993 goto error;
994 res = PyObject_GetAttrString(ci, "name");
995 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +0000996 if (res == NULL) {
997 if (PyErr_ExceptionMatches(PyExc_AttributeError))
998 PyErr_Clear();
999 else
1000 goto error;
1001 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001002 else if (PyUnicode_Check(res)) {
1003 encodefuncentry *e = encodefuncs;
1004 while (e->name != NULL) {
1005 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1006 self->encodefunc = e->encodefunc;
1007 break;
1008 }
1009 e++;
1010 }
1011 }
1012 Py_XDECREF(res);
1013 }
1014
1015 self->buffer = buffer;
1016 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001017
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001018 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1019 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1020 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1021 raw = PyObject_GetAttrString(buffer, "raw");
1022 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001023 if (raw == NULL) {
1024 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1025 PyErr_Clear();
1026 else
1027 goto error;
1028 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001029 else if (Py_TYPE(raw) == &PyFileIO_Type)
1030 self->raw = raw;
1031 else
1032 Py_DECREF(raw);
1033 }
1034
1035 res = PyObject_CallMethod(buffer, "seekable", NULL);
1036 if (res == NULL)
1037 goto error;
1038 self->seekable = self->telling = PyObject_IsTrue(res);
1039 Py_DECREF(res);
1040
Antoine Pitroue4501852009-05-14 18:55:55 +00001041 self->encoding_start_of_stream = 0;
1042 if (self->seekable && self->encoder) {
1043 PyObject *cookieObj;
1044 int cmp;
1045
1046 self->encoding_start_of_stream = 1;
1047
1048 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1049 if (cookieObj == NULL)
1050 goto error;
1051
1052 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1053 Py_DECREF(cookieObj);
1054 if (cmp < 0) {
1055 goto error;
1056 }
1057
1058 if (cmp == 0) {
1059 self->encoding_start_of_stream = 0;
1060 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1061 _PyIO_zero, NULL);
1062 if (res == NULL)
1063 goto error;
1064 Py_DECREF(res);
1065 }
1066 }
1067
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001068 self->ok = 1;
1069 return 0;
1070
1071 error:
1072 return -1;
1073}
1074
1075static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001076_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001077{
1078 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1079 return -1;
1080 self->ok = 0;
1081 Py_CLEAR(self->buffer);
1082 Py_CLEAR(self->encoding);
1083 Py_CLEAR(self->encoder);
1084 Py_CLEAR(self->decoder);
1085 Py_CLEAR(self->readnl);
1086 Py_CLEAR(self->decoded_chars);
1087 Py_CLEAR(self->pending_bytes);
1088 Py_CLEAR(self->snapshot);
1089 Py_CLEAR(self->errors);
1090 Py_CLEAR(self->raw);
1091 return 0;
1092}
1093
1094static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001095textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001096{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001097 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001098 return;
1099 _PyObject_GC_UNTRACK(self);
1100 if (self->weakreflist != NULL)
1101 PyObject_ClearWeakRefs((PyObject *)self);
1102 Py_CLEAR(self->dict);
1103 Py_TYPE(self)->tp_free((PyObject *)self);
1104}
1105
/* GC traversal: report every object reference held by the wrapper to the
   `visit` callback.  The set of fields visited here is the same set that
   _textiowrapper_clear() releases, plus the instance dict.
   Returns 0 unless Py_VISIT returns early on behalf of `visit`. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    /* The instance dict is cleared separately from the fields above. */
    Py_VISIT(self->dict);
    return 0;
}
1123
1124static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001125textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001126{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001127 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001128 return -1;
1129 Py_CLEAR(self->dict);
1130 return 0;
1131}
1132
/* Forward declaration: CHECK_CLOSED below calls this getter. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* This macro takes some shortcuts to make the common case faster.

   It makes the *enclosing function* return NULL when the stream is closed
   or when the check itself fails, so it may only be used in functions that
   return a pointer.

   - For exact TextIOWrapper instances, the cached raw FileIO object
     (self->raw) is queried directly when available; otherwise the `closed`
     getter is called and its truth value is used.  A closed stream raises
     ValueError("I/O operation on closed file.").
   - For any other type (subclasses), the check is delegated to
     _PyIOBase_check_closed(). */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1162
/* Make the enclosing function return NULL with a ValueError when the object
   is not usable (self->ok <= 0).  The message distinguishes an object whose
   buffer was detached from one that was never initialized.
   NOTE(review): this expands to a bare `if` block rather than a
   do { } while (0) statement, so avoid using it as the body of an unbraced
   if/else. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1174
/* Same check as CHECK_INITIALIZED, for functions returning an int: the
   enclosing function returns -1 with a ValueError set when self->ok <= 0.
   NOTE(review): like CHECK_INITIALIZED, this expands to a bare `if` block,
   not a do { } while (0) statement. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1186
1187
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001188static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001189textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001190{
1191 PyObject *buffer, *res;
1192 CHECK_INITIALIZED(self);
1193 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1194 if (res == NULL)
1195 return NULL;
1196 Py_DECREF(res);
1197 buffer = self->buffer;
1198 self->buffer = NULL;
1199 self->detached = 1;
1200 self->ok = 0;
1201 return buffer;
1202}
1203
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001204Py_LOCAL_INLINE(const Py_UNICODE *)
1205findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1206{
1207 /* like wcschr, but doesn't stop at NULL characters */
1208 while (size-- > 0) {
1209 if (*s == ch)
1210 return s;
1211 s++;
1212 }
1213 return NULL;
1214}
1215
Antoine Pitrou24f36292009-03-28 22:16:42 +00001216/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001217 underlying buffered object, though. */
1218static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001219_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001220{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001221 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222
1223 if (self->pending_bytes == NULL)
1224 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001225
1226 pending = self->pending_bytes;
1227 Py_INCREF(pending);
1228 self->pending_bytes_count = 0;
1229 Py_CLEAR(self->pending_bytes);
1230
1231 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1232 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001233 if (b == NULL)
1234 return -1;
1235 ret = PyObject_CallMethodObjArgs(self->buffer,
1236 _PyIO_str_write, b, NULL);
1237 Py_DECREF(b);
1238 if (ret == NULL)
1239 return -1;
1240 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001241 return 0;
1242}
1243
1244static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001245textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246{
1247 PyObject *ret;
1248 PyObject *text; /* owned reference */
1249 PyObject *b;
1250 Py_ssize_t textlen;
1251 int haslf = 0;
1252 int needflush = 0;
1253
1254 CHECK_INITIALIZED(self);
1255
1256 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1257 return NULL;
1258 }
1259
1260 CHECK_CLOSED(self);
1261
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001262 if (self->encoder == NULL) {
1263 PyErr_SetString(PyExc_IOError, "not writable");
1264 return NULL;
1265 }
1266
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001267 Py_INCREF(text);
1268
1269 textlen = PyUnicode_GetSize(text);
1270
1271 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1272 if (findchar(PyUnicode_AS_UNICODE(text),
1273 PyUnicode_GET_SIZE(text), '\n'))
1274 haslf = 1;
1275
1276 if (haslf && self->writetranslate && self->writenl != NULL) {
1277 PyObject *newtext = PyObject_CallMethod(
1278 text, "replace", "ss", "\n", self->writenl);
1279 Py_DECREF(text);
1280 if (newtext == NULL)
1281 return NULL;
1282 text = newtext;
1283 }
1284
1285 if (self->line_buffering &&
1286 (haslf ||
1287 findchar(PyUnicode_AS_UNICODE(text),
1288 PyUnicode_GET_SIZE(text), '\r')))
1289 needflush = 1;
1290
1291 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001292 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001293 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001294 self->encoding_start_of_stream = 0;
1295 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 else
1297 b = PyObject_CallMethodObjArgs(self->encoder,
1298 _PyIO_str_encode, text, NULL);
1299 Py_DECREF(text);
1300 if (b == NULL)
1301 return NULL;
1302
1303 if (self->pending_bytes == NULL) {
1304 self->pending_bytes = PyList_New(0);
1305 if (self->pending_bytes == NULL) {
1306 Py_DECREF(b);
1307 return NULL;
1308 }
1309 self->pending_bytes_count = 0;
1310 }
1311 if (PyList_Append(self->pending_bytes, b) < 0) {
1312 Py_DECREF(b);
1313 return NULL;
1314 }
1315 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1316 Py_DECREF(b);
1317 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001318 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001319 return NULL;
1320 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001321
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 if (needflush) {
1323 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1324 if (ret == NULL)
1325 return NULL;
1326 Py_DECREF(ret);
1327 }
1328
1329 Py_CLEAR(self->snapshot);
1330
1331 if (self->decoder) {
1332 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1333 if (ret == NULL)
1334 return NULL;
1335 Py_DECREF(ret);
1336 }
1337
1338 return PyLong_FromSsize_t(textlen);
1339}
1340
1341/* Steal a reference to chars and store it in the decoded_char buffer;
1342 */
1343static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001344textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001345{
1346 Py_CLEAR(self->decoded_chars);
1347 self->decoded_chars = chars;
1348 self->decoded_chars_used = 0;
1349}
1350
1351static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001352textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001353{
1354 PyObject *chars;
1355 Py_ssize_t avail;
1356
1357 if (self->decoded_chars == NULL)
1358 return PyUnicode_FromStringAndSize(NULL, 0);
1359
1360 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1361 - self->decoded_chars_used);
1362
1363 assert(avail >= 0);
1364
1365 if (n < 0 || n > avail)
1366 n = avail;
1367
1368 if (self->decoded_chars_used > 0 || n < avail) {
1369 chars = PyUnicode_FromUnicode(
1370 PyUnicode_AS_UNICODE(self->decoded_chars)
1371 + self->decoded_chars_used, n);
1372 if (chars == NULL)
1373 return NULL;
1374 }
1375 else {
1376 chars = self->decoded_chars;
1377 Py_INCREF(chars);
1378 }
1379
1380 self->decoded_chars_used += n;
1381 return chars;
1382}
1383
1384/* Read and decode the next chunk of data from the BufferedReader.
1385 */
1386static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001387textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001388{
1389 PyObject *dec_buffer = NULL;
1390 PyObject *dec_flags = NULL;
1391 PyObject *input_chunk = NULL;
1392 PyObject *decoded_chars, *chunk_size;
1393 int eof;
1394
1395 /* The return value is True unless EOF was reached. The decoded string is
1396 * placed in self._decoded_chars (replacing its previous value). The
1397 * entire input chunk is sent to the decoder, though some of it may remain
1398 * buffered in the decoder, yet to be converted.
1399 */
1400
1401 if (self->decoder == NULL) {
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001402 PyErr_SetString(PyExc_IOError, "not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001403 return -1;
1404 }
1405
1406 if (self->telling) {
1407 /* To prepare for tell(), we need to snapshot a point in the file
1408 * where the decoder's input buffer is empty.
1409 */
1410
1411 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1412 _PyIO_str_getstate, NULL);
1413 if (state == NULL)
1414 return -1;
1415 /* Given this, we know there was a valid snapshot point
1416 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1417 */
1418 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1419 Py_DECREF(state);
1420 return -1;
1421 }
1422 Py_INCREF(dec_buffer);
1423 Py_INCREF(dec_flags);
1424 Py_DECREF(state);
1425 }
1426
1427 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1428 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1429 if (chunk_size == NULL)
1430 goto fail;
1431 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1432 _PyIO_str_read1, chunk_size, NULL);
1433 Py_DECREF(chunk_size);
1434 if (input_chunk == NULL)
1435 goto fail;
1436 assert(PyBytes_Check(input_chunk));
1437
1438 eof = (PyBytes_Size(input_chunk) == 0);
1439
1440 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1441 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1442 self->decoder, input_chunk, eof);
1443 }
1444 else {
1445 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1446 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1447 }
1448
1449 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1450 if (decoded_chars == NULL)
1451 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001452 textiowrapper_set_decoded_chars(self, decoded_chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001453 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1454 eof = 0;
1455
1456 if (self->telling) {
1457 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1458 * next input to be decoded is dec_buffer + input_chunk.
1459 */
1460 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1461 if (next_input == NULL)
1462 goto fail;
1463 assert (PyBytes_Check(next_input));
1464 Py_DECREF(dec_buffer);
1465 Py_CLEAR(self->snapshot);
1466 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1467 }
1468 Py_DECREF(input_chunk);
1469
1470 return (eof == 0);
1471
1472 fail:
1473 Py_XDECREF(dec_buffer);
1474 Py_XDECREF(dec_flags);
1475 Py_XDECREF(input_chunk);
1476 return -1;
1477}
1478
1479static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001480textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001481{
1482 Py_ssize_t n = -1;
1483 PyObject *result = NULL, *chunks = NULL;
1484
1485 CHECK_INITIALIZED(self);
1486
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001487 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001488 return NULL;
1489
1490 CHECK_CLOSED(self);
1491
Benjamin Petersona1b49012009-03-31 23:11:32 +00001492 if (self->decoder == NULL) {
1493 PyErr_SetString(PyExc_IOError, "not readable");
1494 return NULL;
1495 }
1496
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001497 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001498 return NULL;
1499
1500 if (n < 0) {
1501 /* Read everything */
1502 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1503 PyObject *decoded;
1504 if (bytes == NULL)
1505 goto fail;
1506 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1507 bytes, Py_True, NULL);
1508 Py_DECREF(bytes);
1509 if (decoded == NULL)
1510 goto fail;
1511
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001512 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001513
1514 if (result == NULL) {
1515 Py_DECREF(decoded);
1516 return NULL;
1517 }
1518
1519 PyUnicode_AppendAndDel(&result, decoded);
1520 if (result == NULL)
1521 goto fail;
1522
1523 Py_CLEAR(self->snapshot);
1524 return result;
1525 }
1526 else {
1527 int res = 1;
1528 Py_ssize_t remaining = n;
1529
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001530 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531 if (result == NULL)
1532 goto fail;
1533 remaining -= PyUnicode_GET_SIZE(result);
1534
1535 /* Keep reading chunks until we have n characters to return */
1536 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001537 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538 if (res < 0)
1539 goto fail;
1540 if (res == 0) /* EOF */
1541 break;
1542 if (chunks == NULL) {
1543 chunks = PyList_New(0);
1544 if (chunks == NULL)
1545 goto fail;
1546 }
1547 if (PyList_Append(chunks, result) < 0)
1548 goto fail;
1549 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001550 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001551 if (result == NULL)
1552 goto fail;
1553 remaining -= PyUnicode_GET_SIZE(result);
1554 }
1555 if (chunks != NULL) {
1556 if (result != NULL && PyList_Append(chunks, result) < 0)
1557 goto fail;
1558 Py_CLEAR(result);
1559 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1560 if (result == NULL)
1561 goto fail;
1562 Py_CLEAR(chunks);
1563 }
1564 return result;
1565 }
1566 fail:
1567 Py_XDECREF(result);
1568 Py_XDECREF(chunks);
1569 return NULL;
1570}
1571
1572
1573/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1574 that is to the NUL character. Otherwise the function will produce
1575 incorrect results. */
1576static Py_UNICODE *
1577find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1578{
1579 Py_UNICODE *s = start;
1580 for (;;) {
1581 while (*s > ch)
1582 s++;
1583 if (*s == ch)
1584 return s;
1585 if (s == end)
1586 return NULL;
1587 s++;
1588 }
1589}
1590
1591Py_ssize_t
1592_PyIO_find_line_ending(
1593 int translated, int universal, PyObject *readnl,
1594 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1595{
1596 Py_ssize_t len = end - start;
1597
1598 if (translated) {
1599 /* Newlines are already translated, only search for \n */
1600 Py_UNICODE *pos = find_control_char(start, end, '\n');
1601 if (pos != NULL)
1602 return pos - start + 1;
1603 else {
1604 *consumed = len;
1605 return -1;
1606 }
1607 }
1608 else if (universal) {
1609 /* Universal newline search. Find any of \r, \r\n, \n
1610 * The decoder ensures that \r\n are not split in two pieces
1611 */
1612 Py_UNICODE *s = start;
1613 for (;;) {
1614 Py_UNICODE ch;
1615 /* Fast path for non-control chars. The loop always ends
1616 since the Py_UNICODE storage is NUL-terminated. */
1617 while (*s > '\r')
1618 s++;
1619 if (s >= end) {
1620 *consumed = len;
1621 return -1;
1622 }
1623 ch = *s++;
1624 if (ch == '\n')
1625 return s - start;
1626 if (ch == '\r') {
1627 if (*s == '\n')
1628 return s - start + 1;
1629 else
1630 return s - start;
1631 }
1632 }
1633 }
1634 else {
1635 /* Non-universal mode. */
1636 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1637 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1638 if (readnl_len == 1) {
1639 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1640 if (pos != NULL)
1641 return pos - start + 1;
1642 *consumed = len;
1643 return -1;
1644 }
1645 else {
1646 Py_UNICODE *s = start;
1647 Py_UNICODE *e = end - readnl_len + 1;
1648 Py_UNICODE *pos;
1649 if (e < s)
1650 e = s;
1651 while (s < e) {
1652 Py_ssize_t i;
1653 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1654 if (pos == NULL || pos >= e)
1655 break;
1656 for (i = 1; i < readnl_len; i++) {
1657 if (pos[i] != nl[i])
1658 break;
1659 }
1660 if (i == readnl_len)
1661 return pos - start + readnl_len;
1662 s = pos + 1;
1663 }
1664 pos = find_control_char(e, end, nl[0]);
1665 if (pos == NULL)
1666 *consumed = len;
1667 else
1668 *consumed = pos - start;
1669 return -1;
1670 }
1671 }
1672}
1673
/* Read one line and return it as a new str reference.

   `limit` is the maximum number of characters to return; a negative value
   means no limit.  An empty string is returned at EOF.  Returns NULL with
   an exception set on error, or when the stream is closed (CHECK_CLOSED).

   Local bookkeeping:
     line             - string currently searched: either the decoded buffer
                        itself or `remaining` + the decoded buffer
     start            - index in `line` where the search starts
     endpos           - index in `line` where the returned data ends
     chunked          - number of characters already put aside in `chunks`
     offset_to_buffer - number of characters of `line` that precede the
                        decoded buffer (the length of `remaining`)
     remaining        - data that could not be put aside yet because it may
                        be the start of a line ending */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    /* Pending writes must reach the buffer before reading from it. */
    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0)
                goto error;
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search directly in the decoded buffer. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover of the previous chunk. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Found a line ending; make endpos an index into `line` and
               truncate it if the limit is hit first. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            /* Only part of `line` is returned: shrink it in place when we
               hold the only reference and it starts at 0, else copy the
               slice. */
            if (start == 0 && Py_REFCNT(line) == 1) {
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* Leftover data that was never prepended to a new chunk (the loop
           ended first) belongs to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Join everything that was put aside with the final piece. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_CLEAR(line);
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    /* Nothing was read at all (EOF): return an empty string. */
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1827
1828static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001829textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001830{
1831 Py_ssize_t limit = -1;
1832
1833 CHECK_INITIALIZED(self);
1834 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1835 return NULL;
1836 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001837 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001838}
1839
1840/* Seek and Tell */
1841
/* Unpacked form of a seek/tell cookie.  The fields are packed into, and
   unpacked from, a single Python int by textiowrapper_build_cookie() and
   textiowrapper_parse_cookie(). */
typedef struct {
    Py_off_t start_pos;     /* presumably a position in the underlying
                               buffer -- TODO confirm against tell() */
    int dec_flags;          /* flags value handed to decoder.setstate() */
    int bytes_to_feed;      /* presumably the number of bytes to feed to the
                               decoder after seeking -- TODO confirm */
    int chars_to_skip;      /* presumably the number of decoded characters
                               to skip afterwards -- TODO confirm */
    char need_eof;          /* NOTE(review): looks like a boolean asking for
                               a final (EOF) decode call -- confirm */
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001849
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
 */

/* Total size of the intermediary byte string: the sum of the field sizes. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1887
1888static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001889textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001890{
1891 unsigned char buffer[COOKIE_BUF_LEN];
1892 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1893 if (cookieLong == NULL)
1894 return -1;
1895
1896 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1897 IS_LITTLE_ENDIAN, 0) < 0) {
1898 Py_DECREF(cookieLong);
1899 return -1;
1900 }
1901 Py_DECREF(cookieLong);
1902
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001903 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1904 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1905 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1906 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1907 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001908
1909 return 0;
1910}
1911
1912static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001913textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001914{
1915 unsigned char buffer[COOKIE_BUF_LEN];
1916
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001917 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1918 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1919 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1920 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1921 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001922
1923 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1924}
1925#undef IS_LITTLE_ENDIAN
1926
1927static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001928_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001929{
1930 PyObject *res;
1931 /* When seeking to the start of the stream, we call decoder.reset()
1932 rather than decoder.getstate().
1933 This is for a few decoders such as utf-16 for which the state value
1934 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1935 utf-16, that we are expecting a BOM).
1936 */
1937 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1938 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1939 else
1940 res = PyObject_CallMethod(self->decoder, "setstate",
1941 "((yi))", "", cookie->dec_flags);
1942 if (res == NULL)
1943 return -1;
1944 Py_DECREF(res);
1945 return 0;
1946}
1947
Antoine Pitroue4501852009-05-14 18:55:55 +00001948static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001949_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001950{
1951 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001952 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001953 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1954 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1955 self->encoding_start_of_stream = 1;
1956 }
1957 else {
1958 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1959 _PyIO_zero, NULL);
1960 self->encoding_start_of_stream = 0;
1961 }
1962 if (res == NULL)
1963 return -1;
1964 Py_DECREF(res);
1965 return 0;
1966}
1967
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001969textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001970{
1971 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001972 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001973 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974 PyObject *res;
1975 int cmp;
1976
1977 CHECK_INITIALIZED(self);
1978
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001979 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1980 return NULL;
1981 CHECK_CLOSED(self);
1982
1983 Py_INCREF(cookieObj);
1984
1985 if (!self->seekable) {
1986 PyErr_SetString(PyExc_IOError,
1987 "underlying stream is not seekable");
1988 goto fail;
1989 }
1990
1991 if (whence == 1) {
1992 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00001993 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 if (cmp < 0)
1995 goto fail;
1996
1997 if (cmp == 0) {
1998 PyErr_SetString(PyExc_IOError,
1999 "can't do nonzero cur-relative seeks");
2000 goto fail;
2001 }
2002
2003 /* Seeking to the current position should attempt to
2004 * sync the underlying buffer with the current position.
2005 */
2006 Py_DECREF(cookieObj);
2007 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2008 if (cookieObj == NULL)
2009 goto fail;
2010 }
2011 else if (whence == 2) {
2012 /* seek relative to end of file */
2013
Antoine Pitroue4501852009-05-14 18:55:55 +00002014 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002015 if (cmp < 0)
2016 goto fail;
2017
2018 if (cmp == 0) {
2019 PyErr_SetString(PyExc_IOError,
2020 "can't do nonzero end-relative seeks");
2021 goto fail;
2022 }
2023
2024 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2025 if (res == NULL)
2026 goto fail;
2027 Py_DECREF(res);
2028
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002029 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 Py_CLEAR(self->snapshot);
2031 if (self->decoder) {
2032 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2033 if (res == NULL)
2034 goto fail;
2035 Py_DECREF(res);
2036 }
2037
2038 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2039 Py_XDECREF(cookieObj);
2040 return res;
2041 }
2042 else if (whence != 0) {
2043 PyErr_Format(PyExc_ValueError,
2044 "invalid whence (%d, should be 0, 1 or 2)", whence);
2045 goto fail;
2046 }
2047
Antoine Pitroue4501852009-05-14 18:55:55 +00002048 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049 if (cmp < 0)
2050 goto fail;
2051
2052 if (cmp == 1) {
2053 PyErr_Format(PyExc_ValueError,
2054 "negative seek position %R", cookieObj);
2055 goto fail;
2056 }
2057
2058 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2059 if (res == NULL)
2060 goto fail;
2061 Py_DECREF(res);
2062
2063 /* The strategy of seek() is to go back to the safe start point
2064 * and replay the effect of read(chars_to_skip) from there.
2065 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002066 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 goto fail;
2068
2069 /* Seek back to the safe start point. */
2070 posobj = PyLong_FromOff_t(cookie.start_pos);
2071 if (posobj == NULL)
2072 goto fail;
2073 res = PyObject_CallMethodObjArgs(self->buffer,
2074 _PyIO_str_seek, posobj, NULL);
2075 Py_DECREF(posobj);
2076 if (res == NULL)
2077 goto fail;
2078 Py_DECREF(res);
2079
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002080 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 Py_CLEAR(self->snapshot);
2082
2083 /* Restore the decoder to its state from the safe start point. */
2084 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002085 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 goto fail;
2087 }
2088
2089 if (cookie.chars_to_skip) {
2090 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2091 PyObject *input_chunk = PyObject_CallMethod(
2092 self->buffer, "read", "i", cookie.bytes_to_feed);
2093 PyObject *decoded;
2094
2095 if (input_chunk == NULL)
2096 goto fail;
2097
2098 assert (PyBytes_Check(input_chunk));
2099
2100 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2101 if (self->snapshot == NULL) {
2102 Py_DECREF(input_chunk);
2103 goto fail;
2104 }
2105
2106 decoded = PyObject_CallMethod(self->decoder, "decode",
2107 "Oi", input_chunk, (int)cookie.need_eof);
2108
2109 if (decoded == NULL)
2110 goto fail;
2111
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002112 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002113
2114 /* Skip chars_to_skip of the decoded characters. */
2115 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2116 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2117 goto fail;
2118 }
2119 self->decoded_chars_used = cookie.chars_to_skip;
2120 }
2121 else {
2122 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2123 if (self->snapshot == NULL)
2124 goto fail;
2125 }
2126
Antoine Pitroue4501852009-05-14 18:55:55 +00002127 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2128 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002129 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002130 goto fail;
2131 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002132 return cookieObj;
2133 fail:
2134 Py_XDECREF(cookieObj);
2135 return NULL;
2136
2137}
2138
2139static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002140textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002141{
2142 PyObject *res;
2143 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002144 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002145 PyObject *next_input;
2146 Py_ssize_t chars_to_skip, chars_decoded;
2147 PyObject *saved_state = NULL;
2148 char *input, *input_end;
2149
2150 CHECK_INITIALIZED(self);
2151 CHECK_CLOSED(self);
2152
2153 if (!self->seekable) {
2154 PyErr_SetString(PyExc_IOError,
2155 "underlying stream is not seekable");
2156 goto fail;
2157 }
2158 if (!self->telling) {
2159 PyErr_SetString(PyExc_IOError,
2160 "telling position disabled by next() call");
2161 goto fail;
2162 }
2163
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002164 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002165 return NULL;
2166 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2167 if (res == NULL)
2168 goto fail;
2169 Py_DECREF(res);
2170
2171 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2172 if (posobj == NULL)
2173 goto fail;
2174
2175 if (self->decoder == NULL || self->snapshot == NULL) {
2176 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2177 return posobj;
2178 }
2179
2180#if defined(HAVE_LARGEFILE_SUPPORT)
2181 cookie.start_pos = PyLong_AsLongLong(posobj);
2182#else
2183 cookie.start_pos = PyLong_AsLong(posobj);
2184#endif
2185 if (PyErr_Occurred())
2186 goto fail;
2187
2188 /* Skip backward to the snapshot point (see _read_chunk). */
2189 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2190 goto fail;
2191
2192 assert (PyBytes_Check(next_input));
2193
2194 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2195
2196 /* How many decoded characters have been used up since the snapshot? */
2197 if (self->decoded_chars_used == 0) {
2198 /* We haven't moved from the snapshot point. */
2199 Py_DECREF(posobj);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002200 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002201 }
2202
2203 chars_to_skip = self->decoded_chars_used;
2204
2205 /* Starting from the snapshot position, we will walk the decoder
2206 * forward until it gives us enough decoded characters.
2207 */
2208 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2209 _PyIO_str_getstate, NULL);
2210 if (saved_state == NULL)
2211 goto fail;
2212
2213 /* Note our initial start point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002214 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002215 goto fail;
2216
2217 /* Feed the decoder one byte at a time. As we go, note the
2218 * nearest "safe start point" before the current location
2219 * (a point where the decoder has nothing buffered, so seek()
2220 * can safely start from there and advance to this location).
2221 */
2222 chars_decoded = 0;
2223 input = PyBytes_AS_STRING(next_input);
2224 input_end = input + PyBytes_GET_SIZE(next_input);
2225 while (input < input_end) {
2226 PyObject *state;
2227 char *dec_buffer;
2228 Py_ssize_t dec_buffer_len;
2229 int dec_flags;
2230
2231 PyObject *decoded = PyObject_CallMethod(
2232 self->decoder, "decode", "y#", input, 1);
2233 if (decoded == NULL)
2234 goto fail;
2235 assert (PyUnicode_Check(decoded));
2236 chars_decoded += PyUnicode_GET_SIZE(decoded);
2237 Py_DECREF(decoded);
2238
2239 cookie.bytes_to_feed += 1;
2240
2241 state = PyObject_CallMethodObjArgs(self->decoder,
2242 _PyIO_str_getstate, NULL);
2243 if (state == NULL)
2244 goto fail;
2245 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2246 Py_DECREF(state);
2247 goto fail;
2248 }
2249 Py_DECREF(state);
2250
2251 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2252 /* Decoder buffer is empty, so this is a safe start point. */
2253 cookie.start_pos += cookie.bytes_to_feed;
2254 chars_to_skip -= chars_decoded;
2255 cookie.dec_flags = dec_flags;
2256 cookie.bytes_to_feed = 0;
2257 chars_decoded = 0;
2258 }
2259 if (chars_decoded >= chars_to_skip)
2260 break;
2261 input++;
2262 }
2263 if (input == input_end) {
2264 /* We didn't get enough decoded data; signal EOF to get more. */
2265 PyObject *decoded = PyObject_CallMethod(
2266 self->decoder, "decode", "yi", "", /* final = */ 1);
2267 if (decoded == NULL)
2268 goto fail;
2269 assert (PyUnicode_Check(decoded));
2270 chars_decoded += PyUnicode_GET_SIZE(decoded);
2271 Py_DECREF(decoded);
2272 cookie.need_eof = 1;
2273
2274 if (chars_decoded < chars_to_skip) {
2275 PyErr_SetString(PyExc_IOError,
2276 "can't reconstruct logical file position");
2277 goto fail;
2278 }
2279 }
2280
2281 /* finally */
2282 Py_XDECREF(posobj);
2283 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2284 Py_DECREF(saved_state);
2285 if (res == NULL)
2286 return NULL;
2287 Py_DECREF(res);
2288
2289 /* The returned cookie corresponds to the last safe start point. */
2290 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002291 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002292
2293 fail:
2294 Py_XDECREF(posobj);
2295 if (saved_state) {
2296 PyObject *type, *value, *traceback;
2297 PyErr_Fetch(&type, &value, &traceback);
2298
2299 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2300 Py_DECREF(saved_state);
2301 if (res == NULL)
2302 return NULL;
2303 Py_DECREF(res);
2304
2305 PyErr_Restore(type, value, traceback);
2306 }
2307 return NULL;
2308}
2309
2310static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002311textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002312{
2313 PyObject *pos = Py_None;
2314 PyObject *res;
2315
2316 CHECK_INITIALIZED(self)
2317 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2318 return NULL;
2319 }
2320
2321 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2322 if (res == NULL)
2323 return NULL;
2324 Py_DECREF(res);
2325
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002326 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002327}
2328
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002329static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002330textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002331{
Antoine Pitrou716c4442009-05-23 19:04:03 +00002332 PyObject *nameobj, *res;
2333
2334 CHECK_INITIALIZED(self);
2335
2336 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2337 if (nameobj == NULL) {
2338 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2339 PyErr_Clear();
2340 else
2341 return NULL;
2342 res = PyUnicode_FromFormat("<_io.TextIOWrapper encoding=%R>",
2343 self->encoding);
2344 }
2345 else {
2346 res = PyUnicode_FromFormat("<_io.TextIOWrapper name=%R encoding=%R>",
2347 nameobj, self->encoding);
2348 Py_DECREF(nameobj);
2349 }
2350 return res;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002351}
2352
2353
/* Inquiries */
2355
2356static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002357textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002358{
2359 CHECK_INITIALIZED(self);
2360 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2361}
2362
2363static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002364textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002365{
2366 CHECK_INITIALIZED(self);
2367 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2368}
2369
2370static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002371textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002372{
2373 CHECK_INITIALIZED(self);
2374 return PyObject_CallMethod(self->buffer, "readable", NULL);
2375}
2376
2377static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002378textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002379{
2380 CHECK_INITIALIZED(self);
2381 return PyObject_CallMethod(self->buffer, "writable", NULL);
2382}
2383
2384static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002385textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002386{
2387 CHECK_INITIALIZED(self);
2388 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2389}
2390
2391static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002392textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002393{
2394 CHECK_INITIALIZED(self);
2395 CHECK_CLOSED(self);
2396 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002397 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002398 return NULL;
2399 return PyObject_CallMethod(self->buffer, "flush", NULL);
2400}
2401
2402static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002403textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002404{
2405 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002406 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002407 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002408
Antoine Pitrou6be88762010-05-03 16:48:20 +00002409 res = textiowrapper_closed_get(self, NULL);
2410 if (res == NULL)
2411 return NULL;
2412 r = PyObject_IsTrue(res);
2413 Py_DECREF(res);
2414 if (r < 0)
2415 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002416
Antoine Pitrou6be88762010-05-03 16:48:20 +00002417 if (r > 0) {
2418 Py_RETURN_NONE; /* stream already closed */
2419 }
2420 else {
2421 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2422 if (res == NULL) {
2423 return NULL;
2424 }
2425 else
2426 Py_DECREF(res);
2427
2428 return PyObject_CallMethod(self->buffer, "close", NULL);
2429 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002430}
2431
2432static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002433textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002434{
2435 PyObject *line;
2436
2437 CHECK_INITIALIZED(self);
2438
2439 self->telling = 0;
2440 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2441 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002442 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002443 }
2444 else {
2445 line = PyObject_CallMethodObjArgs((PyObject *)self,
2446 _PyIO_str_readline, NULL);
2447 if (line && !PyUnicode_Check(line)) {
2448 PyErr_Format(PyExc_IOError,
2449 "readline() should have returned an str object, "
2450 "not '%.200s'", Py_TYPE(line)->tp_name);
2451 Py_DECREF(line);
2452 return NULL;
2453 }
2454 }
2455
2456 if (line == NULL)
2457 return NULL;
2458
2459 if (PyUnicode_GET_SIZE(line) == 0) {
2460 /* Reached EOF or would have blocked */
2461 Py_DECREF(line);
2462 Py_CLEAR(self->snapshot);
2463 self->telling = self->seekable;
2464 return NULL;
2465 }
2466
2467 return line;
2468}
2469
2470static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002471textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002472{
2473 CHECK_INITIALIZED(self);
2474 return PyObject_GetAttrString(self->buffer, "name");
2475}
2476
2477static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002478textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002479{
2480 CHECK_INITIALIZED(self);
2481 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2482}
2483
2484static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002485textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486{
2487 PyObject *res;
2488 CHECK_INITIALIZED(self);
2489 if (self->decoder == NULL)
2490 Py_RETURN_NONE;
2491 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2492 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002493 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2494 PyErr_Clear();
2495 Py_RETURN_NONE;
2496 }
2497 else {
2498 return NULL;
2499 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500 }
2501 return res;
2502}
2503
2504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002505textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002506{
2507 CHECK_INITIALIZED(self);
2508 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2509}
2510
2511static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002512textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513{
2514 CHECK_INITIALIZED(self);
2515 return PyLong_FromSsize_t(self->chunk_size);
2516}
2517
2518static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002519textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520{
2521 Py_ssize_t n;
2522 CHECK_INITIALIZED_INT(self);
2523 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2524 if (n == -1 && PyErr_Occurred())
2525 return -1;
2526 if (n <= 0) {
2527 PyErr_SetString(PyExc_ValueError,
2528 "a strictly positive integer is required");
2529 return -1;
2530 }
2531 self->chunk_size = n;
2532 return 0;
2533}
2534
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002535static PyMethodDef textiowrapper_methods[] = {
2536 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2537 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2538 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2539 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2540 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2541 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002543 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2544 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2545 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2546 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2547 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002548
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002549 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2550 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2551 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002552 {NULL, NULL}
2553};
2554
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002555static PyMemberDef textiowrapper_members[] = {
2556 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2557 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2558 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002559 {NULL}
2560};
2561
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002562static PyGetSetDef textiowrapper_getset[] = {
2563 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2564 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002565/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2566*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002567 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2568 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2569 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2570 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002571 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002572};
2573
2574PyTypeObject PyTextIOWrapper_Type = {
2575 PyVarObject_HEAD_INIT(NULL, 0)
2576 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002577 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002578 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002579 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002580 0, /*tp_print*/
2581 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002582 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002583 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002584 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002585 0, /*tp_as_number*/
2586 0, /*tp_as_sequence*/
2587 0, /*tp_as_mapping*/
2588 0, /*tp_hash */
2589 0, /*tp_call*/
2590 0, /*tp_str*/
2591 0, /*tp_getattro*/
2592 0, /*tp_setattro*/
2593 0, /*tp_as_buffer*/
2594 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2595 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002596 textiowrapper_doc, /* tp_doc */
2597 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2598 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002599 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002600 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002601 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002602 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2603 textiowrapper_methods, /* tp_methods */
2604 textiowrapper_members, /* tp_members */
2605 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606 0, /* tp_base */
2607 0, /* tp_dict */
2608 0, /* tp_descr_get */
2609 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002610 offsetof(textio, dict), /*tp_dictoffset*/
2611 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612 0, /* tp_alloc */
2613 PyType_GenericNew, /* tp_new */
2614};