blob: 2662e4bcbe06026a7c0a4f0fd7bf71844000406a [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
/* Class docstring for _io._TextIOBase; installed through the tp_doc slot of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
29}
30
31PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
39textiobase_detach(PyObject *self)
40{
41 return _unsupported("detach");
42}
43
44PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
52textiobase_read(PyObject *self, PyObject *args)
53{
54 return _unsupported("read");
55}
56
57PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
64textiobase_readline(PyObject *self, PyObject *args)
65{
66 return _unsupported("readline");
67}
68
69PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
76textiobase_write(PyObject *self, PyObject *args)
77{
78 return _unsupported("write");
79}
80
81PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
88textiobase_encoding_get(PyObject *self, void *context)
89{
90 Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
102textiobase_newlines_get(PyObject *self, void *context)
103{
104 Py_RETURN_NONE;
105}
106
107PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
114textiobase_errors_get(PyObject *self, void *context)
115{
116 Py_RETURN_NONE;
117}
118
119
/* Method table of _io._TextIOBase. Every entry points at a placeholder that
   raises the unsupported-operation exception; the table ends with a NULL
   sentinel. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}
};
127
/* Attribute table of _io._TextIOBase. All three attributes are read-only
   (no setter is supplied) and their getters return None. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}
};
134
/* Type object for _io._TextIOBase. It derives from PyIOBase_Type, may itself
   be subclassed (Py_TPFLAGS_BASETYPE) and only contributes the method and
   getset tables above; tp_basicsize and tp_new are left at 0 here. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
175
176
177/* IncrementalNewlineDecoder */
178
/* Class docstring for _io.IncrementalNewlineDecoder; installed through the
   tp_doc slot of PyIncrementalNewlineDecoder_Type. */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode. It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
    "records the types of newlines encountered. When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
188
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder, or Py_None to pass the
                                   input through; NULL until __init__ ran */
    PyObject *errors;           /* error mode object given to __init__
                                   ("strict" when omitted) */
    signed int pendingcr: 1;    /* non-zero while a trailing '\r' from the
                                   previous decode() call is being withheld */
    signed int translate: 1;    /* non-zero to rewrite \r and \r\n as \n */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
197
198static int
199incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags stored in nldecoder_object.seennl, one per newline convention
   encountered so far. SEEN_ALL is the union of the three. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
/* Core of IncrementalNewlineDecoder.decode().

   _self  - an nldecoder_object (cast without checking).
   input  - passed to the wrapped decoder's decode() method, or used directly
            as the text when the decoder is None.
   final  - non-zero when no more input will follow.

   Returns a new reference to a unicode object, or NULL with an exception
   set. Steps, in order:
     1. obtain the decoded text;
     2. re-attach a '\r' withheld by the previous call;
     3. unless `final`, withhold a trailing '\r' (it may be half of \r\n);
     4. record which newline kinds occur and, in translate mode, rewrite
        \r and \r\n as \n.

   NOTE(review): the scanning loops below have no explicit bound inside the
   fast path and only test `> end` after reading one element past the data,
   so they appear to rely on the unicode buffer being NUL-terminated. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the text. The extra
           reference makes `output` owned on every path below. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    output_len = PyUnicode_GET_SIZE(output);
    /* Prepend the withheld '\r'. With no new text and more input expected,
       keep withholding it instead. */
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place only when nobody else can see the object;
               otherwise build a shortened copy. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                /* memchr matched a 0x0A *byte*; confirm that a whole
                   character equals '\n'. */
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            /* Read from in_str, write to out_str. When translating in place
               both are the same buffer; the write pointer never gets ahead
               of the read pointer because translation only shortens. */
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* \r\n pairs collapsed to one character: trim the tail. */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
461
462static PyObject *
463incrementalnewlinedecoder_decode(nldecoder_object *self,
464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
/* Method table of IncrementalNewlineDecoder. The docstring field is omitted
   from every entry; the table ends with a NULL sentinel. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
    {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
    {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
    {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
    {NULL}
};
566
/* Attribute table of IncrementalNewlineDecoder: a read-only `newlines`
   attribute with no docstring. */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}
};
571
/* Type object for _io.IncrementalNewlineDecoder. Instances are created with
   PyType_GenericNew and set up by incrementalnewlinedecoder_init; the type
   has no tp_traverse/tp_clear and may be subclassed. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
612
613
614/* TextIOWrapper */
615
/* Class docstring for TextIOWrapper. It describes the constructor arguments
   that textiowrapper_init() parses: buffer, encoding, errors, newline and
   line_buffering. */
PyDoc_STRVAR(textiowrapper_doc,
    "Character and line based layer over a BufferedIOBase object, buffer.\n"
    "\n"
    "encoding gives the name of the encoding that the stream will be\n"
    "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
    "\n"
    "errors determines the strictness of encoding and decoding (see the\n"
    "codecs.register) and defaults to \"strict\".\n"
    "\n"
    "newline controls how line endings are handled. It can be None, '',\n"
    "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
    "\n"
    "* On input, if newline is None, universal newlines mode is\n"
    " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
    " these are translated into '\\n' before being returned to the\n"
    " caller. If it is '', universal newline mode is enabled, but line\n"
    " endings are returned to the caller untranslated. If it has any of\n"
    " the other legal values, input lines are only terminated by the given\n"
    " string, and the line ending is returned to the caller untranslated.\n"
    "\n"
    "* On output, if newline is None, any '\\n' characters written are\n"
    " translated to the system default line separator, os.linesep. If\n"
    " newline is '', no translation takes place. If newline is any of the\n"
    " other legal values, any '\\n' characters written are translated to\n"
    " the given string.\n"
    "\n"
    "If line_buffering is True, a call to flush is implied when a call to\n"
    "write contains a newline character."
    );
645
/* Signature shared by the specialized encoders below once they are cast for
   storage in textio.encodefunc / encodefuncs[]: two object pointers in, a
   new object (or NULL on error) out. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
648
/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;               /* set once detach() has given the buffer away */
    Py_ssize_t chunk_size;      /* set to 8192 by __init__ */
    PyObject *buffer;           /* the wrapped buffered stream */
    PyObject *encoding;         /* encoding name */
    PyObject *encoder;          /* incremental encoder; only built when the
                                   buffer reports writable() */
    PyObject *decoder;          /* incremental decoder, possibly wrapped in an
                                   IncrementalNewlineDecoder; only built when
                                   the buffer reports readable() */
    PyObject *readnl;           /* the `newline` argument, or NULL if it was
                                   omitted/None */
    PyObject *errors;           /* error mode as a bytes object */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;         /* newline was None or '' */
    char readtranslate;         /* newline was None */
    char writetranslate;        /* newline was None or non-empty */
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;      /* cleared in tp_dealloc if non-NULL */
    PyObject *dict;             /* visited by tp_traverse, cleared by tp_clear */
} textio;
700
701
702/* A couple of specialized cases in order to bypass the slow incremental
703 encoding methods for the most popular encodings. */
704
705static PyObject *
706ascii_encode(textio *self, PyObject *text)
707{
708 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
709 PyUnicode_GET_SIZE(text),
710 PyBytes_AS_STRING(self->errors));
711}
712
713static PyObject *
714utf16be_encode(textio *self, PyObject *text)
715{
716 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
717 PyUnicode_GET_SIZE(text),
718 PyBytes_AS_STRING(self->errors), 1);
719}
720
721static PyObject *
722utf16le_encode(textio *self, PyObject *text)
723{
724 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
725 PyUnicode_GET_SIZE(text),
726 PyBytes_AS_STRING(self->errors), -1);
727}
728
729static PyObject *
730utf16_encode(textio *self, PyObject *text)
731{
732 if (!self->encoding_start_of_stream) {
733 /* Skip the BOM and use native byte ordering */
734#if defined(WORDS_BIGENDIAN)
735 return utf16be_encode(self, text);
736#else
737 return utf16le_encode(self, text);
738#endif
739 }
740 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
741 PyUnicode_GET_SIZE(text),
742 PyBytes_AS_STRING(self->errors), 0);
743}
744
745static PyObject *
746utf32be_encode(textio *self, PyObject *text)
747{
748 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
749 PyUnicode_GET_SIZE(text),
750 PyBytes_AS_STRING(self->errors), 1);
751}
752
753static PyObject *
754utf32le_encode(textio *self, PyObject *text)
755{
756 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
757 PyUnicode_GET_SIZE(text),
758 PyBytes_AS_STRING(self->errors), -1);
759}
760
761static PyObject *
762utf32_encode(textio *self, PyObject *text)
763{
764 if (!self->encoding_start_of_stream) {
765 /* Skip the BOM and use native byte ordering */
766#if defined(WORDS_BIGENDIAN)
767 return utf32be_encode(self, text);
768#else
769 return utf32le_encode(self, text);
770#endif
771 }
772 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
773 PyUnicode_GET_SIZE(text),
774 PyBytes_AS_STRING(self->errors), 0);
775}
776
777static PyObject *
778utf8_encode(textio *self, PyObject *text)
779{
780 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
781 PyUnicode_GET_SIZE(text),
782 PyBytes_AS_STRING(self->errors));
783}
784
785static PyObject *
786latin1_encode(textio *self, PyObject *text)
787{
788 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
789 PyUnicode_GET_SIZE(text),
790 PyBytes_AS_STRING(self->errors));
791}
792
793/* Map normalized encoding names onto the specialized encoding funcs */
794
/* One row of the lookup table below: a normalized codec name and the
   specialized encoder to use for it. */
typedef struct {
    const char *name;           /* compared with strcmp() against the codec's
                                   `name` attribute */
    encodefunc_t encodefunc;
} encodefuncentry;
799
/* Codec-name -> fast encoder table, scanned linearly by textiowrapper_init()
   until the {NULL, NULL} sentinel. The casts adapt the (textio *, PyObject *)
   helpers to encodefunc_t. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
812
813
814static int
815textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
816{
817 char *kwlist[] = {"buffer", "encoding", "errors",
818 "newline", "line_buffering",
819 NULL};
820 PyObject *buffer, *raw;
821 char *encoding = NULL;
822 char *errors = NULL;
823 char *newline = NULL;
824 int line_buffering = 0;
825
826 PyObject *res;
827 int r;
828
829 self->ok = 0;
830 self->detached = 0;
831 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
832 kwlist, &buffer, &encoding, &errors,
833 &newline, &line_buffering))
834 return -1;
835
836 if (newline && newline[0] != '\0'
837 && !(newline[0] == '\n' && newline[1] == '\0')
838 && !(newline[0] == '\r' && newline[1] == '\0')
839 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
840 PyErr_Format(PyExc_ValueError,
841 "illegal newline value: %s", newline);
842 return -1;
843 }
844
845 Py_CLEAR(self->buffer);
846 Py_CLEAR(self->encoding);
847 Py_CLEAR(self->encoder);
848 Py_CLEAR(self->decoder);
849 Py_CLEAR(self->readnl);
850 Py_CLEAR(self->decoded_chars);
851 Py_CLEAR(self->pending_bytes);
852 Py_CLEAR(self->snapshot);
853 Py_CLEAR(self->errors);
854 Py_CLEAR(self->raw);
855 self->decoded_chars_used = 0;
856 self->pending_bytes_count = 0;
857 self->encodefunc = NULL;
858 self->writenl = NULL;
859
860 if (encoding == NULL && self->encoding == NULL) {
861 if (_PyIO_locale_module == NULL) {
862 _PyIO_locale_module = PyImport_ImportModule("locale");
863 if (_PyIO_locale_module == NULL)
864 goto catch_ImportError;
865 else
866 goto use_locale;
867 }
868 else {
869 use_locale:
870 self->encoding = PyObject_CallMethod(
871 _PyIO_locale_module, "getpreferredencoding", NULL);
872 if (self->encoding == NULL) {
873 catch_ImportError:
874 /*
875 Importing locale can raise a ImportError because of
876 _functools, and locale.getpreferredencoding can raise a
877 ImportError if _locale is not available. These will happen
878 during module building.
879 */
880 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
881 PyErr_Clear();
882 self->encoding = PyString_FromString("ascii");
883 }
884 else
885 goto error;
886 }
887 else if (!PyString_Check(self->encoding))
888 Py_CLEAR(self->encoding);
889 }
890 }
891 if (self->encoding != NULL)
892 encoding = PyString_AsString(self->encoding);
893 else if (encoding != NULL) {
894 self->encoding = PyString_FromString(encoding);
895 if (self->encoding == NULL)
896 goto error;
897 }
898 else {
899 PyErr_SetString(PyExc_IOError,
900 "could not determine default encoding");
901 }
902
903 if (errors == NULL)
904 errors = "strict";
905 self->errors = PyBytes_FromString(errors);
906 if (self->errors == NULL)
907 goto error;
908
909 self->chunk_size = 8192;
910 self->readuniversal = (newline == NULL || newline[0] == '\0');
911 self->line_buffering = line_buffering;
912 self->readtranslate = (newline == NULL);
913 if (newline) {
914 self->readnl = PyString_FromString(newline);
915 if (self->readnl == NULL)
916 return -1;
917 }
918 self->writetranslate = (newline == NULL || newline[0] != '\0');
919 if (!self->readuniversal && self->writetranslate) {
920 self->writenl = PyString_AsString(self->readnl);
921 if (!strcmp(self->writenl, "\n"))
922 self->writenl = NULL;
923 }
924#ifdef MS_WINDOWS
925 else
926 self->writenl = "\r\n";
927#endif
928
929 /* Build the decoder object */
930 res = PyObject_CallMethod(buffer, "readable", NULL);
931 if (res == NULL)
932 goto error;
933 r = PyObject_IsTrue(res);
934 Py_DECREF(res);
935 if (r == -1)
936 goto error;
937 if (r == 1) {
938 self->decoder = PyCodec_IncrementalDecoder(
939 encoding, errors);
940 if (self->decoder == NULL)
941 goto error;
942
943 if (self->readuniversal) {
944 PyObject *incrementalDecoder = PyObject_CallFunction(
945 (PyObject *)&PyIncrementalNewlineDecoder_Type,
946 "Oi", self->decoder, (int)self->readtranslate);
947 if (incrementalDecoder == NULL)
948 goto error;
949 Py_CLEAR(self->decoder);
950 self->decoder = incrementalDecoder;
951 }
952 }
953
954 /* Build the encoder object */
955 res = PyObject_CallMethod(buffer, "writable", NULL);
956 if (res == NULL)
957 goto error;
958 r = PyObject_IsTrue(res);
959 Py_DECREF(res);
960 if (r == -1)
961 goto error;
962 if (r == 1) {
963 PyObject *ci;
964 self->encoder = PyCodec_IncrementalEncoder(
965 encoding, errors);
966 if (self->encoder == NULL)
967 goto error;
968 /* Get the normalized named of the codec */
969 ci = _PyCodec_Lookup(encoding);
970 if (ci == NULL)
971 goto error;
972 res = PyObject_GetAttrString(ci, "name");
973 Py_DECREF(ci);
974 if (res == NULL) {
975 if (PyErr_ExceptionMatches(PyExc_AttributeError))
976 PyErr_Clear();
977 else
978 goto error;
979 }
980 else if (PyString_Check(res)) {
981 encodefuncentry *e = encodefuncs;
982 while (e->name != NULL) {
983 if (!strcmp(PyString_AS_STRING(res), e->name)) {
984 self->encodefunc = e->encodefunc;
985 break;
986 }
987 e++;
988 }
989 }
990 Py_XDECREF(res);
991 }
992
993 self->buffer = buffer;
994 Py_INCREF(buffer);
995
996 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
997 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
998 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
999 raw = PyObject_GetAttrString(buffer, "raw");
1000 /* Cache the raw FileIO object to speed up 'closed' checks */
1001 if (raw == NULL) {
1002 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1003 PyErr_Clear();
1004 else
1005 goto error;
1006 }
1007 else if (Py_TYPE(raw) == &PyFileIO_Type)
1008 self->raw = raw;
1009 else
1010 Py_DECREF(raw);
1011 }
1012
1013 res = PyObject_CallMethod(buffer, "seekable", NULL);
1014 if (res == NULL)
1015 goto error;
1016 self->seekable = self->telling = PyObject_IsTrue(res);
1017 Py_DECREF(res);
1018
1019 self->encoding_start_of_stream = 0;
1020 if (self->seekable && self->encoder) {
1021 PyObject *cookieObj;
1022 int cmp;
1023
1024 self->encoding_start_of_stream = 1;
1025
1026 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1027 if (cookieObj == NULL)
1028 goto error;
1029
1030 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1031 Py_DECREF(cookieObj);
1032 if (cmp < 0) {
1033 goto error;
1034 }
1035
1036 if (cmp == 0) {
1037 self->encoding_start_of_stream = 0;
1038 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1039 _PyIO_zero, NULL);
1040 if (res == NULL)
1041 goto error;
1042 Py_DECREF(res);
1043 }
1044 }
1045
1046 self->ok = 1;
1047 return 0;
1048
1049 error:
1050 return -1;
1051}
1052
1053static int
1054_textiowrapper_clear(textio *self)
1055{
1056 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1057 return -1;
1058 self->ok = 0;
1059 Py_CLEAR(self->buffer);
1060 Py_CLEAR(self->encoding);
1061 Py_CLEAR(self->encoder);
1062 Py_CLEAR(self->decoder);
1063 Py_CLEAR(self->readnl);
1064 Py_CLEAR(self->decoded_chars);
1065 Py_CLEAR(self->pending_bytes);
1066 Py_CLEAR(self->snapshot);
1067 Py_CLEAR(self->errors);
1068 Py_CLEAR(self->raw);
1069 return 0;
1070}
1071
1072static void
1073textiowrapper_dealloc(textio *self)
1074{
1075 if (_textiowrapper_clear(self) < 0)
1076 return;
1077 _PyObject_GC_UNTRACK(self);
1078 if (self->weakreflist != NULL)
1079 PyObject_ClearWeakRefs((PyObject *)self);
1080 Py_CLEAR(self->dict);
1081 Py_TYPE(self)->tp_free((PyObject *)self);
1082}
1083
/* tp_traverse: report every object reference held by the instance to the
   garbage collector. Py_VISIT returns early if the visitor returns
   non-zero. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1101
1102static int
1103textiowrapper_clear(textio *self)
1104{
1105 if (_textiowrapper_clear(self) < 0)
1106 return -1;
1107 Py_CLEAR(self->dict);
1108 return 0;
1109}
1110
/* Forward declaration: the CHECK_CLOSED macro below calls this getter before
   its definition appears. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);
1113
/* This macro takes some shortcuts to make the common case faster.

   For exact TextIOWrapper instances it asks the cached raw FileIO object
   directly when there is one, and otherwise evaluates the `closed` getter;
   a true result raises ValueError. Subclass instances go through
   _PyIOBase_check_closed() instead. On any failure the macro executes
   `return NULL` in the *enclosing* function, so it may only be used in
   functions returning a pointer. `self` must be a textio * lvalue. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1140
/* Guard for methods returning a pointer: if the object is not usable
   (self->ok <= 0) raise ValueError -- with a dedicated message when the
   buffer was detached -- and `return NULL` from the enclosing function.

   The expansion is a bare `if` block, not a do { } while (0) statement, so
   use it only as a standalone statement (never as the body of an unbraced
   if/else). */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1152
/* Same check as CHECK_INITIALIZED, for functions returning int: on failure
   the enclosing function returns -1 with ValueError set.

   Like its sibling this expands to a bare `if` block, so use it only as a
   standalone statement. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1164
1165
1166static PyObject *
1167textiowrapper_detach(textio *self)
1168{
1169 PyObject *buffer, *res;
1170 CHECK_INITIALIZED(self);
1171 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1172 if (res == NULL)
1173 return NULL;
1174 Py_DECREF(res);
1175 buffer = self->buffer;
1176 self->buffer = NULL;
1177 self->detached = 1;
1178 self->ok = 0;
1179 return buffer;
1180}
1181
1182Py_LOCAL_INLINE(const Py_UNICODE *)
1183findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1184{
1185 /* like wcschr, but doesn't stop at NULL characters */
1186 while (size-- > 0) {
1187 if (*s == ch)
1188 return s;
1189 s++;
1190 }
1191 return NULL;
1192}
1193
1194/* Flush the internal write buffer. This doesn't explicitly flush the
1195 underlying buffered object, though. */
1196static int
1197_textiowrapper_writeflush(textio *self)
1198{
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001199 PyObject *pending, *b, *ret;
Antoine Pitrou19690592009-06-12 20:14:08 +00001200
1201 if (self->pending_bytes == NULL)
1202 return 0;
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001203
1204 pending = self->pending_bytes;
1205 Py_INCREF(pending);
1206 self->pending_bytes_count = 0;
1207 Py_CLEAR(self->pending_bytes);
1208
1209 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1210 Py_DECREF(pending);
Antoine Pitrou19690592009-06-12 20:14:08 +00001211 if (b == NULL)
1212 return -1;
1213 ret = PyObject_CallMethodObjArgs(self->buffer,
1214 _PyIO_str_write, b, NULL);
1215 Py_DECREF(b);
1216 if (ret == NULL)
1217 return -1;
1218 Py_DECREF(ret);
Antoine Pitrou19690592009-06-12 20:14:08 +00001219 return 0;
1220}
1221
/* write(text): encode `text` and queue the resulting bytes.
 *
 * The argument must be a unicode object ("U" format).  If newline
 * translation is enabled, "\n" is replaced by self->writenl before
 * encoding.  The encoded bytes are appended to self->pending_bytes and
 * written out once more than chunk_size bytes are pending, or right away
 * when line buffering is on and the text contains "\n" or "\r" (in which
 * case the underlying buffer is flushed as well).
 *
 * Returns the length of the text as passed by the caller (measured
 * before newline translation), or NULL with an exception set.
 */
static PyObject *
textiowrapper_write(textio *self, PyObject *args)
{
    PyObject *ret;
    PyObject *text; /* owned reference */
    PyObject *b;
    Py_ssize_t textlen;
    int haslf = 0;
    int needflush = 0;

    CHECK_INITIALIZED(self);

    if (!PyArg_ParseTuple(args, "U:write", &text)) {
        return NULL;
    }

    CHECK_CLOSED(self);

    /* Without an encoder the stream cannot be written to. */
    if (self->encoder == NULL) {
        PyErr_SetString(PyExc_IOError, "not writable");
        return NULL;
    }

    /* From here on `text` is an owned reference (it may be replaced by
       the translated copy below), so every exit path must release it. */
    Py_INCREF(text);

    textlen = PyUnicode_GetSize(text);

    /* Only scan for "\n" when the result is actually needed. */
    if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
        if (findchar(PyUnicode_AS_UNICODE(text),
                     PyUnicode_GET_SIZE(text), '\n'))
            haslf = 1;

    if (haslf && self->writetranslate && self->writenl != NULL) {
        /* text = text.replace("\n", writenl) */
        PyObject *newtext = PyObject_CallMethod(
            text, "replace", "ss", "\n", self->writenl);
        Py_DECREF(text);
        if (newtext == NULL)
            return NULL;
        text = newtext;
    }

    /* With line buffering, a "\n" or a "\r" triggers a flush. */
    if (self->line_buffering &&
        (haslf ||
         findchar(PyUnicode_AS_UNICODE(text),
                  PyUnicode_GET_SIZE(text), '\r')))
        needflush = 1;

    /* XXX What if we were just reading? */
    if (self->encodefunc != NULL) {
        /* Use the C-level encoding function when one is set. */
        b = (*self->encodefunc)((PyObject *) self, text);
        self->encoding_start_of_stream = 0;
    }
    else
        b = PyObject_CallMethodObjArgs(self->encoder,
                                       _PyIO_str_encode, text, NULL);
    Py_DECREF(text);
    if (b == NULL)
        return NULL;

    /* Lazily create the list of pending byte chunks. */
    if (self->pending_bytes == NULL) {
        self->pending_bytes = PyList_New(0);
        if (self->pending_bytes == NULL) {
            Py_DECREF(b);
            return NULL;
        }
        self->pending_bytes_count = 0;
    }
    if (PyList_Append(self->pending_bytes, b) < 0) {
        Py_DECREF(b);
        return NULL;
    }
    self->pending_bytes_count += PyBytes_GET_SIZE(b);
    Py_DECREF(b);
    if (self->pending_bytes_count > self->chunk_size || needflush) {
        if (_textiowrapper_writeflush(self) < 0)
            return NULL;
    }

    if (needflush) {
        /* Line buffering: also flush the underlying buffer object. */
        ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    /* A write invalidates the read-side snapshot and decoder state. */
    Py_CLEAR(self->snapshot);

    if (self->decoder) {
        ret = PyObject_CallMethod(self->decoder, "reset", NULL);
        if (ret == NULL)
            return NULL;
        Py_DECREF(ret);
    }

    return PyLong_FromSsize_t(textlen);
}
1318
/* Steal a reference to chars and store it in the decoded_char buffer;
   the previously stored object (if any) is released and the read offset
   decoded_chars_used is reset to 0.  Callers in this file also pass NULL
   to simply empty the buffer.
 */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
{
    Py_CLEAR(self->decoded_chars);
    self->decoded_chars = chars;
    self->decoded_chars_used = 0;
}
1328
1329static PyObject *
1330textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1331{
1332 PyObject *chars;
1333 Py_ssize_t avail;
1334
1335 if (self->decoded_chars == NULL)
1336 return PyUnicode_FromStringAndSize(NULL, 0);
1337
1338 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1339 - self->decoded_chars_used);
1340
1341 assert(avail >= 0);
1342
1343 if (n < 0 || n > avail)
1344 n = avail;
1345
1346 if (self->decoded_chars_used > 0 || n < avail) {
1347 chars = PyUnicode_FromUnicode(
1348 PyUnicode_AS_UNICODE(self->decoded_chars)
1349 + self->decoded_chars_used, n);
1350 if (chars == NULL)
1351 return NULL;
1352 }
1353 else {
1354 chars = self->decoded_chars;
1355 Py_INCREF(chars);
1356 }
1357
1358 self->decoded_chars_used += n;
1359 return chars;
1360}
1361
1362/* Read and decode the next chunk of data from the BufferedReader.
1363 */
1364static int
1365textiowrapper_read_chunk(textio *self)
1366{
1367 PyObject *dec_buffer = NULL;
1368 PyObject *dec_flags = NULL;
1369 PyObject *input_chunk = NULL;
1370 PyObject *decoded_chars, *chunk_size;
1371 int eof;
1372
1373 /* The return value is True unless EOF was reached. The decoded string is
1374 * placed in self._decoded_chars (replacing its previous value). The
1375 * entire input chunk is sent to the decoder, though some of it may remain
1376 * buffered in the decoder, yet to be converted.
1377 */
1378
1379 if (self->decoder == NULL) {
1380 PyErr_SetString(PyExc_IOError, "not readable");
1381 return -1;
1382 }
1383
1384 if (self->telling) {
1385 /* To prepare for tell(), we need to snapshot a point in the file
1386 * where the decoder's input buffer is empty.
1387 */
1388
1389 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1390 _PyIO_str_getstate, NULL);
1391 if (state == NULL)
1392 return -1;
1393 /* Given this, we know there was a valid snapshot point
1394 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1395 */
1396 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1397 Py_DECREF(state);
1398 return -1;
1399 }
1400 Py_INCREF(dec_buffer);
1401 Py_INCREF(dec_flags);
1402 Py_DECREF(state);
1403 }
1404
1405 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1406 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1407 if (chunk_size == NULL)
1408 goto fail;
1409 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1410 _PyIO_str_read1, chunk_size, NULL);
1411 Py_DECREF(chunk_size);
1412 if (input_chunk == NULL)
1413 goto fail;
1414 assert(PyBytes_Check(input_chunk));
1415
1416 eof = (PyBytes_Size(input_chunk) == 0);
1417
1418 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1419 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1420 self->decoder, input_chunk, eof);
1421 }
1422 else {
1423 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1424 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1425 }
1426
1427 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1428 if (decoded_chars == NULL)
1429 goto fail;
1430 textiowrapper_set_decoded_chars(self, decoded_chars);
1431 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1432 eof = 0;
1433
1434 if (self->telling) {
1435 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1436 * next input to be decoded is dec_buffer + input_chunk.
1437 */
1438 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1439 if (next_input == NULL)
1440 goto fail;
1441 assert (PyBytes_Check(next_input));
1442 Py_DECREF(dec_buffer);
1443 Py_CLEAR(self->snapshot);
1444 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1445 }
1446 Py_DECREF(input_chunk);
1447
1448 return (eof == 0);
1449
1450 fail:
1451 Py_XDECREF(dec_buffer);
1452 Py_XDECREF(dec_flags);
1453 Py_XDECREF(input_chunk);
1454 return -1;
1455}
1456
1457static PyObject *
1458textiowrapper_read(textio *self, PyObject *args)
1459{
1460 Py_ssize_t n = -1;
1461 PyObject *result = NULL, *chunks = NULL;
1462
1463 CHECK_INITIALIZED(self);
1464
Benjamin Petersonddd392c2009-12-13 19:19:07 +00001465 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Antoine Pitrou19690592009-06-12 20:14:08 +00001466 return NULL;
1467
1468 CHECK_CLOSED(self);
1469
1470 if (self->decoder == NULL) {
1471 PyErr_SetString(PyExc_IOError, "not readable");
1472 return NULL;
1473 }
1474
1475 if (_textiowrapper_writeflush(self) < 0)
1476 return NULL;
1477
1478 if (n < 0) {
1479 /* Read everything */
1480 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1481 PyObject *decoded, *final;
1482 if (bytes == NULL)
1483 goto fail;
1484 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1485 bytes, Py_True, NULL);
1486 Py_DECREF(bytes);
1487 if (decoded == NULL)
1488 goto fail;
1489
1490 result = textiowrapper_get_decoded_chars(self, -1);
1491
1492 if (result == NULL) {
1493 Py_DECREF(decoded);
1494 return NULL;
1495 }
1496
1497 final = PyUnicode_Concat(result, decoded);
1498 Py_DECREF(result);
1499 Py_DECREF(decoded);
1500 if (final == NULL)
1501 goto fail;
1502
1503 Py_CLEAR(self->snapshot);
1504 return final;
1505 }
1506 else {
1507 int res = 1;
1508 Py_ssize_t remaining = n;
1509
1510 result = textiowrapper_get_decoded_chars(self, n);
1511 if (result == NULL)
1512 goto fail;
1513 remaining -= PyUnicode_GET_SIZE(result);
1514
1515 /* Keep reading chunks until we have n characters to return */
1516 while (remaining > 0) {
1517 res = textiowrapper_read_chunk(self);
1518 if (res < 0)
1519 goto fail;
1520 if (res == 0) /* EOF */
1521 break;
1522 if (chunks == NULL) {
1523 chunks = PyList_New(0);
1524 if (chunks == NULL)
1525 goto fail;
1526 }
1527 if (PyList_Append(chunks, result) < 0)
1528 goto fail;
1529 Py_DECREF(result);
1530 result = textiowrapper_get_decoded_chars(self, remaining);
1531 if (result == NULL)
1532 goto fail;
1533 remaining -= PyUnicode_GET_SIZE(result);
1534 }
1535 if (chunks != NULL) {
1536 if (result != NULL && PyList_Append(chunks, result) < 0)
1537 goto fail;
1538 Py_CLEAR(result);
1539 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1540 if (result == NULL)
1541 goto fail;
1542 Py_CLEAR(chunks);
1543 }
1544 return result;
1545 }
1546 fail:
1547 Py_XDECREF(result);
1548 Py_XDECREF(chunks);
1549 return NULL;
1550}
1551
1552
1553/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1554 that is to the NUL character. Otherwise the function will produce
1555 incorrect results. */
1556static Py_UNICODE *
1557find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1558{
1559 Py_UNICODE *s = start;
1560 for (;;) {
1561 while (*s > ch)
1562 s++;
1563 if (*s == ch)
1564 return s;
1565 if (s == end)
1566 return NULL;
1567 s++;
1568 }
1569}
1570
/* Search [start, end) for a line ending.
 *
 * translated: non-zero if newlines were already translated, so only
 *             '\n' has to be searched for.
 * universal:  non-zero for universal newlines ('\r', '\r\n' or '\n').
 * readnl:     otherwise, a byte string holding the line terminator to
 *             look for (read with the PyString_* macros).
 * start/end:  the text to scan; the scanning loops rely on the storage
 *             being NUL-terminated at `end`.
 *
 * Returns the offset, relative to `start`, just past the line ending
 * that was found.  If no line ending is found, returns -1 and stores in
 * *consumed the number of characters that can safely be put aside.
 * *consumed is only written when -1 is returned.
 */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
{
    Py_ssize_t len = end - start;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        Py_UNICODE *pos = find_control_char(start, end, '\n');
        if (pos != NULL)
            return pos - start + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        Py_UNICODE *s = start;
        for (;;) {
            Py_UNICODE ch;
            /* Fast path for non-control chars. The loop always ends
               since the Py_UNICODE storage is NUL-terminated. */
            while (*s > '\r')
                s++;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            ch = *s++;
            if (ch == '\n')
                return s - start;
            if (ch == '\r') {
                /* Treat "\r\n" as a single line ending. */
                if (*s == '\n')
                    return s - start + 1;
                else
                    return s - start;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
        unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
        if (readnl_len == 1) {
            /* Single-character terminator: a plain character search. */
            Py_UNICODE *pos = find_control_char(start, end, nl[0]);
            if (pos != NULL)
                return pos - start + 1;
            *consumed = len;
            return -1;
        }
        else {
            Py_UNICODE *s = start;
            /* `e` is one past the last position where a complete
               terminator could still start. */
            Py_UNICODE *e = end - readnl_len + 1;
            Py_UNICODE *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                /* NOTE: this declaration shadows the outer `pos`. */
                Py_UNICODE *pos = find_control_char(s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                /* First character matched; compare the rest. */
                for (i = 1; i < readnl_len; i++) {
                    if (pos[i] != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return pos - start + readnl_len;
                s = pos + 1;
            }
            /* No complete terminator.  If its first character occurs in
               the tail, only the text before it may be consumed, since
               the terminator could be completed by the next chunk. */
            pos = find_control_char(e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = pos - start;
            return -1;
        }
    }
}
1653
/* Core of readline(): return the next line as a new unicode object.
 *
 * limit: maximum number of characters to return; a negative value means
 *        no limit.
 *
 * Pending written data is flushed first.  Decoded chunks are scanned with
 * _PyIO_find_line_ending(); text without a line ending is collected in
 * the `chunks` list, and a tail that may be the start of a line ending
 * is kept in `remaining` and prepended to the next chunk.  At EOF the
 * data collected so far (possibly an empty string) is returned.
 *
 * Returns NULL with an exception set on error.
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    /* Number of characters already stored in `chunks`. */
    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0)
                goto error;
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer directly, from the read offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the tail kept from the previous chunk;
               offset_to_buffer records how many characters of `line`
               come before the start of the decoded buffer. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Line ending found; cut at the limit if it comes first. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            if (start == 0 && Py_REFCNT(line) == 1) {
                /* We hold the only reference: shrink in place. */
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                /* Otherwise copy the [start, endpos) slice. */
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* A tail was still pending at EOF: it belongs to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside plus the final piece. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_CLEAR(line);
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1807
1808static PyObject *
1809textiowrapper_readline(textio *self, PyObject *args)
1810{
1811 PyObject *limitobj = NULL;
1812 Py_ssize_t limit = -1;
1813
1814 CHECK_INITIALIZED(self);
1815 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1816 return NULL;
1817 }
1818 if (limitobj) {
1819 if (!PyNumber_Check(limitobj)) {
1820 PyErr_Format(PyExc_TypeError,
1821 "integer argument expected, got '%.200s'",
1822 Py_TYPE(limitobj)->tp_name);
1823 return NULL;
1824 }
1825 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1826 if (limit == -1 && PyErr_Occurred())
1827 return NULL;
1828 }
1829 return _textiowrapper_readline(self, limit);
1830}
1831
1832/* Seek and Tell */
1833
/* Unpacked form of the opaque integer "cookie" returned by tell() and
   accepted by seek(). */
typedef struct {
    Py_off_t start_pos;   /* position in the underlying buffer of the safe
                             start point */
    int dec_flags;        /* decoder flags to restore at start_pos */
    int bytes_to_feed;    /* bytes to read from start_pos and feed to the
                             decoder */
    int chars_to_skip;    /* decoded characters to skip after feeding */
    char need_eof;        /* passed as the `final` argument of decode()
                             when seek() replays the input */
} cookie_type;
1841
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various CookieStruct fields will be stored.
 */

/* Total size of the intermediary byte string: one Py_off_t, three ints
   and one char, packed without padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1879
1880static int
1881textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1882{
1883 unsigned char buffer[COOKIE_BUF_LEN];
1884 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1885 if (cookieLong == NULL)
1886 return -1;
1887
1888 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1889 IS_LITTLE_ENDIAN, 0) < 0) {
1890 Py_DECREF(cookieLong);
1891 return -1;
1892 }
1893 Py_DECREF(cookieLong);
1894
1895 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1896 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1897 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1898 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1899 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1900
1901 return 0;
1902}
1903
1904static PyObject *
1905textiowrapper_build_cookie(cookie_type *cookie)
1906{
1907 unsigned char buffer[COOKIE_BUF_LEN];
1908
1909 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1910 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1911 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1912 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1913 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1914
1915 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1916}
1917#undef IS_LITTLE_ENDIAN
1918
1919static int
1920_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1921{
1922 PyObject *res;
1923 /* When seeking to the start of the stream, we call decoder.reset()
1924 rather than decoder.getstate().
1925 This is for a few decoders such as utf-16 for which the state value
1926 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1927 utf-16, that we are expecting a BOM).
1928 */
1929 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1930 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1931 else
1932 res = PyObject_CallMethod(self->decoder, "setstate",
1933 "((si))", "", cookie->dec_flags);
1934 if (res == NULL)
1935 return -1;
1936 Py_DECREF(res);
1937 return 0;
1938}
1939
1940static int
1941_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1942{
1943 PyObject *res;
1944 /* Same as _textiowrapper_decoder_setstate() above. */
1945 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1946 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1947 self->encoding_start_of_stream = 1;
1948 }
1949 else {
1950 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1951 _PyIO_zero, NULL);
1952 self->encoding_start_of_stream = 0;
1953 }
1954 if (res == NULL)
1955 return -1;
1956 Py_DECREF(res);
1957 return 0;
1958}
1959
/* seek(cookie, whence=0)
 *
 * whence == 0: `cookie` is a value previously returned by tell() (or 0);
 *              the stream is repositioned at the safe start point stored
 *              in it and the decoder/encoder states are restored.
 * whence == 1: only a zero offset is accepted; the current tell() value
 *              is then used as the cookie for an absolute seek.
 * whence == 2: only a zero offset is accepted; seeks the underlying
 *              buffer to its end and returns that call's result.
 *
 * Returns the cookie object (new reference) for whence 0 and 1, or NULL
 * with an exception set on error.
 */
static PyObject *
textiowrapper_seek(textio *self, PyObject *args)
{
    PyObject *cookieObj, *posobj;
    cookie_type cookie;
    int whence = 0;
    PyObject *res;
    int cmp;

    CHECK_INITIALIZED(self);

    if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
        return NULL;
    CHECK_CLOSED(self);

    /* cookieObj is an owned reference from here on; every exit must
       either return it or release it (see `fail`). */
    Py_INCREF(cookieObj);

    if (!self->seekable) {
        PyErr_SetString(PyExc_IOError,
                        "underlying stream is not seekable");
        goto fail;
    }

    if (whence == 1) {
        /* seek relative to current position */
        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            PyErr_SetString(PyExc_IOError,
                            "can't do nonzero cur-relative seeks");
            goto fail;
        }

        /* Seeking to the current position should attempt to
         * sync the underlying buffer with the current position.
         */
        Py_DECREF(cookieObj);
        cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
        if (cookieObj == NULL)
            goto fail;
        /* Falls through to the absolute seek below. */
    }
    else if (whence == 2) {
        /* seek relative to end of file */

        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            PyErr_SetString(PyExc_IOError,
                            "can't do nonzero end-relative seeks");
            goto fail;
        }

        res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
        if (res == NULL)
            goto fail;
        Py_DECREF(res);

        /* Drop all buffered read state before moving. */
        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
        if (self->decoder) {
            res = PyObject_CallMethod(self->decoder, "reset", NULL);
            if (res == NULL)
                goto fail;
            Py_DECREF(res);
        }

        /* buffer.seek(0, 2); its result is returned as is. */
        res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
        Py_XDECREF(cookieObj);
        return res;
    }
    else if (whence != 0) {
        PyErr_Format(PyExc_ValueError,
                     "invalid whence (%d, should be 0, 1 or 2)", whence);
        goto fail;
    }

    /* Absolute seek: negative cookies are rejected. */
    cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
    if (cmp < 0)
        goto fail;

    if (cmp == 1) {
        PyObject *repr = PyObject_Repr(cookieObj);
        if (repr != NULL) {
            PyErr_Format(PyExc_ValueError,
                         "negative seek position %s",
                         PyString_AS_STRING(repr));
            Py_DECREF(repr);
        }
        /* If repr() failed, its exception is the one reported. */
        goto fail;
    }

    res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    /* The strategy of seek() is to go back to the safe start point
     * and replay the effect of read(chars_to_skip) from there.
     */
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
        goto fail;

    /* Seek back to the safe start point. */
    posobj = PyLong_FromOff_t(cookie.start_pos);
    if (posobj == NULL)
        goto fail;
    res = PyObject_CallMethodObjArgs(self->buffer,
                                     _PyIO_str_seek, posobj, NULL);
    Py_DECREF(posobj);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    /* Restore the decoder to its state from the safe start point. */
    if (self->decoder) {
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    if (cookie.chars_to_skip) {
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
        PyObject *input_chunk = PyObject_CallMethod(
            self->buffer, "read", "i", cookie.bytes_to_feed);
        PyObject *decoded;

        if (input_chunk == NULL)
            goto fail;

        assert (PyBytes_Check(input_chunk));

        /* "N": the snapshot tuple takes over the reference to
           input_chunk; it stays alive through self->snapshot while it
           is used below. */
        self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
        if (self->snapshot == NULL) {
            Py_DECREF(input_chunk);
            goto fail;
        }

        decoded = PyObject_CallMethod(self->decoder, "decode",
                                      "Oi", input_chunk, (int)cookie.need_eof);

        if (decoded == NULL)
            goto fail;

        textiowrapper_set_decoded_chars(self, decoded);

        /* Skip chars_to_skip of the decoded characters. */
        if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
            PyErr_SetString(PyExc_IOError, "can't restore logical file position");
            goto fail;
        }
        self->decoded_chars_used = cookie.chars_to_skip;
    }
    else {
        /* Nothing to replay: the snapshot holds an empty input. */
        self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
        if (self->snapshot == NULL)
            goto fail;
    }

    /* Finally, reset the encoder (merely useful for proper BOM handling) */
    if (self->encoder) {
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
            goto fail;
    }
    return cookieObj;
  fail:
    Py_XDECREF(cookieObj);
    return NULL;

}
2135
2136static PyObject *
2137textiowrapper_tell(textio *self, PyObject *args)
2138{
2139 PyObject *res;
2140 PyObject *posobj = NULL;
2141 cookie_type cookie = {0,0,0,0,0};
2142 PyObject *next_input;
2143 Py_ssize_t chars_to_skip, chars_decoded;
2144 PyObject *saved_state = NULL;
2145 char *input, *input_end;
2146
2147 CHECK_INITIALIZED(self);
2148 CHECK_CLOSED(self);
2149
2150 if (!self->seekable) {
2151 PyErr_SetString(PyExc_IOError,
2152 "underlying stream is not seekable");
2153 goto fail;
2154 }
2155 if (!self->telling) {
2156 PyErr_SetString(PyExc_IOError,
2157 "telling position disabled by next() call");
2158 goto fail;
2159 }
2160
2161 if (_textiowrapper_writeflush(self) < 0)
2162 return NULL;
2163 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2164 if (res == NULL)
2165 goto fail;
2166 Py_DECREF(res);
2167
2168 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2169 if (posobj == NULL)
2170 goto fail;
2171
2172 if (self->decoder == NULL || self->snapshot == NULL) {
2173 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2174 return posobj;
2175 }
2176
2177#if defined(HAVE_LARGEFILE_SUPPORT)
2178 cookie.start_pos = PyLong_AsLongLong(posobj);
2179#else
2180 cookie.start_pos = PyLong_AsLong(posobj);
2181#endif
2182 if (PyErr_Occurred())
2183 goto fail;
2184
2185 /* Skip backward to the snapshot point (see _read_chunk). */
2186 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2187 goto fail;
2188
2189 assert (PyBytes_Check(next_input));
2190
2191 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2192
2193 /* How many decoded characters have been used up since the snapshot? */
2194 if (self->decoded_chars_used == 0) {
2195 /* We haven't moved from the snapshot point. */
2196 Py_DECREF(posobj);
2197 return textiowrapper_build_cookie(&cookie);
2198 }
2199
2200 chars_to_skip = self->decoded_chars_used;
2201
2202 /* Starting from the snapshot position, we will walk the decoder
2203 * forward until it gives us enough decoded characters.
2204 */
2205 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2206 _PyIO_str_getstate, NULL);
2207 if (saved_state == NULL)
2208 goto fail;
2209
2210 /* Note our initial start point. */
2211 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2212 goto fail;
2213
2214 /* Feed the decoder one byte at a time. As we go, note the
2215 * nearest "safe start point" before the current location
2216 * (a point where the decoder has nothing buffered, so seek()
2217 * can safely start from there and advance to this location).
2218 */
2219 chars_decoded = 0;
2220 input = PyBytes_AS_STRING(next_input);
2221 input_end = input + PyBytes_GET_SIZE(next_input);
2222 while (input < input_end) {
2223 PyObject *state;
2224 char *dec_buffer;
2225 Py_ssize_t dec_buffer_len;
2226 int dec_flags;
2227
2228 PyObject *decoded = PyObject_CallMethod(
2229 self->decoder, "decode", "s#", input, 1);
2230 if (decoded == NULL)
2231 goto fail;
2232 assert (PyUnicode_Check(decoded));
2233 chars_decoded += PyUnicode_GET_SIZE(decoded);
2234 Py_DECREF(decoded);
2235
2236 cookie.bytes_to_feed += 1;
2237
2238 state = PyObject_CallMethodObjArgs(self->decoder,
2239 _PyIO_str_getstate, NULL);
2240 if (state == NULL)
2241 goto fail;
2242 if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2243 Py_DECREF(state);
2244 goto fail;
2245 }
2246 Py_DECREF(state);
2247
2248 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2249 /* Decoder buffer is empty, so this is a safe start point. */
2250 cookie.start_pos += cookie.bytes_to_feed;
2251 chars_to_skip -= chars_decoded;
2252 cookie.dec_flags = dec_flags;
2253 cookie.bytes_to_feed = 0;
2254 chars_decoded = 0;
2255 }
2256 if (chars_decoded >= chars_to_skip)
2257 break;
2258 input++;
2259 }
2260 if (input == input_end) {
2261 /* We didn't get enough decoded data; signal EOF to get more. */
2262 PyObject *decoded = PyObject_CallMethod(
2263 self->decoder, "decode", "si", "", /* final = */ 1);
2264 if (decoded == NULL)
2265 goto fail;
2266 assert (PyUnicode_Check(decoded));
2267 chars_decoded += PyUnicode_GET_SIZE(decoded);
2268 Py_DECREF(decoded);
2269 cookie.need_eof = 1;
2270
2271 if (chars_decoded < chars_to_skip) {
2272 PyErr_SetString(PyExc_IOError,
2273 "can't reconstruct logical file position");
2274 goto fail;
2275 }
2276 }
2277
2278 /* finally */
2279 Py_XDECREF(posobj);
2280 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2281 Py_DECREF(saved_state);
2282 if (res == NULL)
2283 return NULL;
2284 Py_DECREF(res);
2285
2286 /* The returned cookie corresponds to the last safe start point. */
2287 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2288 return textiowrapper_build_cookie(&cookie);
2289
2290 fail:
2291 Py_XDECREF(posobj);
2292 if (saved_state) {
2293 PyObject *type, *value, *traceback;
2294 PyErr_Fetch(&type, &value, &traceback);
2295
2296 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2297 Py_DECREF(saved_state);
2298 if (res == NULL)
2299 return NULL;
2300 Py_DECREF(res);
2301
2302 PyErr_Restore(type, value, traceback);
2303 }
2304 return NULL;
2305}
2306
2307static PyObject *
2308textiowrapper_truncate(textio *self, PyObject *args)
2309{
2310 PyObject *pos = Py_None;
2311 PyObject *res;
2312
2313 CHECK_INITIALIZED(self)
2314 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2315 return NULL;
2316 }
2317
2318 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2319 if (res == NULL)
2320 return NULL;
2321 Py_DECREF(res);
2322
Antoine Pitrouf3fa0742010-01-31 22:26:04 +00002323 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Antoine Pitrou19690592009-06-12 20:14:08 +00002324}
2325
2326static PyObject *
2327textiowrapper_repr(textio *self)
2328{
2329 PyObject *nameobj, *res;
2330 PyObject *namerepr = NULL, *encrepr = NULL;
2331
2332 CHECK_INITIALIZED(self);
2333
2334 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2335 if (nameobj == NULL) {
2336 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2337 PyErr_Clear();
2338 else
2339 goto error;
2340 encrepr = PyObject_Repr(self->encoding);
2341 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2342 PyString_AS_STRING(encrepr));
2343 }
2344 else {
2345 encrepr = PyObject_Repr(self->encoding);
2346 namerepr = PyObject_Repr(nameobj);
2347 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2348 PyString_AS_STRING(namerepr),
2349 PyString_AS_STRING(encrepr));
2350 Py_DECREF(nameobj);
2351 }
2352 Py_XDECREF(namerepr);
2353 Py_XDECREF(encrepr);
2354 return res;
2355
2356error:
2357 Py_XDECREF(namerepr);
2358 Py_XDECREF(encrepr);
2359 return NULL;
2360}
2361
2362
2363/* Inquiries */
2364
2365static PyObject *
2366textiowrapper_fileno(textio *self, PyObject *args)
2367{
2368 CHECK_INITIALIZED(self);
2369 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2370}
2371
2372static PyObject *
2373textiowrapper_seekable(textio *self, PyObject *args)
2374{
2375 CHECK_INITIALIZED(self);
2376 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2377}
2378
2379static PyObject *
2380textiowrapper_readable(textio *self, PyObject *args)
2381{
2382 CHECK_INITIALIZED(self);
2383 return PyObject_CallMethod(self->buffer, "readable", NULL);
2384}
2385
2386static PyObject *
2387textiowrapper_writable(textio *self, PyObject *args)
2388{
2389 CHECK_INITIALIZED(self);
2390 return PyObject_CallMethod(self->buffer, "writable", NULL);
2391}
2392
2393static PyObject *
2394textiowrapper_isatty(textio *self, PyObject *args)
2395{
2396 CHECK_INITIALIZED(self);
2397 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2398}
2399
2400static PyObject *
2401textiowrapper_flush(textio *self, PyObject *args)
2402{
2403 CHECK_INITIALIZED(self);
2404 CHECK_CLOSED(self);
2405 self->telling = self->seekable;
2406 if (_textiowrapper_writeflush(self) < 0)
2407 return NULL;
2408 return PyObject_CallMethod(self->buffer, "flush", NULL);
2409}
2410
2411static PyObject *
2412textiowrapper_close(textio *self, PyObject *args)
2413{
2414 PyObject *res;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002415 int r;
Antoine Pitrou19690592009-06-12 20:14:08 +00002416 CHECK_INITIALIZED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002417
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002418 res = textiowrapper_closed_get(self, NULL);
2419 if (res == NULL)
2420 return NULL;
2421 r = PyObject_IsTrue(res);
2422 Py_DECREF(res);
2423 if (r < 0)
2424 return NULL;
2425
2426 if (r > 0) {
2427 Py_RETURN_NONE; /* stream already closed */
2428 }
2429 else {
2430 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2431 if (res == NULL) {
2432 return NULL;
2433 }
2434 else
2435 Py_DECREF(res);
2436
2437 return PyObject_CallMethod(self->buffer, "close", NULL);
2438 }
Antoine Pitrou19690592009-06-12 20:14:08 +00002439}
2440
2441static PyObject *
2442textiowrapper_iternext(textio *self)
2443{
2444 PyObject *line;
2445
2446 CHECK_INITIALIZED(self);
2447
2448 self->telling = 0;
2449 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2450 /* Skip method call overhead for speed */
2451 line = _textiowrapper_readline(self, -1);
2452 }
2453 else {
2454 line = PyObject_CallMethodObjArgs((PyObject *)self,
2455 _PyIO_str_readline, NULL);
2456 if (line && !PyUnicode_Check(line)) {
2457 PyErr_Format(PyExc_IOError,
2458 "readline() should have returned an str object, "
2459 "not '%.200s'", Py_TYPE(line)->tp_name);
2460 Py_DECREF(line);
2461 return NULL;
2462 }
2463 }
2464
2465 if (line == NULL)
2466 return NULL;
2467
2468 if (PyUnicode_GET_SIZE(line) == 0) {
2469 /* Reached EOF or would have blocked */
2470 Py_DECREF(line);
2471 Py_CLEAR(self->snapshot);
2472 self->telling = self->seekable;
2473 return NULL;
2474 }
2475
2476 return line;
2477}
2478
2479static PyObject *
2480textiowrapper_name_get(textio *self, void *context)
2481{
2482 CHECK_INITIALIZED(self);
2483 return PyObject_GetAttrString(self->buffer, "name");
2484}
2485
2486static PyObject *
2487textiowrapper_closed_get(textio *self, void *context)
2488{
2489 CHECK_INITIALIZED(self);
2490 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2491}
2492
2493static PyObject *
2494textiowrapper_newlines_get(textio *self, void *context)
2495{
2496 PyObject *res;
2497 CHECK_INITIALIZED(self);
2498 if (self->decoder == NULL)
2499 Py_RETURN_NONE;
2500 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2501 if (res == NULL) {
2502 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2503 PyErr_Clear();
2504 Py_RETURN_NONE;
2505 }
2506 else {
2507 return NULL;
2508 }
2509 }
2510 return res;
2511}
2512
2513static PyObject *
2514textiowrapper_errors_get(textio *self, void *context)
2515{
2516 CHECK_INITIALIZED(self);
2517 Py_INCREF(self->errors);
2518 return self->errors;
2519}
2520
2521static PyObject *
2522textiowrapper_chunk_size_get(textio *self, void *context)
2523{
2524 CHECK_INITIALIZED(self);
2525 return PyLong_FromSsize_t(self->chunk_size);
2526}
2527
2528static int
2529textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2530{
2531 Py_ssize_t n;
2532 CHECK_INITIALIZED_INT(self);
2533 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2534 if (n == -1 && PyErr_Occurred())
2535 return -1;
2536 if (n <= 0) {
2537 PyErr_SetString(PyExc_ValueError,
2538 "a strictly positive integer is required");
2539 return -1;
2540 }
2541 self->chunk_size = n;
2542 return 0;
2543}
2544
2545static PyMethodDef textiowrapper_methods[] = {
2546 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2547 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2548 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2549 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2550 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2551 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2552
2553 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2554 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2555 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2556 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2557 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2558
2559 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2560 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2561 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2562 {NULL, NULL}
2563};
2564
2565static PyMemberDef textiowrapper_members[] = {
2566 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2567 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2568 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2569 {NULL}
2570};
2571
2572static PyGetSetDef textiowrapper_getset[] = {
2573 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2574 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2575/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2576*/
2577 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2578 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2579 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2580 (setter)textiowrapper_chunk_size_set, NULL},
2581 {NULL}
2582};
2583
2584PyTypeObject PyTextIOWrapper_Type = {
2585 PyVarObject_HEAD_INIT(NULL, 0)
2586 "_io.TextIOWrapper", /*tp_name*/
2587 sizeof(textio), /*tp_basicsize*/
2588 0, /*tp_itemsize*/
2589 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2590 0, /*tp_print*/
2591 0, /*tp_getattr*/
2592 0, /*tps_etattr*/
2593 0, /*tp_compare */
2594 (reprfunc)textiowrapper_repr,/*tp_repr*/
2595 0, /*tp_as_number*/
2596 0, /*tp_as_sequence*/
2597 0, /*tp_as_mapping*/
2598 0, /*tp_hash */
2599 0, /*tp_call*/
2600 0, /*tp_str*/
2601 0, /*tp_getattro*/
2602 0, /*tp_setattro*/
2603 0, /*tp_as_buffer*/
2604 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2605 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2606 textiowrapper_doc, /* tp_doc */
2607 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2608 (inquiry)textiowrapper_clear, /* tp_clear */
2609 0, /* tp_richcompare */
2610 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2611 0, /* tp_iter */
2612 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2613 textiowrapper_methods, /* tp_methods */
2614 textiowrapper_members, /* tp_members */
2615 textiowrapper_getset, /* tp_getset */
2616 0, /* tp_base */
2617 0, /* tp_dict */
2618 0, /* tp_descr_get */
2619 0, /* tp_descr_set */
2620 offsetof(textio, dict), /*tp_dictoffset*/
2621 (initproc)textiowrapper_init, /* tp_init */
2622 0, /* tp_alloc */
2623 PyType_GenericNew, /* tp_new */
2624};