blob: 1746604fcacb4cfca602185feb178648a3e0ac39 [file] [log] [blame]
Antoine Pitrou19690592009-06-12 20:14:08 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
/* Docstring attached to the _io._TextIOBase type object (tp_doc slot). */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(_PyIO_unsupported_operation, message);
28 return NULL;
29}
30
31PyDoc_STRVAR(textiobase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
39textiobase_detach(PyObject *self)
40{
41 return _unsupported("detach");
42}
43
44PyDoc_STRVAR(textiobase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
52textiobase_read(PyObject *self, PyObject *args)
53{
54 return _unsupported("read");
55}
56
57PyDoc_STRVAR(textiobase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
64textiobase_readline(PyObject *self, PyObject *args)
65{
66 return _unsupported("readline");
67}
68
69PyDoc_STRVAR(textiobase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
76textiobase_write(PyObject *self, PyObject *args)
77{
78 return _unsupported("write");
79}
80
81PyDoc_STRVAR(textiobase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
88textiobase_encoding_get(PyObject *self, void *context)
89{
90 Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(textiobase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
102textiobase_newlines_get(PyObject *self, void *context)
103{
104 Py_RETURN_NONE;
105}
106
107PyDoc_STRVAR(textiobase_errors_doc,
108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
114textiobase_errors_get(PyObject *self, void *context)
115{
116 Py_RETURN_NONE;
117}
118
119
120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
125 {NULL, NULL}
126};
127
128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
132 {NULL}
133};
134
/* Type object for _io._TextIOBase.  It derives from PyIOBase_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE) and only contributes the stub methods and
   getters defined above; every other slot is left at 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
175
176
177/* IncrementalNewlineDecoder */
178
/* Docstring attached to the _io.IncrementalNewlineDecoder type (tp_doc). */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode.  It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
    "records the types of newlines encountered.  When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
188
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped decoder; Py_None means "input is
                                   already unicode"; NULL until __init__ */
    PyObject *errors;           /* error-handling setting given to __init__ */
    /* NOTE(review): the two flags below are signed one-bit fields; the code
       in this file only assigns 0/1 to them and tests them for truth. */
    signed int pendingcr: 1;    /* a trailing '\r' was held back by decode() */
    signed int translate: 1;    /* translate '\r' and '\r\n' to '\n' */
    unsigned int seennl: 3;     /* bit mask of SEEN_CR / SEEN_LF / SEEN_CRLF */
} nldecoder_object;
197
198static int
199incrementalnewlinedecoder_init(nldecoder_object *self,
200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags recording which newline conventions have been observed. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
/* Core of IncrementalNewlineDecoder.decode().

   _self  - an nldecoder_object (cast without a type check).
   input  - passed to the wrapped decoder's decode(); when the decoder is
            Py_None it is used directly as the decoded text.
   final  - non-zero when no more input will follow.

   Returns a new reference to a unicode object, or NULL with an exception
   set.  Updates self->pendingcr and self->seennl as a side effect. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    /* decoder stays NULL until __init__ has stored one */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* no wrapped decoder: the input itself is the text */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    /* Re-attach the '\r' held back by the previous call, but only once there
       is new text to put after it or the stream is ending. */
    output_len = PyUnicode_GET_SIZE(output);
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* shrink in place only when nobody else holds the string */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        /* nothing to scan; self->seennl is left as it was */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           The search is over raw bytes, so a hit only means the slower
           scans below are taken.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                /* NOTE(review): this loop (and the two below) may read the
                   element at in_str[len]; presumably the unicode buffer is
                   NUL-terminated there, which is what stops the fast inner
                   loop -- confirm against the unicode object layout. */
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Not translating: only classify the newlines that occur. */
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translating: copy the text while rewriting '\r' and '\r\n'
               to '\n'; the result can only be the same length or shorter. */
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                /* sole owner: rewrite the string in place */
                translated = output;
            }
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* `in` was already advanced past c, so in > end means c was
                   the element at index len rather than real text */
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* trim the slack left by collapsed "\r\n" pairs */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
461
462static PyObject *
463incrementalnewlinedecoder_decode(nldecoder_object *self,
464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
564 {NULL}
565};
566
567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
569 {NULL}
570};
571
/* Type object for _io.IncrementalNewlineDecoder.  Instances are created with
   PyType_GenericNew and set up by incrementalnewlinedecoder_init; the type
   can be subclassed (Py_TPFLAGS_BASETYPE) and has no explicit base. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
612
613
614/* TextIOWrapper */
615
/* Docstring for the TextIOWrapper type; describes the constructor arguments
   handled by textiowrapper_init() (encoding, errors, newline,
   line_buffering). */
PyDoc_STRVAR(textiowrapper_doc,
    "Character and line based layer over a BufferedIOBase object, buffer.\n"
    "\n"
    "encoding gives the name of the encoding that the stream will be\n"
    "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
    "\n"
    "errors determines the strictness of encoding and decoding (see the\n"
    "codecs.register) and defaults to \"strict\".\n"
    "\n"
    "newline controls how line endings are handled. It can be None, '',\n"
    "'\\n', '\\r', and '\\r\\n'.  It works as follows:\n"
    "\n"
    "* On input, if newline is None, universal newlines mode is\n"
    "  enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
    "  these are translated into '\\n' before being returned to the\n"
    "  caller. If it is '', universal newline mode is enabled, but line\n"
    "  endings are returned to the caller untranslated. If it has any of\n"
    "  the other legal values, input lines are only terminated by the given\n"
    "  string, and the line ending is returned to the caller untranslated.\n"
    "\n"
    "* On output, if newline is None, any '\\n' characters written are\n"
    "  translated to the system default line separator, os.linesep. If\n"
    "  newline is '', no translation takes place. If newline is any of the\n"
    "  other legal values, any '\\n' characters written are translated to\n"
    "  the given string.\n"
    "\n"
    "If line_buffering is True, a call to flush is implied when a call to\n"
    "write contains a newline character."
    );
645
/* Signature shared by the specialized encoders below.  They are declared
   with a `textio *` first parameter and cast to this type when stored in
   the encodefuncs table; the second parameter is the text to encode. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
648
/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;           /* set to 1 by detach(), cleared by __init__ */
    Py_ssize_t chunk_size;  /* set to 8192 by __init__ */
    PyObject *buffer;       /* the wrapped buffered stream */
    PyObject *encoding;     /* encoding name */
    PyObject *encoder;      /* incremental encoder, if buffer is writable */
    PyObject *decoder;      /* incremental decoder, if buffer is readable */
    PyObject *readnl;       /* the `newline` argument, or NULL if it was None */
    PyObject *errors;       /* error-handler name, stored as a bytes object */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;     /* newline was None or '' */
    char readtranslate;     /* newline was None */
    char writetranslate;    /* newline was None or non-empty */
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
700
701
702/* A couple of specialized cases in order to bypass the slow incremental
703 encoding methods for the most popular encodings. */
704
705static PyObject *
706ascii_encode(textio *self, PyObject *text)
707{
708 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
709 PyUnicode_GET_SIZE(text),
710 PyBytes_AS_STRING(self->errors));
711}
712
713static PyObject *
714utf16be_encode(textio *self, PyObject *text)
715{
716 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
717 PyUnicode_GET_SIZE(text),
718 PyBytes_AS_STRING(self->errors), 1);
719}
720
721static PyObject *
722utf16le_encode(textio *self, PyObject *text)
723{
724 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
725 PyUnicode_GET_SIZE(text),
726 PyBytes_AS_STRING(self->errors), -1);
727}
728
729static PyObject *
730utf16_encode(textio *self, PyObject *text)
731{
732 if (!self->encoding_start_of_stream) {
733 /* Skip the BOM and use native byte ordering */
734#if defined(WORDS_BIGENDIAN)
735 return utf16be_encode(self, text);
736#else
737 return utf16le_encode(self, text);
738#endif
739 }
740 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
741 PyUnicode_GET_SIZE(text),
742 PyBytes_AS_STRING(self->errors), 0);
743}
744
745static PyObject *
746utf32be_encode(textio *self, PyObject *text)
747{
748 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
749 PyUnicode_GET_SIZE(text),
750 PyBytes_AS_STRING(self->errors), 1);
751}
752
753static PyObject *
754utf32le_encode(textio *self, PyObject *text)
755{
756 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
757 PyUnicode_GET_SIZE(text),
758 PyBytes_AS_STRING(self->errors), -1);
759}
760
761static PyObject *
762utf32_encode(textio *self, PyObject *text)
763{
764 if (!self->encoding_start_of_stream) {
765 /* Skip the BOM and use native byte ordering */
766#if defined(WORDS_BIGENDIAN)
767 return utf32be_encode(self, text);
768#else
769 return utf32le_encode(self, text);
770#endif
771 }
772 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
773 PyUnicode_GET_SIZE(text),
774 PyBytes_AS_STRING(self->errors), 0);
775}
776
777static PyObject *
778utf8_encode(textio *self, PyObject *text)
779{
780 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
781 PyUnicode_GET_SIZE(text),
782 PyBytes_AS_STRING(self->errors));
783}
784
785static PyObject *
786latin1_encode(textio *self, PyObject *text)
787{
788 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
789 PyUnicode_GET_SIZE(text),
790 PyBytes_AS_STRING(self->errors));
791}
792
793/* Map normalized encoding names onto the specialized encoding funcs */
794
/* One row of the encodefuncs lookup table. */
typedef struct {
    const char *name;           /* normalized codec name to match */
    encodefunc_t encodefunc;    /* specialized encoder for that codec */
} encodefuncentry;
799
/* Lookup table searched by textiowrapper_init(): the codec's `name`
   attribute is compared with strcmp against each entry, and the first match
   supplies self->encodefunc.  Terminated by a NULL name. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}
};
812
813
814static int
815textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
816{
817 char *kwlist[] = {"buffer", "encoding", "errors",
818 "newline", "line_buffering",
819 NULL};
820 PyObject *buffer, *raw;
821 char *encoding = NULL;
822 char *errors = NULL;
823 char *newline = NULL;
824 int line_buffering = 0;
825
826 PyObject *res;
827 int r;
828
829 self->ok = 0;
830 self->detached = 0;
831 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
832 kwlist, &buffer, &encoding, &errors,
833 &newline, &line_buffering))
834 return -1;
835
836 if (newline && newline[0] != '\0'
837 && !(newline[0] == '\n' && newline[1] == '\0')
838 && !(newline[0] == '\r' && newline[1] == '\0')
839 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
840 PyErr_Format(PyExc_ValueError,
841 "illegal newline value: %s", newline);
842 return -1;
843 }
844
845 Py_CLEAR(self->buffer);
846 Py_CLEAR(self->encoding);
847 Py_CLEAR(self->encoder);
848 Py_CLEAR(self->decoder);
849 Py_CLEAR(self->readnl);
850 Py_CLEAR(self->decoded_chars);
851 Py_CLEAR(self->pending_bytes);
852 Py_CLEAR(self->snapshot);
853 Py_CLEAR(self->errors);
854 Py_CLEAR(self->raw);
855 self->decoded_chars_used = 0;
856 self->pending_bytes_count = 0;
857 self->encodefunc = NULL;
858 self->writenl = NULL;
859
860 if (encoding == NULL && self->encoding == NULL) {
861 if (_PyIO_locale_module == NULL) {
862 _PyIO_locale_module = PyImport_ImportModule("locale");
863 if (_PyIO_locale_module == NULL)
864 goto catch_ImportError;
865 else
866 goto use_locale;
867 }
868 else {
869 use_locale:
870 self->encoding = PyObject_CallMethod(
871 _PyIO_locale_module, "getpreferredencoding", NULL);
872 if (self->encoding == NULL) {
873 catch_ImportError:
874 /*
875 Importing locale can raise a ImportError because of
876 _functools, and locale.getpreferredencoding can raise a
877 ImportError if _locale is not available. These will happen
878 during module building.
879 */
880 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
881 PyErr_Clear();
882 self->encoding = PyString_FromString("ascii");
883 }
884 else
885 goto error;
886 }
887 else if (!PyString_Check(self->encoding))
888 Py_CLEAR(self->encoding);
889 }
890 }
891 if (self->encoding != NULL)
892 encoding = PyString_AsString(self->encoding);
893 else if (encoding != NULL) {
894 self->encoding = PyString_FromString(encoding);
895 if (self->encoding == NULL)
896 goto error;
897 }
898 else {
899 PyErr_SetString(PyExc_IOError,
900 "could not determine default encoding");
901 }
902
903 if (errors == NULL)
904 errors = "strict";
905 self->errors = PyBytes_FromString(errors);
906 if (self->errors == NULL)
907 goto error;
908
909 self->chunk_size = 8192;
910 self->readuniversal = (newline == NULL || newline[0] == '\0');
911 self->line_buffering = line_buffering;
912 self->readtranslate = (newline == NULL);
913 if (newline) {
914 self->readnl = PyString_FromString(newline);
915 if (self->readnl == NULL)
916 return -1;
917 }
918 self->writetranslate = (newline == NULL || newline[0] != '\0');
919 if (!self->readuniversal && self->writetranslate) {
920 self->writenl = PyString_AsString(self->readnl);
921 if (!strcmp(self->writenl, "\n"))
922 self->writenl = NULL;
923 }
924#ifdef MS_WINDOWS
925 else
926 self->writenl = "\r\n";
927#endif
928
929 /* Build the decoder object */
930 res = PyObject_CallMethod(buffer, "readable", NULL);
931 if (res == NULL)
932 goto error;
933 r = PyObject_IsTrue(res);
934 Py_DECREF(res);
935 if (r == -1)
936 goto error;
937 if (r == 1) {
938 self->decoder = PyCodec_IncrementalDecoder(
939 encoding, errors);
940 if (self->decoder == NULL)
941 goto error;
942
943 if (self->readuniversal) {
944 PyObject *incrementalDecoder = PyObject_CallFunction(
945 (PyObject *)&PyIncrementalNewlineDecoder_Type,
946 "Oi", self->decoder, (int)self->readtranslate);
947 if (incrementalDecoder == NULL)
948 goto error;
949 Py_CLEAR(self->decoder);
950 self->decoder = incrementalDecoder;
951 }
952 }
953
954 /* Build the encoder object */
955 res = PyObject_CallMethod(buffer, "writable", NULL);
956 if (res == NULL)
957 goto error;
958 r = PyObject_IsTrue(res);
959 Py_DECREF(res);
960 if (r == -1)
961 goto error;
962 if (r == 1) {
963 PyObject *ci;
964 self->encoder = PyCodec_IncrementalEncoder(
965 encoding, errors);
966 if (self->encoder == NULL)
967 goto error;
968 /* Get the normalized named of the codec */
969 ci = _PyCodec_Lookup(encoding);
970 if (ci == NULL)
971 goto error;
972 res = PyObject_GetAttrString(ci, "name");
973 Py_DECREF(ci);
974 if (res == NULL) {
975 if (PyErr_ExceptionMatches(PyExc_AttributeError))
976 PyErr_Clear();
977 else
978 goto error;
979 }
980 else if (PyString_Check(res)) {
981 encodefuncentry *e = encodefuncs;
982 while (e->name != NULL) {
983 if (!strcmp(PyString_AS_STRING(res), e->name)) {
984 self->encodefunc = e->encodefunc;
985 break;
986 }
987 e++;
988 }
989 }
990 Py_XDECREF(res);
991 }
992
993 self->buffer = buffer;
994 Py_INCREF(buffer);
995
996 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
997 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
998 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
999 raw = PyObject_GetAttrString(buffer, "raw");
1000 /* Cache the raw FileIO object to speed up 'closed' checks */
1001 if (raw == NULL) {
1002 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1003 PyErr_Clear();
1004 else
1005 goto error;
1006 }
1007 else if (Py_TYPE(raw) == &PyFileIO_Type)
1008 self->raw = raw;
1009 else
1010 Py_DECREF(raw);
1011 }
1012
1013 res = PyObject_CallMethod(buffer, "seekable", NULL);
1014 if (res == NULL)
1015 goto error;
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001016 r = PyObject_IsTrue(res);
Antoine Pitrou19690592009-06-12 20:14:08 +00001017 Py_DECREF(res);
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001018 if (r < 0)
1019 goto error;
1020 self->seekable = self->telling = r;
Antoine Pitrou19690592009-06-12 20:14:08 +00001021
1022 self->encoding_start_of_stream = 0;
1023 if (self->seekable && self->encoder) {
1024 PyObject *cookieObj;
1025 int cmp;
1026
1027 self->encoding_start_of_stream = 1;
1028
1029 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1030 if (cookieObj == NULL)
1031 goto error;
1032
1033 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1034 Py_DECREF(cookieObj);
1035 if (cmp < 0) {
1036 goto error;
1037 }
1038
1039 if (cmp == 0) {
1040 self->encoding_start_of_stream = 0;
1041 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1042 _PyIO_zero, NULL);
1043 if (res == NULL)
1044 goto error;
1045 Py_DECREF(res);
1046 }
1047 }
1048
1049 self->ok = 1;
1050 return 0;
1051
1052 error:
1053 return -1;
1054}
1055
1056static int
1057_textiowrapper_clear(textio *self)
1058{
1059 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1060 return -1;
1061 self->ok = 0;
1062 Py_CLEAR(self->buffer);
1063 Py_CLEAR(self->encoding);
1064 Py_CLEAR(self->encoder);
1065 Py_CLEAR(self->decoder);
1066 Py_CLEAR(self->readnl);
1067 Py_CLEAR(self->decoded_chars);
1068 Py_CLEAR(self->pending_bytes);
1069 Py_CLEAR(self->snapshot);
1070 Py_CLEAR(self->errors);
1071 Py_CLEAR(self->raw);
1072 return 0;
1073}
1074
1075static void
1076textiowrapper_dealloc(textio *self)
1077{
1078 if (_textiowrapper_clear(self) < 0)
1079 return;
1080 _PyObject_GC_UNTRACK(self);
1081 if (self->weakreflist != NULL)
1082 PyObject_ClearWeakRefs((PyObject *)self);
1083 Py_CLEAR(self->dict);
1084 Py_TYPE(self)->tp_free((PyObject *)self);
1085}
1086
1087static int
1088textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1089{
1090 Py_VISIT(self->buffer);
1091 Py_VISIT(self->encoding);
1092 Py_VISIT(self->encoder);
1093 Py_VISIT(self->decoder);
1094 Py_VISIT(self->readnl);
1095 Py_VISIT(self->decoded_chars);
1096 Py_VISIT(self->pending_bytes);
1097 Py_VISIT(self->snapshot);
1098 Py_VISIT(self->errors);
1099 Py_VISIT(self->raw);
1100
1101 Py_VISIT(self->dict);
1102 return 0;
1103}
1104
1105static int
1106textiowrapper_clear(textio *self)
1107{
1108 if (_textiowrapper_clear(self) < 0)
1109 return -1;
1110 Py_CLEAR(self->dict);
1111 return 0;
1112}
1113
/* Forward declaration: CHECK_CLOSED below expands to a call of this getter
   before its definition appears. */
static PyObject *
textiowrapper_closed_get(textio *self, void *context);

/* This macro takes some shortcuts to make the common case faster.

   It must be used inside a function that returns a pointer: on a closed
   stream it sets ValueError and executes `return NULL`, and it also returns
   NULL (exception already set) if the closed check itself fails.

   - Exact TextIOWrapper with a cached raw FileIO: ask the raw file directly.
   - Exact TextIOWrapper without one: evaluate the `closed` getter.
   - Any other type (a subclass): defer to _PyIOBase_check_closed(). */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1143
/* Guard for functions returning a pointer: when the wrapper is not usable
   (self->ok <= 0), raise ValueError -- with a message that distinguishes a
   detached buffer from a never-initialized object -- and return NULL. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
             self->detached \
                 ? "underlying buffer has been detached" \
                 : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1155
/* CHECK_INITIALIZED_INT(self): same test and error messages as
   CHECK_INITIALIZED, but the enclosing function is made to `return -1`,
   for use in functions that return an int status.

   NOTE(review): also expands to a bare `if` block rather than a
   do { } while (0) statement. */
1156#define CHECK_INITIALIZED_INT(self) \
1157 if (self->ok <= 0) { \
1158 if (self->detached) { \
1159 PyErr_SetString(PyExc_ValueError, \
1160 "underlying buffer has been detached"); \
1161 } else { \
1162 PyErr_SetString(PyExc_ValueError, \
1163 "I/O operation on uninitialized object"); \
1164 } \
1165 return -1; \
1166 }
1167
1168
1169static PyObject *
1170textiowrapper_detach(textio *self)
1171{
1172 PyObject *buffer, *res;
1173 CHECK_INITIALIZED(self);
1174 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1175 if (res == NULL)
1176 return NULL;
1177 Py_DECREF(res);
1178 buffer = self->buffer;
1179 self->buffer = NULL;
1180 self->detached = 1;
1181 self->ok = 0;
1182 return buffer;
1183}
1184
1185Py_LOCAL_INLINE(const Py_UNICODE *)
1186findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1187{
1188 /* like wcschr, but doesn't stop at NULL characters */
1189 while (size-- > 0) {
1190 if (*s == ch)
1191 return s;
1192 s++;
1193 }
1194 return NULL;
1195}
1196
1197/* Flush the internal write buffer. This doesn't explicitly flush the
1198 underlying buffered object, though. */
1199static int
1200_textiowrapper_writeflush(textio *self)
1201{
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001202 PyObject *pending, *b, *ret;
Antoine Pitrou19690592009-06-12 20:14:08 +00001203
1204 if (self->pending_bytes == NULL)
1205 return 0;
Amaury Forgeot d'Arcfff896b2009-08-29 18:14:40 +00001206
1207 pending = self->pending_bytes;
1208 Py_INCREF(pending);
1209 self->pending_bytes_count = 0;
1210 Py_CLEAR(self->pending_bytes);
1211
1212 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1213 Py_DECREF(pending);
Antoine Pitrou19690592009-06-12 20:14:08 +00001214 if (b == NULL)
1215 return -1;
1216 ret = PyObject_CallMethodObjArgs(self->buffer,
1217 _PyIO_str_write, b, NULL);
1218 Py_DECREF(b);
1219 if (ret == NULL)
1220 return -1;
1221 Py_DECREF(ret);
Antoine Pitrou19690592009-06-12 20:14:08 +00001222 return 0;
1223}
1224
1225static PyObject *
1226textiowrapper_write(textio *self, PyObject *args)
1227{
1228 PyObject *ret;
1229 PyObject *text; /* owned reference */
1230 PyObject *b;
1231 Py_ssize_t textlen;
1232 int haslf = 0;
1233 int needflush = 0;
1234
1235 CHECK_INITIALIZED(self);
1236
1237 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1238 return NULL;
1239 }
1240
1241 CHECK_CLOSED(self);
1242
1243 if (self->encoder == NULL) {
1244 PyErr_SetString(PyExc_IOError, "not writable");
1245 return NULL;
1246 }
1247
1248 Py_INCREF(text);
1249
1250 textlen = PyUnicode_GetSize(text);
1251
1252 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1253 if (findchar(PyUnicode_AS_UNICODE(text),
1254 PyUnicode_GET_SIZE(text), '\n'))
1255 haslf = 1;
1256
1257 if (haslf && self->writetranslate && self->writenl != NULL) {
1258 PyObject *newtext = PyObject_CallMethod(
1259 text, "replace", "ss", "\n", self->writenl);
1260 Py_DECREF(text);
1261 if (newtext == NULL)
1262 return NULL;
1263 text = newtext;
1264 }
1265
1266 if (self->line_buffering &&
1267 (haslf ||
1268 findchar(PyUnicode_AS_UNICODE(text),
1269 PyUnicode_GET_SIZE(text), '\r')))
1270 needflush = 1;
1271
1272 /* XXX What if we were just reading? */
1273 if (self->encodefunc != NULL) {
1274 b = (*self->encodefunc)((PyObject *) self, text);
1275 self->encoding_start_of_stream = 0;
1276 }
1277 else
1278 b = PyObject_CallMethodObjArgs(self->encoder,
1279 _PyIO_str_encode, text, NULL);
1280 Py_DECREF(text);
1281 if (b == NULL)
1282 return NULL;
1283
1284 if (self->pending_bytes == NULL) {
1285 self->pending_bytes = PyList_New(0);
1286 if (self->pending_bytes == NULL) {
1287 Py_DECREF(b);
1288 return NULL;
1289 }
1290 self->pending_bytes_count = 0;
1291 }
1292 if (PyList_Append(self->pending_bytes, b) < 0) {
1293 Py_DECREF(b);
1294 return NULL;
1295 }
1296 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1297 Py_DECREF(b);
1298 if (self->pending_bytes_count > self->chunk_size || needflush) {
1299 if (_textiowrapper_writeflush(self) < 0)
1300 return NULL;
1301 }
1302
1303 if (needflush) {
1304 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1305 if (ret == NULL)
1306 return NULL;
1307 Py_DECREF(ret);
1308 }
1309
1310 Py_CLEAR(self->snapshot);
1311
1312 if (self->decoder) {
1313 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1314 if (ret == NULL)
1315 return NULL;
1316 Py_DECREF(ret);
1317 }
1318
1319 return PyLong_FromSsize_t(textlen);
1320}
1321
1322/* Steal a reference to chars and store it in the decoded_char buffer;
1323 */
1324static void
1325textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1326{
1327 Py_CLEAR(self->decoded_chars);
1328 self->decoded_chars = chars;
1329 self->decoded_chars_used = 0;
1330}
1331
1332static PyObject *
1333textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1334{
1335 PyObject *chars;
1336 Py_ssize_t avail;
1337
1338 if (self->decoded_chars == NULL)
1339 return PyUnicode_FromStringAndSize(NULL, 0);
1340
1341 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1342 - self->decoded_chars_used);
1343
1344 assert(avail >= 0);
1345
1346 if (n < 0 || n > avail)
1347 n = avail;
1348
1349 if (self->decoded_chars_used > 0 || n < avail) {
1350 chars = PyUnicode_FromUnicode(
1351 PyUnicode_AS_UNICODE(self->decoded_chars)
1352 + self->decoded_chars_used, n);
1353 if (chars == NULL)
1354 return NULL;
1355 }
1356 else {
1357 chars = self->decoded_chars;
1358 Py_INCREF(chars);
1359 }
1360
1361 self->decoded_chars_used += n;
1362 return chars;
1363}
1364
1365/* Read and decode the next chunk of data from the BufferedReader.
1366 */
1367static int
1368textiowrapper_read_chunk(textio *self)
1369{
1370 PyObject *dec_buffer = NULL;
1371 PyObject *dec_flags = NULL;
1372 PyObject *input_chunk = NULL;
1373 PyObject *decoded_chars, *chunk_size;
1374 int eof;
1375
1376 /* The return value is True unless EOF was reached. The decoded string is
1377 * placed in self._decoded_chars (replacing its previous value). The
1378 * entire input chunk is sent to the decoder, though some of it may remain
1379 * buffered in the decoder, yet to be converted.
1380 */
1381
1382 if (self->decoder == NULL) {
1383 PyErr_SetString(PyExc_IOError, "not readable");
1384 return -1;
1385 }
1386
1387 if (self->telling) {
1388 /* To prepare for tell(), we need to snapshot a point in the file
1389 * where the decoder's input buffer is empty.
1390 */
1391
1392 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1393 _PyIO_str_getstate, NULL);
1394 if (state == NULL)
1395 return -1;
1396 /* Given this, we know there was a valid snapshot point
1397 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1398 */
1399 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1400 Py_DECREF(state);
1401 return -1;
1402 }
1403 Py_INCREF(dec_buffer);
1404 Py_INCREF(dec_flags);
1405 Py_DECREF(state);
1406 }
1407
1408 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1409 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1410 if (chunk_size == NULL)
1411 goto fail;
1412 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1413 _PyIO_str_read1, chunk_size, NULL);
1414 Py_DECREF(chunk_size);
1415 if (input_chunk == NULL)
1416 goto fail;
1417 assert(PyBytes_Check(input_chunk));
1418
1419 eof = (PyBytes_Size(input_chunk) == 0);
1420
1421 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1422 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1423 self->decoder, input_chunk, eof);
1424 }
1425 else {
1426 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1427 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1428 }
1429
1430 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1431 if (decoded_chars == NULL)
1432 goto fail;
1433 textiowrapper_set_decoded_chars(self, decoded_chars);
1434 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1435 eof = 0;
1436
1437 if (self->telling) {
1438 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1439 * next input to be decoded is dec_buffer + input_chunk.
1440 */
1441 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1442 if (next_input == NULL)
1443 goto fail;
1444 assert (PyBytes_Check(next_input));
1445 Py_DECREF(dec_buffer);
1446 Py_CLEAR(self->snapshot);
1447 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1448 }
1449 Py_DECREF(input_chunk);
1450
1451 return (eof == 0);
1452
1453 fail:
1454 Py_XDECREF(dec_buffer);
1455 Py_XDECREF(dec_flags);
1456 Py_XDECREF(input_chunk);
1457 return -1;
1458}
1459
1460static PyObject *
1461textiowrapper_read(textio *self, PyObject *args)
1462{
1463 Py_ssize_t n = -1;
1464 PyObject *result = NULL, *chunks = NULL;
1465
1466 CHECK_INITIALIZED(self);
1467
Benjamin Petersonddd392c2009-12-13 19:19:07 +00001468 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Antoine Pitrou19690592009-06-12 20:14:08 +00001469 return NULL;
1470
1471 CHECK_CLOSED(self);
1472
1473 if (self->decoder == NULL) {
1474 PyErr_SetString(PyExc_IOError, "not readable");
1475 return NULL;
1476 }
1477
1478 if (_textiowrapper_writeflush(self) < 0)
1479 return NULL;
1480
1481 if (n < 0) {
1482 /* Read everything */
1483 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1484 PyObject *decoded, *final;
1485 if (bytes == NULL)
1486 goto fail;
1487 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1488 bytes, Py_True, NULL);
1489 Py_DECREF(bytes);
1490 if (decoded == NULL)
1491 goto fail;
1492
1493 result = textiowrapper_get_decoded_chars(self, -1);
1494
1495 if (result == NULL) {
1496 Py_DECREF(decoded);
1497 return NULL;
1498 }
1499
1500 final = PyUnicode_Concat(result, decoded);
1501 Py_DECREF(result);
1502 Py_DECREF(decoded);
1503 if (final == NULL)
1504 goto fail;
1505
1506 Py_CLEAR(self->snapshot);
1507 return final;
1508 }
1509 else {
1510 int res = 1;
1511 Py_ssize_t remaining = n;
1512
1513 result = textiowrapper_get_decoded_chars(self, n);
1514 if (result == NULL)
1515 goto fail;
1516 remaining -= PyUnicode_GET_SIZE(result);
1517
1518 /* Keep reading chunks until we have n characters to return */
1519 while (remaining > 0) {
1520 res = textiowrapper_read_chunk(self);
1521 if (res < 0)
1522 goto fail;
1523 if (res == 0) /* EOF */
1524 break;
1525 if (chunks == NULL) {
1526 chunks = PyList_New(0);
1527 if (chunks == NULL)
1528 goto fail;
1529 }
1530 if (PyList_Append(chunks, result) < 0)
1531 goto fail;
1532 Py_DECREF(result);
1533 result = textiowrapper_get_decoded_chars(self, remaining);
1534 if (result == NULL)
1535 goto fail;
1536 remaining -= PyUnicode_GET_SIZE(result);
1537 }
1538 if (chunks != NULL) {
1539 if (result != NULL && PyList_Append(chunks, result) < 0)
1540 goto fail;
1541 Py_CLEAR(result);
1542 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1543 if (result == NULL)
1544 goto fail;
1545 Py_CLEAR(chunks);
1546 }
1547 return result;
1548 }
1549 fail:
1550 Py_XDECREF(result);
1551 Py_XDECREF(chunks);
1552 return NULL;
1553}
1554
1555
1556/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1557 that is to the NUL character. Otherwise the function will produce
1558 incorrect results. */
1559static Py_UNICODE *
1560find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1561{
1562 Py_UNICODE *s = start;
1563 for (;;) {
1564 while (*s > ch)
1565 s++;
1566 if (*s == ch)
1567 return s;
1568 if (s == end)
1569 return NULL;
1570 s++;
1571 }
1572}
1573
/* Locate the end of the first line in the Py_UNICODE range [start, end).
 *
 * Exactly one of three search modes is used:
 *   - `translated` non-zero: only '\n' ends a line;
 *   - else `universal` non-zero: '\r', '\r\n' and '\n' all end a line;
 *   - otherwise: the bytes of the string object `readnl` are the line
 *     terminator (compared character by character).
 *
 * Returns the offset, relative to `start`, of the position just past the
 * line terminator when one is found.  Otherwise returns -1 and stores in
 * *consumed how many characters, counted from `start`, were scanned and
 * cannot take part in a terminator; *consumed is not written when a
 * terminator is found.
 *
 * The scans rely on the storage being NUL-terminated at `end` (see
 * find_control_char and the fast path below).
 */
1574Py_ssize_t
1575_PyIO_find_line_ending(
1576 int translated, int universal, PyObject *readnl,
1577 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1578{
1579 Py_ssize_t len = end - start;
1580
1581 if (translated) {
1582 /* Newlines are already translated, only search for \n */
1583 Py_UNICODE *pos = find_control_char(start, end, '\n');
1584 if (pos != NULL)
1585 return pos - start + 1;
1586 else {
1587 *consumed = len;
1588 return -1;
1589 }
1590 }
1591 else if (universal) {
1592 /* Universal newline search. Find any of \r, \r\n, \n
1593 * The decoder ensures that \r\n are not split in two pieces
1594 */
1595 Py_UNICODE *s = start;
1596 for (;;) {
1597 Py_UNICODE ch;
1598 /* Fast path for non-control chars. The loop always ends
1599 since the Py_UNICODE storage is NUL-terminated. */
1600 while (*s > '\r')
1601 s++;
1602 if (s >= end) {
1603 *consumed = len;
1604 return -1;
1605 }
1606 ch = *s++;
/* `s` now points one past `ch`, so `s - start` already includes it. */
1607 if (ch == '\n')
1608 return s - start;
1609 if (ch == '\r') {
1610 if (*s == '\n')
1611 return s - start + 1;
1612 else
1613 return s - start;
1614 }
1615 }
1616 }
1617 else {
1618 /* Non-universal mode. */
1619 Py_ssize_t readnl_len = PyString_GET_SIZE(readnl);
1620 unsigned char *nl = (unsigned char *) PyString_AS_STRING(readnl);
1621 if (readnl_len == 1) {
1622 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1623 if (pos != NULL)
1624 return pos - start + 1;
1625 *consumed = len;
1626 return -1;
1627 }
1628 else {
/* Multi-character terminator: `e` is the first position at which a
   complete terminator no longer fits before `end`. */
1629 Py_UNICODE *s = start;
1630 Py_UNICODE *e = end - readnl_len + 1;
1631 Py_UNICODE *pos;
1632 if (e < s)
1633 e = s;
1634 while (s < e) {
1635 Py_ssize_t i;
/* NOTE(review): this inner `pos` shadows the `pos` declared above. */
1636 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1637 if (pos == NULL || pos >= e)
1638 break;
1639 for (i = 1; i < readnl_len; i++) {
1640 if (pos[i] != nl[i])
1641 break;
1642 }
1643 if (i == readnl_len)
1644 return pos - start + readnl_len;
1645 s = pos + 1;
1646 }
/* No complete terminator found.  If the first terminator character
   occurs in the tail starting at `e`, report only the characters before
   it as consumed. */
1647 pos = find_control_char(e, end, nl[0]);
1648 if (pos == NULL)
1649 *consumed = len;
1650 else
1651 *consumed = pos - start;
1652 return -1;
1653 }
1654 }
1655}
1656
/* Read one line from the wrapper and return it as a new unicode object.
 *
 * `limit` caps the number of characters returned when it is >= 0; a
 * negative limit means no cap.  Pending writes are flushed first.
 * Data is pulled in with textiowrapper_read_chunk() as needed; pieces that
 * contain no line ending are set aside in the `chunks` list and joined at
 * the end.  An empty string is returned when EOF is reached before any
 * data.  Returns NULL with an exception set on error.
 */
1657static PyObject *
1658_textiowrapper_readline(textio *self, Py_ssize_t limit)
1659{
1660 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1661 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1662 int res;
1663
1664 CHECK_CLOSED(self);
1665
1666 if (_textiowrapper_writeflush(self) < 0)
1667 return NULL;
1668
/* `chunked` counts the characters already moved into `chunks`. */
1669 chunked = 0;
1670
1671 while (1) {
1672 Py_UNICODE *ptr;
1673 Py_ssize_t line_len;
1674 Py_ssize_t consumed = 0;
1675
1676 /* First, get some data if necessary */
1677 res = 1;
1678 while (!self->decoded_chars ||
1679 !PyUnicode_GET_SIZE(self->decoded_chars)) {
1680 res = textiowrapper_read_chunk(self);
1681 if (res < 0)
1682 goto error;
1683 if (res == 0)
1684 break;
1685 }
1686 if (res == 0) {
1687 /* end of file */
1688 textiowrapper_set_decoded_chars(self, NULL);
1689 Py_CLEAR(self->snapshot);
1690 start = endpos = offset_to_buffer = 0;
1691 break;
1692 }
1693
/* Pick the string to scan: the decoded buffer itself (from the first
   unused character), or the leftover of the previous round concatenated
   with the fresh buffer.  `offset_to_buffer` is the length of that
   leftover prefix. */
1694 if (remaining == NULL) {
1695 line = self->decoded_chars;
1696 start = self->decoded_chars_used;
1697 offset_to_buffer = 0;
1698 Py_INCREF(line);
1699 }
1700 else {
1701 assert(self->decoded_chars_used == 0);
1702 line = PyUnicode_Concat(remaining, self->decoded_chars);
1703 start = 0;
1704 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1705 Py_CLEAR(remaining);
1706 if (line == NULL)
1707 goto error;
1708 }
1709
1710 ptr = PyUnicode_AS_UNICODE(line);
1711 line_len = PyUnicode_GET_SIZE(line);
1712
1713 endpos = _PyIO_find_line_ending(
1714 self->readtranslate, self->readuniversal, self->readnl,
1715 ptr + start, ptr + line_len, &consumed);
1716 if (endpos >= 0) {
/* Line ending found: convert to an index into `line`, then apply the
   length limit if it would be exceeded. */
1717 endpos += start;
1718 if (limit >= 0 && (endpos - start) + chunked >= limit)
1719 endpos = start + limit - chunked;
1720 break;
1721 }
1722
1723 /* We can put aside up to `endpos` */
1724 endpos = consumed + start;
1725 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1726 /* Didn't find line ending, but reached length limit */
1727 endpos = start + limit - chunked;
1728 break;
1729 }
1730
1731 if (endpos > start) {
1732 /* No line ending seen yet - put aside current data */
1733 PyObject *s;
1734 if (chunks == NULL) {
1735 chunks = PyList_New(0);
1736 if (chunks == NULL)
1737 goto error;
1738 }
1739 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1740 if (s == NULL)
1741 goto error;
1742 if (PyList_Append(chunks, s) < 0) {
1743 Py_DECREF(s);
1744 goto error;
1745 }
1746 chunked += PyUnicode_GET_SIZE(s);
1747 Py_DECREF(s);
1748 }
1749 /* There may be some remaining bytes we'll have to prepend to the
1750 next chunk of data */
1751 if (endpos < line_len) {
1752 remaining = PyUnicode_FromUnicode(
1753 ptr + endpos, line_len - endpos);
1754 if (remaining == NULL)
1755 goto error;
1756 }
1757 Py_CLEAR(line);
1758 /* We have consumed the buffer */
1759 textiowrapper_set_decoded_chars(self, NULL);
1760 }
1761
1762 if (line != NULL) {
1763 /* Our line ends in the current buffer */
/* Record how much of the decoded buffer is used; `endpos` indexes `line`,
   which may start with `offset_to_buffer` leftover characters. */
1764 self->decoded_chars_used = endpos - offset_to_buffer;
1765 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
/* Only part of `line` is wanted: shrink it in place when we hold the
   sole reference and it starts at 0, otherwise copy the slice. */
1766 if (start == 0 && Py_REFCNT(line) == 1) {
1767 if (PyUnicode_Resize(&line, endpos) < 0)
1768 goto error;
1769 }
1770 else {
1771 PyObject *s = PyUnicode_FromUnicode(
1772 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1773 Py_CLEAR(line);
1774 if (s == NULL)
1775 goto error;
1776 line = s;
1777 }
1778 }
1779 }
/* A leftover that was never prepended to new data still belongs to the
   result. */
1780 if (remaining != NULL) {
1781 if (chunks == NULL) {
1782 chunks = PyList_New(0);
1783 if (chunks == NULL)
1784 goto error;
1785 }
1786 if (PyList_Append(chunks, remaining) < 0)
1787 goto error;
1788 Py_CLEAR(remaining);
1789 }
/* Join the pieces set aside earlier with the final piece, if any. */
1790 if (chunks != NULL) {
1791 if (line != NULL && PyList_Append(chunks, line) < 0)
1792 goto error;
1793 Py_CLEAR(line);
1794 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1795 if (line == NULL)
1796 goto error;
1797 Py_DECREF(chunks);
1798 }
/* Nothing at all was read (EOF): return an empty string. */
1799 if (line == NULL)
1800 line = PyUnicode_FromStringAndSize(NULL, 0);
1801
1802 return line;
1803
1804 error:
1805 Py_XDECREF(chunks);
1806 Py_XDECREF(remaining);
1807 Py_XDECREF(line);
1808 return NULL;
1809}
1810
1811static PyObject *
1812textiowrapper_readline(textio *self, PyObject *args)
1813{
1814 PyObject *limitobj = NULL;
1815 Py_ssize_t limit = -1;
1816
1817 CHECK_INITIALIZED(self);
1818 if (!PyArg_ParseTuple(args, "|O:readline", &limitobj)) {
1819 return NULL;
1820 }
1821 if (limitobj) {
1822 if (!PyNumber_Check(limitobj)) {
1823 PyErr_Format(PyExc_TypeError,
1824 "integer argument expected, got '%.200s'",
1825 Py_TYPE(limitobj)->tp_name);
1826 return NULL;
1827 }
1828 limit = PyNumber_AsSsize_t(limitobj, PyExc_OverflowError);
1829 if (limit == -1 && PyErr_Occurred())
1830 return NULL;
1831 }
1832 return _textiowrapper_readline(self, limit);
1833}
1834
1835/* Seek and Tell */
1836
/* Unpacked form of the opaque integer position exchanged by tell() and
   seek().  Field usage as seen in seek()/tell() below: */
1837typedef struct {
/* position given to buffer.seek(): the "safe start point" */
1838 Py_off_t start_pos;
/* decoder flags restored through decoder.setstate() at start_pos */
1839 int dec_flags;
/* number of bytes to read from the buffer and feed to the decoder */
1840 int bytes_to_feed;
/* number of decoded characters to skip after feeding those bytes */
1841 int chars_to_skip;
/* passed as the `final` argument of decoder.decode() when replaying */
1842 char need_eof;
1843} cookie_type;
1844
1845/*
1846 To speed up cookie packing/unpacking, we store the fields in a temporary
1847 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1848 The following macros define at which offsets in the intermediary byte
1849 string the various CookieStruct fields will be stored.
1850 */
/* COOKIE_BUF_LEN is the sum of the sizes of the five cookie_type fields;
   the OFF_* offsets below lay the fields out back to back, without
   padding, and are used with memcpy() by textiowrapper_parse_cookie()
   and textiowrapper_build_cookie().  IS_LITTLE_ENDIAN is passed to the
   _PyLong byte-array conversion functions. */
1851
1852#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1853
1854#if defined(WORDS_BIGENDIAN)
1855
1856# define IS_LITTLE_ENDIAN 0
1857
1858/* We want the least significant byte of start_pos to also be the least
1859 significant byte of the cookie, which means that in big-endian mode we
1860 must copy the fields in reverse order. */
1861
1862# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1863# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1864# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1865# define OFF_CHARS_TO_SKIP (sizeof(char))
1866# define OFF_NEED_EOF 0
1867
1868#else
1869
1870# define IS_LITTLE_ENDIAN 1
1871
1872/* Little-endian mode: the least significant byte of start_pos will
1873 naturally end up the least significant byte of the cookie. */
1874
1875# define OFF_START_POS 0
1876# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1877# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1878# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1879# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1880
1881#endif
1882
1883static int
1884textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
1885{
1886 unsigned char buffer[COOKIE_BUF_LEN];
1887 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1888 if (cookieLong == NULL)
1889 return -1;
1890
1891 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1892 IS_LITTLE_ENDIAN, 0) < 0) {
1893 Py_DECREF(cookieLong);
1894 return -1;
1895 }
1896 Py_DECREF(cookieLong);
1897
1898 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1899 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1900 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1901 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1902 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
1903
1904 return 0;
1905}
1906
1907static PyObject *
1908textiowrapper_build_cookie(cookie_type *cookie)
1909{
1910 unsigned char buffer[COOKIE_BUF_LEN];
1911
1912 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1913 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1914 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1915 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1916 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
1917
1918 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1919}
1920#undef IS_LITTLE_ENDIAN
1921
1922static int
1923_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
1924{
1925 PyObject *res;
1926 /* When seeking to the start of the stream, we call decoder.reset()
1927 rather than decoder.getstate().
1928 This is for a few decoders such as utf-16 for which the state value
1929 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1930 utf-16, that we are expecting a BOM).
1931 */
1932 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1933 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1934 else
1935 res = PyObject_CallMethod(self->decoder, "setstate",
1936 "((si))", "", cookie->dec_flags);
1937 if (res == NULL)
1938 return -1;
1939 Py_DECREF(res);
1940 return 0;
1941}
1942
1943static int
1944_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
1945{
1946 PyObject *res;
1947 /* Same as _textiowrapper_decoder_setstate() above. */
1948 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1949 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1950 self->encoding_start_of_stream = 1;
1951 }
1952 else {
1953 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1954 _PyIO_zero, NULL);
1955 self->encoding_start_of_stream = 0;
1956 }
1957 if (res == NULL)
1958 return -1;
1959 Py_DECREF(res);
1960 return 0;
1961}
1962
1963static PyObject *
1964textiowrapper_seek(textio *self, PyObject *args)
1965{
1966 PyObject *cookieObj, *posobj;
1967 cookie_type cookie;
1968 int whence = 0;
1969 PyObject *res;
1970 int cmp;
1971
1972 CHECK_INITIALIZED(self);
1973
1974 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1975 return NULL;
1976 CHECK_CLOSED(self);
1977
1978 Py_INCREF(cookieObj);
1979
1980 if (!self->seekable) {
1981 PyErr_SetString(PyExc_IOError,
1982 "underlying stream is not seekable");
1983 goto fail;
1984 }
1985
1986 if (whence == 1) {
1987 /* seek relative to current position */
1988 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1989 if (cmp < 0)
1990 goto fail;
1991
1992 if (cmp == 0) {
1993 PyErr_SetString(PyExc_IOError,
1994 "can't do nonzero cur-relative seeks");
1995 goto fail;
1996 }
1997
1998 /* Seeking to the current position should attempt to
1999 * sync the underlying buffer with the current position.
2000 */
2001 Py_DECREF(cookieObj);
2002 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2003 if (cookieObj == NULL)
2004 goto fail;
2005 }
2006 else if (whence == 2) {
2007 /* seek relative to end of file */
2008
2009 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
2010 if (cmp < 0)
2011 goto fail;
2012
2013 if (cmp == 0) {
2014 PyErr_SetString(PyExc_IOError,
2015 "can't do nonzero end-relative seeks");
2016 goto fail;
2017 }
2018
2019 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2020 if (res == NULL)
2021 goto fail;
2022 Py_DECREF(res);
2023
2024 textiowrapper_set_decoded_chars(self, NULL);
2025 Py_CLEAR(self->snapshot);
2026 if (self->decoder) {
2027 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2028 if (res == NULL)
2029 goto fail;
2030 Py_DECREF(res);
2031 }
2032
2033 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2034 Py_XDECREF(cookieObj);
2035 return res;
2036 }
2037 else if (whence != 0) {
2038 PyErr_Format(PyExc_ValueError,
2039 "invalid whence (%d, should be 0, 1 or 2)", whence);
2040 goto fail;
2041 }
2042
2043 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
2044 if (cmp < 0)
2045 goto fail;
2046
2047 if (cmp == 1) {
2048 PyObject *repr = PyObject_Repr(cookieObj);
2049 if (repr != NULL) {
2050 PyErr_Format(PyExc_ValueError,
2051 "negative seek position %s",
2052 PyString_AS_STRING(repr));
2053 Py_DECREF(repr);
2054 }
2055 goto fail;
2056 }
2057
2058 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2059 if (res == NULL)
2060 goto fail;
2061 Py_DECREF(res);
2062
2063 /* The strategy of seek() is to go back to the safe start point
2064 * and replay the effect of read(chars_to_skip) from there.
2065 */
2066 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2067 goto fail;
2068
2069 /* Seek back to the safe start point. */
2070 posobj = PyLong_FromOff_t(cookie.start_pos);
2071 if (posobj == NULL)
2072 goto fail;
2073 res = PyObject_CallMethodObjArgs(self->buffer,
2074 _PyIO_str_seek, posobj, NULL);
2075 Py_DECREF(posobj);
2076 if (res == NULL)
2077 goto fail;
2078 Py_DECREF(res);
2079
2080 textiowrapper_set_decoded_chars(self, NULL);
2081 Py_CLEAR(self->snapshot);
2082
2083 /* Restore the decoder to its state from the safe start point. */
2084 if (self->decoder) {
2085 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2086 goto fail;
2087 }
2088
2089 if (cookie.chars_to_skip) {
2090 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2091 PyObject *input_chunk = PyObject_CallMethod(
2092 self->buffer, "read", "i", cookie.bytes_to_feed);
2093 PyObject *decoded;
2094
2095 if (input_chunk == NULL)
2096 goto fail;
2097
2098 assert (PyBytes_Check(input_chunk));
2099
2100 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2101 if (self->snapshot == NULL) {
2102 Py_DECREF(input_chunk);
2103 goto fail;
2104 }
2105
2106 decoded = PyObject_CallMethod(self->decoder, "decode",
2107 "Oi", input_chunk, (int)cookie.need_eof);
2108
2109 if (decoded == NULL)
2110 goto fail;
2111
2112 textiowrapper_set_decoded_chars(self, decoded);
2113
2114 /* Skip chars_to_skip of the decoded characters. */
2115 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2116 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2117 goto fail;
2118 }
2119 self->decoded_chars_used = cookie.chars_to_skip;
2120 }
2121 else {
2122 self->snapshot = Py_BuildValue("is", cookie.dec_flags, "");
2123 if (self->snapshot == NULL)
2124 goto fail;
2125 }
2126
2127 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2128 if (self->encoder) {
2129 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2130 goto fail;
2131 }
2132 return cookieObj;
2133 fail:
2134 Py_XDECREF(cookieObj);
2135 return NULL;
2136
2137}
2138
2139static PyObject *
2140textiowrapper_tell(textio *self, PyObject *args)
2141{
2142 PyObject *res;
2143 PyObject *posobj = NULL;
2144 cookie_type cookie = {0,0,0,0,0};
2145 PyObject *next_input;
2146 Py_ssize_t chars_to_skip, chars_decoded;
2147 PyObject *saved_state = NULL;
2148 char *input, *input_end;
2149
2150 CHECK_INITIALIZED(self);
2151 CHECK_CLOSED(self);
2152
2153 if (!self->seekable) {
2154 PyErr_SetString(PyExc_IOError,
2155 "underlying stream is not seekable");
2156 goto fail;
2157 }
2158 if (!self->telling) {
2159 PyErr_SetString(PyExc_IOError,
2160 "telling position disabled by next() call");
2161 goto fail;
2162 }
2163
2164 if (_textiowrapper_writeflush(self) < 0)
2165 return NULL;
2166 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2167 if (res == NULL)
2168 goto fail;
2169 Py_DECREF(res);
2170
2171 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2172 if (posobj == NULL)
2173 goto fail;
2174
2175 if (self->decoder == NULL || self->snapshot == NULL) {
2176 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2177 return posobj;
2178 }
2179
2180#if defined(HAVE_LARGEFILE_SUPPORT)
2181 cookie.start_pos = PyLong_AsLongLong(posobj);
2182#else
2183 cookie.start_pos = PyLong_AsLong(posobj);
2184#endif
2185 if (PyErr_Occurred())
2186 goto fail;
2187
2188 /* Skip backward to the snapshot point (see _read_chunk). */
2189 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2190 goto fail;
2191
2192 assert (PyBytes_Check(next_input));
2193
2194 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2195
2196 /* How many decoded characters have been used up since the snapshot? */
2197 if (self->decoded_chars_used == 0) {
2198 /* We haven't moved from the snapshot point. */
2199 Py_DECREF(posobj);
2200 return textiowrapper_build_cookie(&cookie);
2201 }
2202
2203 chars_to_skip = self->decoded_chars_used;
2204
2205 /* Starting from the snapshot position, we will walk the decoder
2206 * forward until it gives us enough decoded characters.
2207 */
2208 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2209 _PyIO_str_getstate, NULL);
2210 if (saved_state == NULL)
2211 goto fail;
2212
2213 /* Note our initial start point. */
2214 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2215 goto fail;
2216
2217 /* Feed the decoder one byte at a time. As we go, note the
2218 * nearest "safe start point" before the current location
2219 * (a point where the decoder has nothing buffered, so seek()
2220 * can safely start from there and advance to this location).
2221 */
2222 chars_decoded = 0;
2223 input = PyBytes_AS_STRING(next_input);
2224 input_end = input + PyBytes_GET_SIZE(next_input);
2225 while (input < input_end) {
2226 PyObject *state;
2227 char *dec_buffer;
2228 Py_ssize_t dec_buffer_len;
2229 int dec_flags;
2230
2231 PyObject *decoded = PyObject_CallMethod(
2232 self->decoder, "decode", "s#", input, 1);
2233 if (decoded == NULL)
2234 goto fail;
2235 assert (PyUnicode_Check(decoded));
2236 chars_decoded += PyUnicode_GET_SIZE(decoded);
2237 Py_DECREF(decoded);
2238
2239 cookie.bytes_to_feed += 1;
2240
2241 state = PyObject_CallMethodObjArgs(self->decoder,
2242 _PyIO_str_getstate, NULL);
2243 if (state == NULL)
2244 goto fail;
2245 if (!PyArg_Parse(state, "(s#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2246 Py_DECREF(state);
2247 goto fail;
2248 }
2249 Py_DECREF(state);
2250
2251 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2252 /* Decoder buffer is empty, so this is a safe start point. */
2253 cookie.start_pos += cookie.bytes_to_feed;
2254 chars_to_skip -= chars_decoded;
2255 cookie.dec_flags = dec_flags;
2256 cookie.bytes_to_feed = 0;
2257 chars_decoded = 0;
2258 }
2259 if (chars_decoded >= chars_to_skip)
2260 break;
2261 input++;
2262 }
2263 if (input == input_end) {
2264 /* We didn't get enough decoded data; signal EOF to get more. */
2265 PyObject *decoded = PyObject_CallMethod(
2266 self->decoder, "decode", "si", "", /* final = */ 1);
2267 if (decoded == NULL)
2268 goto fail;
2269 assert (PyUnicode_Check(decoded));
2270 chars_decoded += PyUnicode_GET_SIZE(decoded);
2271 Py_DECREF(decoded);
2272 cookie.need_eof = 1;
2273
2274 if (chars_decoded < chars_to_skip) {
2275 PyErr_SetString(PyExc_IOError,
2276 "can't reconstruct logical file position");
2277 goto fail;
2278 }
2279 }
2280
2281 /* finally */
2282 Py_XDECREF(posobj);
2283 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2284 Py_DECREF(saved_state);
2285 if (res == NULL)
2286 return NULL;
2287 Py_DECREF(res);
2288
2289 /* The returned cookie corresponds to the last safe start point. */
2290 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2291 return textiowrapper_build_cookie(&cookie);
2292
2293 fail:
2294 Py_XDECREF(posobj);
2295 if (saved_state) {
2296 PyObject *type, *value, *traceback;
2297 PyErr_Fetch(&type, &value, &traceback);
2298
2299 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2300 Py_DECREF(saved_state);
2301 if (res == NULL)
2302 return NULL;
2303 Py_DECREF(res);
2304
2305 PyErr_Restore(type, value, traceback);
2306 }
2307 return NULL;
2308}
2309
2310static PyObject *
2311textiowrapper_truncate(textio *self, PyObject *args)
2312{
2313 PyObject *pos = Py_None;
2314 PyObject *res;
2315
2316 CHECK_INITIALIZED(self)
2317 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2318 return NULL;
2319 }
2320
2321 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2322 if (res == NULL)
2323 return NULL;
2324 Py_DECREF(res);
2325
Antoine Pitrouf3fa0742010-01-31 22:26:04 +00002326 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Antoine Pitrou19690592009-06-12 20:14:08 +00002327}
2328
2329static PyObject *
2330textiowrapper_repr(textio *self)
2331{
2332 PyObject *nameobj, *res;
2333 PyObject *namerepr = NULL, *encrepr = NULL;
2334
2335 CHECK_INITIALIZED(self);
2336
2337 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2338 if (nameobj == NULL) {
2339 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2340 PyErr_Clear();
2341 else
2342 goto error;
2343 encrepr = PyObject_Repr(self->encoding);
2344 res = PyString_FromFormat("<_io.TextIOWrapper encoding=%s>",
2345 PyString_AS_STRING(encrepr));
2346 }
2347 else {
2348 encrepr = PyObject_Repr(self->encoding);
2349 namerepr = PyObject_Repr(nameobj);
2350 res = PyString_FromFormat("<_io.TextIOWrapper name=%s encoding=%s>",
2351 PyString_AS_STRING(namerepr),
2352 PyString_AS_STRING(encrepr));
2353 Py_DECREF(nameobj);
2354 }
2355 Py_XDECREF(namerepr);
2356 Py_XDECREF(encrepr);
2357 return res;
2358
2359error:
2360 Py_XDECREF(namerepr);
2361 Py_XDECREF(encrepr);
2362 return NULL;
2363}
2364
2365
2366/* Inquiries */
2367
2368static PyObject *
2369textiowrapper_fileno(textio *self, PyObject *args)
2370{
2371 CHECK_INITIALIZED(self);
2372 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2373}
2374
2375static PyObject *
2376textiowrapper_seekable(textio *self, PyObject *args)
2377{
2378 CHECK_INITIALIZED(self);
2379 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2380}
2381
2382static PyObject *
2383textiowrapper_readable(textio *self, PyObject *args)
2384{
2385 CHECK_INITIALIZED(self);
2386 return PyObject_CallMethod(self->buffer, "readable", NULL);
2387}
2388
2389static PyObject *
2390textiowrapper_writable(textio *self, PyObject *args)
2391{
2392 CHECK_INITIALIZED(self);
2393 return PyObject_CallMethod(self->buffer, "writable", NULL);
2394}
2395
2396static PyObject *
2397textiowrapper_isatty(textio *self, PyObject *args)
2398{
2399 CHECK_INITIALIZED(self);
2400 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2401}
2402
2403static PyObject *
2404textiowrapper_flush(textio *self, PyObject *args)
2405{
2406 CHECK_INITIALIZED(self);
2407 CHECK_CLOSED(self);
2408 self->telling = self->seekable;
2409 if (_textiowrapper_writeflush(self) < 0)
2410 return NULL;
2411 return PyObject_CallMethod(self->buffer, "flush", NULL);
2412}
2413
2414static PyObject *
2415textiowrapper_close(textio *self, PyObject *args)
2416{
2417 PyObject *res;
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002418 int r;
Antoine Pitrou19690592009-06-12 20:14:08 +00002419 CHECK_INITIALIZED(self);
Antoine Pitrou19690592009-06-12 20:14:08 +00002420
Antoine Pitrouf7fd8e42010-05-03 16:25:33 +00002421 res = textiowrapper_closed_get(self, NULL);
2422 if (res == NULL)
2423 return NULL;
2424 r = PyObject_IsTrue(res);
2425 Py_DECREF(res);
2426 if (r < 0)
2427 return NULL;
2428
2429 if (r > 0) {
2430 Py_RETURN_NONE; /* stream already closed */
2431 }
2432 else {
2433 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2434 if (res == NULL) {
2435 return NULL;
2436 }
2437 else
2438 Py_DECREF(res);
2439
2440 return PyObject_CallMethod(self->buffer, "close", NULL);
2441 }
Antoine Pitrou19690592009-06-12 20:14:08 +00002442}
2443
2444static PyObject *
2445textiowrapper_iternext(textio *self)
2446{
2447 PyObject *line;
2448
2449 CHECK_INITIALIZED(self);
2450
2451 self->telling = 0;
2452 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2453 /* Skip method call overhead for speed */
2454 line = _textiowrapper_readline(self, -1);
2455 }
2456 else {
2457 line = PyObject_CallMethodObjArgs((PyObject *)self,
2458 _PyIO_str_readline, NULL);
2459 if (line && !PyUnicode_Check(line)) {
2460 PyErr_Format(PyExc_IOError,
2461 "readline() should have returned an str object, "
2462 "not '%.200s'", Py_TYPE(line)->tp_name);
2463 Py_DECREF(line);
2464 return NULL;
2465 }
2466 }
2467
2468 if (line == NULL)
2469 return NULL;
2470
2471 if (PyUnicode_GET_SIZE(line) == 0) {
2472 /* Reached EOF or would have blocked */
2473 Py_DECREF(line);
2474 Py_CLEAR(self->snapshot);
2475 self->telling = self->seekable;
2476 return NULL;
2477 }
2478
2479 return line;
2480}
2481
2482static PyObject *
2483textiowrapper_name_get(textio *self, void *context)
2484{
2485 CHECK_INITIALIZED(self);
2486 return PyObject_GetAttrString(self->buffer, "name");
2487}
2488
2489static PyObject *
2490textiowrapper_closed_get(textio *self, void *context)
2491{
2492 CHECK_INITIALIZED(self);
2493 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2494}
2495
2496static PyObject *
2497textiowrapper_newlines_get(textio *self, void *context)
2498{
2499 PyObject *res;
2500 CHECK_INITIALIZED(self);
2501 if (self->decoder == NULL)
2502 Py_RETURN_NONE;
2503 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2504 if (res == NULL) {
2505 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2506 PyErr_Clear();
2507 Py_RETURN_NONE;
2508 }
2509 else {
2510 return NULL;
2511 }
2512 }
2513 return res;
2514}
2515
2516static PyObject *
2517textiowrapper_errors_get(textio *self, void *context)
2518{
2519 CHECK_INITIALIZED(self);
2520 Py_INCREF(self->errors);
2521 return self->errors;
2522}
2523
2524static PyObject *
2525textiowrapper_chunk_size_get(textio *self, void *context)
2526{
2527 CHECK_INITIALIZED(self);
2528 return PyLong_FromSsize_t(self->chunk_size);
2529}
2530
2531static int
2532textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
2533{
2534 Py_ssize_t n;
2535 CHECK_INITIALIZED_INT(self);
2536 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2537 if (n == -1 && PyErr_Occurred())
2538 return -1;
2539 if (n <= 0) {
2540 PyErr_SetString(PyExc_ValueError,
2541 "a strictly positive integer is required");
2542 return -1;
2543 }
2544 self->chunk_size = n;
2545 return 0;
2546}
2547
2548static PyMethodDef textiowrapper_methods[] = {
2549 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2550 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2551 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2552 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2553 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2554 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
2555
2556 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2557 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2558 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2559 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2560 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
2561
2562 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2563 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2564 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
2565 {NULL, NULL}
2566};
2567
2568static PyMemberDef textiowrapper_members[] = {
2569 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2570 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2571 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
2572 {NULL}
2573};
2574
2575static PyGetSetDef textiowrapper_getset[] = {
2576 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2577 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
2578/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2579*/
2580 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2581 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2582 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2583 (setter)textiowrapper_chunk_size_set, NULL},
2584 {NULL}
2585};
2586
2587PyTypeObject PyTextIOWrapper_Type = {
2588 PyVarObject_HEAD_INIT(NULL, 0)
2589 "_io.TextIOWrapper", /*tp_name*/
2590 sizeof(textio), /*tp_basicsize*/
2591 0, /*tp_itemsize*/
2592 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
2593 0, /*tp_print*/
2594 0, /*tp_getattr*/
2595 0, /*tps_etattr*/
2596 0, /*tp_compare */
2597 (reprfunc)textiowrapper_repr,/*tp_repr*/
2598 0, /*tp_as_number*/
2599 0, /*tp_as_sequence*/
2600 0, /*tp_as_mapping*/
2601 0, /*tp_hash */
2602 0, /*tp_call*/
2603 0, /*tp_str*/
2604 0, /*tp_getattro*/
2605 0, /*tp_setattro*/
2606 0, /*tp_as_buffer*/
2607 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2608 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2609 textiowrapper_doc, /* tp_doc */
2610 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2611 (inquiry)textiowrapper_clear, /* tp_clear */
2612 0, /* tp_richcompare */
2613 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
2614 0, /* tp_iter */
2615 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2616 textiowrapper_methods, /* tp_methods */
2617 textiowrapper_members, /* tp_members */
2618 textiowrapper_getset, /* tp_getset */
2619 0, /* tp_base */
2620 0, /* tp_dict */
2621 0, /* tp_descr_get */
2622 0, /* tp_descr_set */
2623 offsetof(textio, dict), /*tp_dictoffset*/
2624 (initproc)textiowrapper_init, /* tp_init */
2625 0, /* tp_alloc */
2626 PyType_GenericNew, /* tp_new */
2627};