blob: cc229a8562f679d6522f812ad38420ff5e7be3f7 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
16PyDoc_STRVAR(TextIOBase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
31PyDoc_STRVAR(TextIOBase_read_doc,
32 "Read at most n characters from stream.\n"
33 "\n"
34 "Read from underlying buffer until we have n characters or we hit EOF.\n"
35 "If n is negative or omitted, read until EOF.\n"
36 );
37
38static PyObject *
39TextIOBase_read(PyObject *self, PyObject *args)
40{
41 return _unsupported("read");
42}
43
44PyDoc_STRVAR(TextIOBase_readline_doc,
45 "Read until newline or EOF.\n"
46 "\n"
47 "Returns an empty string if EOF is hit immediately.\n"
48 );
49
50static PyObject *
51TextIOBase_readline(PyObject *self, PyObject *args)
52{
53 return _unsupported("readline");
54}
55
56PyDoc_STRVAR(TextIOBase_write_doc,
57 "Write string to stream.\n"
58 "Returns the number of characters written (which is always equal to\n"
59 "the length of the string).\n"
60 );
61
62static PyObject *
63TextIOBase_write(PyObject *self, PyObject *args)
64{
65 return _unsupported("write");
66}
67
68PyDoc_STRVAR(TextIOBase_encoding_doc,
69 "Encoding of the text stream.\n"
70 "\n"
71 "Subclasses should override.\n"
72 );
73
74static PyObject *
75TextIOBase_encoding_get(PyObject *self, void *context)
76{
77 Py_RETURN_NONE;
78}
79
80PyDoc_STRVAR(TextIOBase_newlines_doc,
81 "Line endings translated so far.\n"
82 "\n"
83 "Only line endings translated during reading are considered.\n"
84 "\n"
85 "Subclasses should override.\n"
86 );
87
88static PyObject *
89TextIOBase_newlines_get(PyObject *self, void *context)
90{
91 Py_RETURN_NONE;
92}
93
94
95static PyMethodDef TextIOBase_methods[] = {
96 {"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
97 {"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
98 {"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
99 {NULL, NULL}
100};
101
102static PyGetSetDef TextIOBase_getset[] = {
103 {"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
104 {"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
105 {0}
106};
107
108PyTypeObject PyTextIOBase_Type = {
109 PyVarObject_HEAD_INIT(NULL, 0)
110 "_io._TextIOBase", /*tp_name*/
111 0, /*tp_basicsize*/
112 0, /*tp_itemsize*/
113 0, /*tp_dealloc*/
114 0, /*tp_print*/
115 0, /*tp_getattr*/
116 0, /*tp_setattr*/
117 0, /*tp_compare */
118 0, /*tp_repr*/
119 0, /*tp_as_number*/
120 0, /*tp_as_sequence*/
121 0, /*tp_as_mapping*/
122 0, /*tp_hash */
123 0, /*tp_call*/
124 0, /*tp_str*/
125 0, /*tp_getattro*/
126 0, /*tp_setattro*/
127 0, /*tp_as_buffer*/
128 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
129 TextIOBase_doc, /* tp_doc */
130 0, /* tp_traverse */
131 0, /* tp_clear */
132 0, /* tp_richcompare */
133 0, /* tp_weaklistoffset */
134 0, /* tp_iter */
135 0, /* tp_iternext */
136 TextIOBase_methods, /* tp_methods */
137 0, /* tp_members */
138 TextIOBase_getset, /* tp_getset */
139 &PyIOBase_Type, /* tp_base */
140 0, /* tp_dict */
141 0, /* tp_descr_get */
142 0, /* tp_descr_set */
143 0, /* tp_dictoffset */
144 0, /* tp_init */
145 0, /* tp_alloc */
146 0, /* tp_new */
147};
148
149
150/* IncrementalNewlineDecoder */
151
152PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
153 "Codec used when reading a file in universal newlines mode. It wraps\n"
154 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
155 "records the types of newlines encountered. When used with\n"
156 "translate=False, it ensures that the newline sequence is returned in\n"
157 "one piece. When used with decoder=None, it expects unicode strings as\n"
158 "decode input and translates newlines without first invoking an external\n"
159 "decoder.\n"
160 );
161
162typedef struct {
163 PyObject_HEAD
164 PyObject *decoder;
165 PyObject *errors;
166 int pendingcr:1;
167 int translate:1;
168 unsigned int seennl:3;
169} PyNewLineDecoderObject;
170
171static int
Antoine Pitrou24f36292009-03-28 22:16:42 +0000172IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000173 PyObject *args, PyObject *kwds)
174{
175 PyObject *decoder;
176 int translate;
177 PyObject *errors = NULL;
178 char *kwlist[] = {"decoder", "translate", "errors", NULL};
179
180 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
181 kwlist, &decoder, &translate, &errors))
182 return -1;
183
184 self->decoder = decoder;
185 Py_INCREF(decoder);
186
187 if (errors == NULL) {
188 self->errors = PyUnicode_FromString("strict");
189 if (self->errors == NULL)
190 return -1;
191 }
192 else {
193 Py_INCREF(errors);
194 self->errors = errors;
195 }
196
197 self->translate = translate;
198 self->seennl = 0;
199 self->pendingcr = 0;
200
201 return 0;
202}
203
204static void
205IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
206{
207 Py_CLEAR(self->decoder);
208 Py_CLEAR(self->errors);
209 Py_TYPE(self)->tp_free((PyObject *)self);
210}
211
/* Bit flags recording which newline conventions have been observed. */
#define SEEN_CR   0x1   /* a lone \r */
#define SEEN_LF   0x2   /* a lone \n */
#define SEEN_CRLF 0x4   /* a \r\n pair */
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
216
217PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000218_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000219 PyObject *input, int final)
220{
221 PyObject *output;
222 Py_ssize_t output_len;
223 PyNewLineDecoderObject *self = (PyNewLineDecoderObject *) _self;
224
225 if (self->decoder == NULL) {
226 PyErr_SetString(PyExc_ValueError,
227 "IncrementalNewlineDecoder.__init__ not called");
228 return NULL;
229 }
230
231 /* decode input (with the eventual \r from a previous pass) */
232 if (self->decoder != Py_None) {
233 output = PyObject_CallMethodObjArgs(self->decoder,
234 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
235 }
236 else {
237 output = input;
238 Py_INCREF(output);
239 }
240
241 if (output == NULL)
242 return NULL;
243
244 if (!PyUnicode_Check(output)) {
245 PyErr_SetString(PyExc_TypeError,
246 "decoder should return a string result");
247 goto error;
248 }
249
250 output_len = PyUnicode_GET_SIZE(output);
251 if (self->pendingcr && (final || output_len > 0)) {
252 Py_UNICODE *out;
253 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
254 if (modified == NULL)
255 goto error;
256 out = PyUnicode_AS_UNICODE(modified);
257 out[0] = '\r';
258 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
259 output_len * sizeof(Py_UNICODE));
260 Py_DECREF(output);
261 output = modified;
262 self->pendingcr = 0;
263 output_len++;
264 }
265
266 /* retain last \r even when not translating data:
267 * then readline() is sure to get \r\n in one pass
268 */
269 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000270 if (output_len > 0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000271 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
272
273 if (Py_REFCNT(output) == 1) {
274 if (PyUnicode_Resize(&output, output_len - 1) < 0)
275 goto error;
276 }
277 else {
278 PyObject *modified = PyUnicode_FromUnicode(
279 PyUnicode_AS_UNICODE(output),
280 output_len - 1);
281 if (modified == NULL)
282 goto error;
283 Py_DECREF(output);
284 output = modified;
285 }
286 self->pendingcr = 1;
287 }
288 }
289
290 /* Record which newlines are read and do newline translation if desired,
291 all in one pass. */
292 {
293 Py_UNICODE *in_str;
294 Py_ssize_t len;
295 int seennl = self->seennl;
296 int only_lf = 0;
297
298 in_str = PyUnicode_AS_UNICODE(output);
299 len = PyUnicode_GET_SIZE(output);
300
301 if (len == 0)
302 return output;
303
304 /* If, up to now, newlines are consistently \n, do a quick check
305 for the \r *byte* with the libc's optimized memchr.
306 */
307 if (seennl == SEEN_LF || seennl == 0) {
Antoine Pitrou0e941892009-03-06 23:57:20 +0000308 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000309 }
310
Antoine Pitrou66913e22009-03-06 23:40:56 +0000311 if (only_lf) {
312 /* If not already seen, quick scan for a possible "\n" character.
313 (there's nothing else to be done, even when in translation mode)
314 */
315 if (seennl == 0 &&
316 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
317 Py_UNICODE *s, *end;
318 s = in_str;
319 end = in_str + len;
320 for (;;) {
321 Py_UNICODE c;
322 /* Fast loop for non-control characters */
323 while (*s > '\n')
324 s++;
325 c = *s++;
326 if (c == '\n') {
327 seennl |= SEEN_LF;
328 break;
329 }
330 if (s > end)
331 break;
332 }
333 }
334 /* Finished: we have scanned for newlines, and none of them
335 need translating */
336 }
337 else if (!self->translate) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000338 Py_UNICODE *s, *end;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000339 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000340 if (seennl == SEEN_ALL)
341 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 s = in_str;
343 end = in_str + len;
344 for (;;) {
345 Py_UNICODE c;
346 /* Fast loop for non-control characters */
347 while (*s > '\r')
348 s++;
349 c = *s++;
350 if (c == '\n')
351 seennl |= SEEN_LF;
352 else if (c == '\r') {
353 if (*s == '\n') {
354 seennl |= SEEN_CRLF;
355 s++;
356 }
357 else
358 seennl |= SEEN_CR;
359 }
360 if (s > end)
361 break;
362 if (seennl == SEEN_ALL)
363 break;
364 }
365 endscan:
366 ;
367 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000368 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 PyObject *translated = NULL;
370 Py_UNICODE *out_str;
371 Py_UNICODE *in, *out, *end;
372 if (Py_REFCNT(output) != 1) {
373 /* We could try to optimize this so that we only do a copy
374 when there is something to translate. On the other hand,
375 most decoders should only output non-shared strings, i.e.
376 translation is done in place. */
377 translated = PyUnicode_FromUnicode(NULL, len);
378 if (translated == NULL)
379 goto error;
380 assert(Py_REFCNT(translated) == 1);
381 memcpy(PyUnicode_AS_UNICODE(translated),
382 PyUnicode_AS_UNICODE(output),
383 len * sizeof(Py_UNICODE));
384 }
385 else {
386 translated = output;
387 }
388 out_str = PyUnicode_AS_UNICODE(translated);
389 in = in_str;
390 out = out_str;
391 end = in_str + len;
392 for (;;) {
393 Py_UNICODE c;
394 /* Fast loop for non-control characters */
395 while ((c = *in++) > '\r')
396 *out++ = c;
397 if (c == '\n') {
398 *out++ = c;
399 seennl |= SEEN_LF;
400 continue;
401 }
402 if (c == '\r') {
403 if (*in == '\n') {
404 in++;
405 seennl |= SEEN_CRLF;
406 }
407 else
408 seennl |= SEEN_CR;
409 *out++ = '\n';
410 continue;
411 }
412 if (in > end)
413 break;
414 *out++ = c;
415 }
416 if (translated != output) {
417 Py_DECREF(output);
418 output = translated;
419 }
420 if (out - out_str != len) {
421 if (PyUnicode_Resize(&output, out - out_str) < 0)
422 goto error;
423 }
424 }
425 self->seennl |= seennl;
426 }
427
428 return output;
429
430 error:
431 Py_DECREF(output);
432 return NULL;
433}
434
435static PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000436IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 PyObject *args, PyObject *kwds)
438{
439 char *kwlist[] = {"input", "final", NULL};
440 PyObject *input;
441 int final = 0;
442
443 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
444 kwlist, &input, &final))
445 return NULL;
446 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
447}
448
449static PyObject *
450IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
451{
452 PyObject *buffer;
453 unsigned PY_LONG_LONG flag;
454
455 if (self->decoder != Py_None) {
456 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
457 _PyIO_str_getstate, NULL);
458 if (state == NULL)
459 return NULL;
460 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
461 Py_DECREF(state);
462 return NULL;
463 }
464 Py_INCREF(buffer);
465 Py_DECREF(state);
466 }
467 else {
468 buffer = PyBytes_FromString("");
469 flag = 0;
470 }
471 flag <<= 1;
472 if (self->pendingcr)
473 flag |= 1;
474 return Py_BuildValue("NK", buffer, flag);
475}
476
477static PyObject *
478IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject *self, PyObject *state)
479{
480 PyObject *buffer;
481 unsigned PY_LONG_LONG flag;
482
483 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
484 return NULL;
485
486 self->pendingcr = (int) flag & 1;
487 flag >>= 1;
488
489 if (self->decoder != Py_None)
490 return PyObject_CallMethod(self->decoder,
491 "setstate", "((OK))", buffer, flag);
492 else
493 Py_RETURN_NONE;
494}
495
496static PyObject *
497IncrementalNewlineDecoder_reset(PyNewLineDecoderObject *self, PyObject *args)
498{
499 self->seennl = 0;
500 self->pendingcr = 0;
501 if (self->decoder != Py_None)
502 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
503 else
504 Py_RETURN_NONE;
505}
506
507static PyObject *
508IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject *self, void *context)
509{
510 switch (self->seennl) {
511 case SEEN_CR:
512 return PyUnicode_FromString("\r");
513 case SEEN_LF:
514 return PyUnicode_FromString("\n");
515 case SEEN_CRLF:
516 return PyUnicode_FromString("\r\n");
517 case SEEN_CR | SEEN_LF:
518 return Py_BuildValue("ss", "\r", "\n");
519 case SEEN_CR | SEEN_CRLF:
520 return Py_BuildValue("ss", "\r", "\r\n");
521 case SEEN_LF | SEEN_CRLF:
522 return Py_BuildValue("ss", "\n", "\r\n");
523 case SEEN_CR | SEEN_LF | SEEN_CRLF:
524 return Py_BuildValue("sss", "\r", "\n", "\r\n");
525 default:
526 Py_RETURN_NONE;
527 }
528
529}
530
531
532static PyMethodDef IncrementalNewlineDecoder_methods[] = {
533 {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS},
534 {"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
535 {"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
536 {"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
537 {0}
538};
539
540static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
541 {"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
542 {0}
543};
544
545PyTypeObject PyIncrementalNewlineDecoder_Type = {
546 PyVarObject_HEAD_INIT(NULL, 0)
547 "_io.IncrementalNewlineDecoder", /*tp_name*/
548 sizeof(PyNewLineDecoderObject), /*tp_basicsize*/
549 0, /*tp_itemsize*/
550 (destructor)IncrementalNewlineDecoder_dealloc, /*tp_dealloc*/
551 0, /*tp_print*/
552 0, /*tp_getattr*/
553 0, /*tp_setattr*/
554 0, /*tp_compare */
555 0, /*tp_repr*/
556 0, /*tp_as_number*/
557 0, /*tp_as_sequence*/
558 0, /*tp_as_mapping*/
559 0, /*tp_hash */
560 0, /*tp_call*/
561 0, /*tp_str*/
562 0, /*tp_getattro*/
563 0, /*tp_setattro*/
564 0, /*tp_as_buffer*/
565 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
566 IncrementalNewlineDecoder_doc, /* tp_doc */
567 0, /* tp_traverse */
568 0, /* tp_clear */
569 0, /* tp_richcompare */
570 0, /*tp_weaklistoffset*/
571 0, /* tp_iter */
572 0, /* tp_iternext */
573 IncrementalNewlineDecoder_methods, /* tp_methods */
574 0, /* tp_members */
575 IncrementalNewlineDecoder_getset, /* tp_getset */
576 0, /* tp_base */
577 0, /* tp_dict */
578 0, /* tp_descr_get */
579 0, /* tp_descr_set */
580 0, /* tp_dictoffset */
581 (initproc)IncrementalNewlineDecoder_init, /* tp_init */
582 0, /* tp_alloc */
583 PyType_GenericNew, /* tp_new */
584};
585
586
587/* TextIOWrapper */
588
589PyDoc_STRVAR(TextIOWrapper_doc,
590 "Character and line based layer over a BufferedIOBase object, buffer.\n"
591 "\n"
592 "encoding gives the name of the encoding that the stream will be\n"
593 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
594 "\n"
595 "errors determines the strictness of encoding and decoding (see the\n"
596 "codecs.register) and defaults to \"strict\".\n"
597 "\n"
598 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
599 "handling of line endings. If it is None, universal newlines is\n"
600 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
601 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
602 "caller. Conversely, on output, '\\n' is translated to the system\n"
603 "default line seperator, os.linesep. If newline is any other of its\n"
604 "legal values, that newline becomes the newline when the file is read\n"
605 "and it is returned untranslated. On output, '\\n' is converted to the\n"
606 "newline.\n"
607 "\n"
608 "If line_buffering is True, a call to flush is implied when a call to\n"
609 "write contains a newline character."
610 );
611
612typedef PyObject *
613 (*encodefunc_t)(PyObject *, PyObject *);
614
615typedef struct
616{
617 PyObject_HEAD
618 int ok; /* initialized? */
619 Py_ssize_t chunk_size;
620 PyObject *buffer;
621 PyObject *encoding;
622 PyObject *encoder;
623 PyObject *decoder;
624 PyObject *readnl;
625 PyObject *errors;
626 const char *writenl; /* utf-8 encoded, NULL stands for \n */
627 char line_buffering;
628 char readuniversal;
629 char readtranslate;
630 char writetranslate;
631 char seekable;
632 char telling;
633 /* Specialized encoding func (see below) */
634 encodefunc_t encodefunc;
635
636 /* Reads and writes are internally buffered in order to speed things up.
637 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000638
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000639 Please also note that text to be written is first encoded before being
640 buffered. This is necessary so that encoding errors are immediately
641 reported to the caller, but it unfortunately means that the
642 IncrementalEncoder (whose encode() method is always written in Python)
643 becomes a bottleneck for small writes.
644 */
645 PyObject *decoded_chars; /* buffer for text returned from decoder */
646 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
647 PyObject *pending_bytes; /* list of bytes objects waiting to be
648 written, or NULL */
649 Py_ssize_t pending_bytes_count;
650 PyObject *snapshot;
651 /* snapshot is either None, or a tuple (dec_flags, next_input) where
652 * dec_flags is the second (integer) item of the decoder state and
653 * next_input is the chunk of input bytes that comes next after the
654 * snapshot point. We use this to reconstruct decoder states in tell().
655 */
656
657 /* Cache raw object if it's a FileIO object */
658 PyObject *raw;
659
660 PyObject *weakreflist;
661 PyObject *dict;
662} PyTextIOWrapperObject;
663
664
665/* A couple of specialized cases in order to bypass the slow incremental
666 encoding methods for the most popular encodings. */
667
668static PyObject *
669ascii_encode(PyTextIOWrapperObject *self, PyObject *text)
670{
671 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
672 PyUnicode_GET_SIZE(text),
673 PyBytes_AS_STRING(self->errors));
674}
675
676static PyObject *
677utf16be_encode(PyTextIOWrapperObject *self, PyObject *text)
678{
679 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
680 PyUnicode_GET_SIZE(text),
681 PyBytes_AS_STRING(self->errors), 1);
682}
683
684static PyObject *
685utf16le_encode(PyTextIOWrapperObject *self, PyObject *text)
686{
687 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
688 PyUnicode_GET_SIZE(text),
689 PyBytes_AS_STRING(self->errors), -1);
690}
691
692static PyObject *
693utf16_encode(PyTextIOWrapperObject *self, PyObject *text)
694{
695 PyObject *res;
696 res = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
697 PyUnicode_GET_SIZE(text),
698 PyBytes_AS_STRING(self->errors), 0);
699 if (res == NULL)
700 return NULL;
701 /* Next writes will skip the BOM and use native byte ordering */
702#if defined(WORDS_BIGENDIAN)
703 self->encodefunc = (encodefunc_t) utf16be_encode;
704#else
705 self->encodefunc = (encodefunc_t) utf16le_encode;
706#endif
707 return res;
708}
709
710
711static PyObject *
712utf8_encode(PyTextIOWrapperObject *self, PyObject *text)
713{
714 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
715 PyUnicode_GET_SIZE(text),
716 PyBytes_AS_STRING(self->errors));
717}
718
719static PyObject *
720latin1_encode(PyTextIOWrapperObject *self, PyObject *text)
721{
722 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
723 PyUnicode_GET_SIZE(text),
724 PyBytes_AS_STRING(self->errors));
725}
726
727/* Map normalized encoding names onto the specialized encoding funcs */
728
729typedef struct {
730 const char *name;
731 encodefunc_t encodefunc;
732} encodefuncentry;
733
Antoine Pitrou24f36292009-03-28 22:16:42 +0000734static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735 {"ascii", (encodefunc_t) ascii_encode},
736 {"iso8859-1", (encodefunc_t) latin1_encode},
737 {"utf-16-be", (encodefunc_t) utf16be_encode},
738 {"utf-16-le", (encodefunc_t) utf16le_encode},
739 {"utf-16", (encodefunc_t) utf16_encode},
740 {"utf-8", (encodefunc_t) utf8_encode},
741 {NULL, NULL}
742};
743
744
745static int
746TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds)
747{
748 char *kwlist[] = {"buffer", "encoding", "errors",
749 "newline", "line_buffering",
750 NULL};
751 PyObject *buffer, *raw;
752 char *encoding = NULL;
753 char *errors = NULL;
754 char *newline = NULL;
755 int line_buffering = 0;
756 _PyIO_State *state = IO_STATE;
757
758 PyObject *res;
759 int r;
760
761 self->ok = 0;
762 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
763 kwlist, &buffer, &encoding, &errors,
764 &newline, &line_buffering))
765 return -1;
766
767 if (newline && newline[0] != '\0'
768 && !(newline[0] == '\n' && newline[1] == '\0')
769 && !(newline[0] == '\r' && newline[1] == '\0')
770 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
771 PyErr_Format(PyExc_ValueError,
772 "illegal newline value: %s", newline);
773 return -1;
774 }
775
776 Py_CLEAR(self->buffer);
777 Py_CLEAR(self->encoding);
778 Py_CLEAR(self->encoder);
779 Py_CLEAR(self->decoder);
780 Py_CLEAR(self->readnl);
781 Py_CLEAR(self->decoded_chars);
782 Py_CLEAR(self->pending_bytes);
783 Py_CLEAR(self->snapshot);
784 Py_CLEAR(self->errors);
785 Py_CLEAR(self->raw);
786 self->decoded_chars_used = 0;
787 self->pending_bytes_count = 0;
788 self->encodefunc = NULL;
789
790 if (encoding == NULL) {
791 /* Try os.device_encoding(fileno) */
792 PyObject *fileno;
793 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
794 /* Ignore only AttributeError and UnsupportedOperation */
795 if (fileno == NULL) {
796 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
797 PyErr_ExceptionMatches(state->unsupported_operation)) {
798 PyErr_Clear();
799 }
800 else {
801 goto error;
802 }
803 }
804 else {
805 self->encoding = PyObject_CallMethod(state->os_module,
806 "device_encoding",
807 "N", fileno);
808 if (self->encoding == NULL)
809 goto error;
810 else if (!PyUnicode_Check(self->encoding))
811 Py_CLEAR(self->encoding);
812 }
813 }
814 if (encoding == NULL && self->encoding == NULL) {
815 if (state->locale_module == NULL) {
816 state->locale_module = PyImport_ImportModule("locale");
817 if (state->locale_module == NULL)
818 goto catch_ImportError;
819 else
820 goto use_locale;
821 }
822 else {
823 use_locale:
824 self->encoding = PyObject_CallMethod(
825 state->locale_module, "getpreferredencoding", NULL);
826 if (self->encoding == NULL) {
827 catch_ImportError:
828 /*
829 Importing locale can raise a ImportError because of
830 _functools, and locale.getpreferredencoding can raise a
831 ImportError if _locale is not available. These will happen
832 during module building.
833 */
834 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
835 PyErr_Clear();
836 self->encoding = PyUnicode_FromString("ascii");
837 }
838 else
839 goto error;
840 }
841 else if (!PyUnicode_Check(self->encoding))
842 Py_CLEAR(self->encoding);
843 }
844 }
845 if (self->encoding != NULL)
846 encoding = _PyUnicode_AsString(self->encoding);
847 else if (encoding != NULL) {
848 self->encoding = PyUnicode_FromString(encoding);
849 if (self->encoding == NULL)
850 goto error;
851 }
852 else {
853 PyErr_SetString(PyExc_IOError,
854 "could not determine default encoding");
855 }
856
857 if (errors == NULL)
858 errors = "strict";
859 self->errors = PyBytes_FromString(errors);
860 if (self->errors == NULL)
861 goto error;
862
863 self->chunk_size = 8192;
864 self->readuniversal = (newline == NULL || newline[0] == '\0');
865 self->line_buffering = line_buffering;
866 self->readtranslate = (newline == NULL);
867 if (newline) {
868 self->readnl = PyUnicode_FromString(newline);
869 if (self->readnl == NULL)
870 return -1;
871 }
872 self->writetranslate = (newline == NULL || newline[0] != '\0');
873 if (!self->readuniversal && self->readnl) {
874 self->writenl = _PyUnicode_AsString(self->readnl);
875 if (!strcmp(self->writenl, "\n"))
876 self->writenl = NULL;
877 }
878#ifdef MS_WINDOWS
879 else
880 self->writenl = "\r\n";
881#endif
882
883 /* Build the decoder object */
884 res = PyObject_CallMethod(buffer, "readable", NULL);
885 if (res == NULL)
886 goto error;
887 r = PyObject_IsTrue(res);
888 Py_DECREF(res);
889 if (r == -1)
890 goto error;
891 if (r == 1) {
892 self->decoder = PyCodec_IncrementalDecoder(
893 encoding, errors);
894 if (self->decoder == NULL)
895 goto error;
896
897 if (self->readuniversal) {
898 PyObject *incrementalDecoder = PyObject_CallFunction(
899 (PyObject *)&PyIncrementalNewlineDecoder_Type,
900 "Oi", self->decoder, (int)self->readtranslate);
901 if (incrementalDecoder == NULL)
902 goto error;
903 Py_CLEAR(self->decoder);
904 self->decoder = incrementalDecoder;
905 }
906 }
907
908 /* Build the encoder object */
909 res = PyObject_CallMethod(buffer, "writable", NULL);
910 if (res == NULL)
911 goto error;
912 r = PyObject_IsTrue(res);
913 Py_DECREF(res);
914 if (r == -1)
915 goto error;
916 if (r == 1) {
917 PyObject *ci;
918 self->encoder = PyCodec_IncrementalEncoder(
919 encoding, errors);
920 if (self->encoder == NULL)
921 goto error;
922 /* Get the normalized named of the codec */
923 ci = _PyCodec_Lookup(encoding);
924 if (ci == NULL)
925 goto error;
926 res = PyObject_GetAttrString(ci, "name");
927 Py_DECREF(ci);
928 if (res == NULL)
929 PyErr_Clear();
930 else if (PyUnicode_Check(res)) {
931 encodefuncentry *e = encodefuncs;
932 while (e->name != NULL) {
933 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
934 self->encodefunc = e->encodefunc;
935 break;
936 }
937 e++;
938 }
939 }
940 Py_XDECREF(res);
941 }
942
943 self->buffer = buffer;
944 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +0000945
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000946 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
947 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
948 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
949 raw = PyObject_GetAttrString(buffer, "raw");
950 /* Cache the raw FileIO object to speed up 'closed' checks */
951 if (raw == NULL)
952 PyErr_Clear();
953 else if (Py_TYPE(raw) == &PyFileIO_Type)
954 self->raw = raw;
955 else
956 Py_DECREF(raw);
957 }
958
959 res = PyObject_CallMethod(buffer, "seekable", NULL);
960 if (res == NULL)
961 goto error;
962 self->seekable = self->telling = PyObject_IsTrue(res);
963 Py_DECREF(res);
964
965 self->ok = 1;
966 return 0;
967
968 error:
969 return -1;
970}
971
972static int
973_TextIOWrapper_clear(PyTextIOWrapperObject *self)
974{
975 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
976 return -1;
977 self->ok = 0;
978 Py_CLEAR(self->buffer);
979 Py_CLEAR(self->encoding);
980 Py_CLEAR(self->encoder);
981 Py_CLEAR(self->decoder);
982 Py_CLEAR(self->readnl);
983 Py_CLEAR(self->decoded_chars);
984 Py_CLEAR(self->pending_bytes);
985 Py_CLEAR(self->snapshot);
986 Py_CLEAR(self->errors);
987 Py_CLEAR(self->raw);
988 return 0;
989}
990
991static void
992TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
993{
994 if (_TextIOWrapper_clear(self) < 0)
995 return;
996 _PyObject_GC_UNTRACK(self);
997 if (self->weakreflist != NULL)
998 PyObject_ClearWeakRefs((PyObject *)self);
999 Py_CLEAR(self->dict);
1000 Py_TYPE(self)->tp_free((PyObject *)self);
1001}
1002
1003static int
1004TextIOWrapper_traverse(PyTextIOWrapperObject *self, visitproc visit, void *arg)
1005{
1006 Py_VISIT(self->buffer);
1007 Py_VISIT(self->encoding);
1008 Py_VISIT(self->encoder);
1009 Py_VISIT(self->decoder);
1010 Py_VISIT(self->readnl);
1011 Py_VISIT(self->decoded_chars);
1012 Py_VISIT(self->pending_bytes);
1013 Py_VISIT(self->snapshot);
1014 Py_VISIT(self->errors);
1015 Py_VISIT(self->raw);
1016
1017 Py_VISIT(self->dict);
1018 return 0;
1019}
1020
1021static int
1022TextIOWrapper_clear(PyTextIOWrapperObject *self)
1023{
1024 if (_TextIOWrapper_clear(self) < 0)
1025 return -1;
1026 Py_CLEAR(self->dict);
1027 return 0;
1028}
1029
1030static PyObject *
1031TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context);
1032
1033/* This macro takes some shortcuts to make the common case faster. */
1034#define CHECK_CLOSED(self) \
1035 do { \
1036 int r; \
1037 PyObject *_res; \
1038 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1039 if (self->raw != NULL) \
1040 r = _PyFileIO_closed(self->raw); \
1041 else { \
1042 _res = TextIOWrapper_closed_get(self, NULL); \
1043 if (_res == NULL) \
1044 return NULL; \
1045 r = PyObject_IsTrue(_res); \
1046 Py_DECREF(_res); \
1047 if (r < 0) \
1048 return NULL; \
1049 } \
1050 if (r > 0) { \
1051 PyErr_SetString(PyExc_ValueError, \
1052 "I/O operation on closed file."); \
1053 return NULL; \
1054 } \
1055 } \
1056 else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
1057 return NULL; \
1058 } while (0)
1059
/* Raise ValueError and return NULL from the enclosing function when the
   object's `ok` flag is not positive.  The argument is parenthesised so
   that an expression can be passed safely.  The bare `if` form is kept as
   is, so avoid using this macro as the body of an unbraced if/else. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return NULL; \
    }
1066
/* Raise ValueError and return -1 from the enclosing function when the
   object's `ok` flag is not positive.  The argument is parenthesised so
   that an expression can be passed safely.  The bare `if` form is kept as
   is, so avoid using this macro as the body of an unbraced if/else. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            "I/O operation on uninitialized object"); \
        return -1; \
    }
1073
1074
1075Py_LOCAL_INLINE(const Py_UNICODE *)
1076findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1077{
1078 /* like wcschr, but doesn't stop at NULL characters */
1079 while (size-- > 0) {
1080 if (*s == ch)
1081 return s;
1082 s++;
1083 }
1084 return NULL;
1085}
1086
Antoine Pitrou24f36292009-03-28 22:16:42 +00001087/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001088 underlying buffered object, though. */
1089static int
1090_TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
1091{
1092 PyObject *b, *ret;
1093
1094 if (self->pending_bytes == NULL)
1095 return 0;
1096 b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1097 if (b == NULL)
1098 return -1;
1099 ret = PyObject_CallMethodObjArgs(self->buffer,
1100 _PyIO_str_write, b, NULL);
1101 Py_DECREF(b);
1102 if (ret == NULL)
1103 return -1;
1104 Py_DECREF(ret);
1105 Py_CLEAR(self->pending_bytes);
1106 self->pending_bytes_count = 0;
1107 return 0;
1108}
1109
1110static PyObject *
1111TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
1112{
1113 PyObject *ret;
1114 PyObject *text; /* owned reference */
1115 PyObject *b;
1116 Py_ssize_t textlen;
1117 int haslf = 0;
1118 int needflush = 0;
1119
1120 CHECK_INITIALIZED(self);
1121
1122 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1123 return NULL;
1124 }
1125
1126 CHECK_CLOSED(self);
1127
1128 Py_INCREF(text);
1129
1130 textlen = PyUnicode_GetSize(text);
1131
1132 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1133 if (findchar(PyUnicode_AS_UNICODE(text),
1134 PyUnicode_GET_SIZE(text), '\n'))
1135 haslf = 1;
1136
1137 if (haslf && self->writetranslate && self->writenl != NULL) {
1138 PyObject *newtext = PyObject_CallMethod(
1139 text, "replace", "ss", "\n", self->writenl);
1140 Py_DECREF(text);
1141 if (newtext == NULL)
1142 return NULL;
1143 text = newtext;
1144 }
1145
1146 if (self->line_buffering &&
1147 (haslf ||
1148 findchar(PyUnicode_AS_UNICODE(text),
1149 PyUnicode_GET_SIZE(text), '\r')))
1150 needflush = 1;
1151
1152 /* XXX What if we were just reading? */
1153 if (self->encodefunc != NULL)
1154 b = (*self->encodefunc)((PyObject *) self, text);
1155 else
1156 b = PyObject_CallMethodObjArgs(self->encoder,
1157 _PyIO_str_encode, text, NULL);
1158 Py_DECREF(text);
1159 if (b == NULL)
1160 return NULL;
1161
1162 if (self->pending_bytes == NULL) {
1163 self->pending_bytes = PyList_New(0);
1164 if (self->pending_bytes == NULL) {
1165 Py_DECREF(b);
1166 return NULL;
1167 }
1168 self->pending_bytes_count = 0;
1169 }
1170 if (PyList_Append(self->pending_bytes, b) < 0) {
1171 Py_DECREF(b);
1172 return NULL;
1173 }
1174 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1175 Py_DECREF(b);
1176 if (self->pending_bytes_count > self->chunk_size || needflush) {
1177 if (_TextIOWrapper_writeflush(self) < 0)
1178 return NULL;
1179 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001180
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001181 if (needflush) {
1182 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1183 if (ret == NULL)
1184 return NULL;
1185 Py_DECREF(ret);
1186 }
1187
1188 Py_CLEAR(self->snapshot);
1189
1190 if (self->decoder) {
1191 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1192 if (ret == NULL)
1193 return NULL;
1194 Py_DECREF(ret);
1195 }
1196
1197 return PyLong_FromSsize_t(textlen);
1198}
1199
1200/* Steal a reference to chars and store it in the decoded_char buffer;
1201 */
1202static void
1203TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars)
1204{
1205 Py_CLEAR(self->decoded_chars);
1206 self->decoded_chars = chars;
1207 self->decoded_chars_used = 0;
1208}
1209
1210static PyObject *
1211TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
1212{
1213 PyObject *chars;
1214 Py_ssize_t avail;
1215
1216 if (self->decoded_chars == NULL)
1217 return PyUnicode_FromStringAndSize(NULL, 0);
1218
1219 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1220 - self->decoded_chars_used);
1221
1222 assert(avail >= 0);
1223
1224 if (n < 0 || n > avail)
1225 n = avail;
1226
1227 if (self->decoded_chars_used > 0 || n < avail) {
1228 chars = PyUnicode_FromUnicode(
1229 PyUnicode_AS_UNICODE(self->decoded_chars)
1230 + self->decoded_chars_used, n);
1231 if (chars == NULL)
1232 return NULL;
1233 }
1234 else {
1235 chars = self->decoded_chars;
1236 Py_INCREF(chars);
1237 }
1238
1239 self->decoded_chars_used += n;
1240 return chars;
1241}
1242
1243/* Read and decode the next chunk of data from the BufferedReader.
1244 */
1245static int
1246TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
1247{
1248 PyObject *dec_buffer = NULL;
1249 PyObject *dec_flags = NULL;
1250 PyObject *input_chunk = NULL;
1251 PyObject *decoded_chars, *chunk_size;
1252 int eof;
1253
1254 /* The return value is True unless EOF was reached. The decoded string is
1255 * placed in self._decoded_chars (replacing its previous value). The
1256 * entire input chunk is sent to the decoder, though some of it may remain
1257 * buffered in the decoder, yet to be converted.
1258 */
1259
1260 if (self->decoder == NULL) {
1261 PyErr_SetString(PyExc_ValueError, "no decoder");
1262 return -1;
1263 }
1264
1265 if (self->telling) {
1266 /* To prepare for tell(), we need to snapshot a point in the file
1267 * where the decoder's input buffer is empty.
1268 */
1269
1270 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1271 _PyIO_str_getstate, NULL);
1272 if (state == NULL)
1273 return -1;
1274 /* Given this, we know there was a valid snapshot point
1275 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1276 */
1277 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1278 Py_DECREF(state);
1279 return -1;
1280 }
1281 Py_INCREF(dec_buffer);
1282 Py_INCREF(dec_flags);
1283 Py_DECREF(state);
1284 }
1285
1286 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1287 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1288 if (chunk_size == NULL)
1289 goto fail;
1290 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1291 _PyIO_str_read1, chunk_size, NULL);
1292 Py_DECREF(chunk_size);
1293 if (input_chunk == NULL)
1294 goto fail;
1295 assert(PyBytes_Check(input_chunk));
1296
1297 eof = (PyBytes_Size(input_chunk) == 0);
1298
1299 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1300 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1301 self->decoder, input_chunk, eof);
1302 }
1303 else {
1304 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1305 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1306 }
1307
1308 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1309 if (decoded_chars == NULL)
1310 goto fail;
1311 TextIOWrapper_set_decoded_chars(self, decoded_chars);
1312 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1313 eof = 0;
1314
1315 if (self->telling) {
1316 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1317 * next input to be decoded is dec_buffer + input_chunk.
1318 */
1319 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1320 if (next_input == NULL)
1321 goto fail;
1322 assert (PyBytes_Check(next_input));
1323 Py_DECREF(dec_buffer);
1324 Py_CLEAR(self->snapshot);
1325 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1326 }
1327 Py_DECREF(input_chunk);
1328
1329 return (eof == 0);
1330
1331 fail:
1332 Py_XDECREF(dec_buffer);
1333 Py_XDECREF(dec_flags);
1334 Py_XDECREF(input_chunk);
1335 return -1;
1336}
1337
1338static PyObject *
1339TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
1340{
1341 Py_ssize_t n = -1;
1342 PyObject *result = NULL, *chunks = NULL;
1343
1344 CHECK_INITIALIZED(self);
1345
1346 if (!PyArg_ParseTuple(args, "|n:read", &n))
1347 return NULL;
1348
1349 CHECK_CLOSED(self);
1350
Benjamin Petersona1b49012009-03-31 23:11:32 +00001351 if (self->decoder == NULL) {
1352 PyErr_SetString(PyExc_IOError, "not readable");
1353 return NULL;
1354 }
1355
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001356 if (_TextIOWrapper_writeflush(self) < 0)
1357 return NULL;
1358
1359 if (n < 0) {
1360 /* Read everything */
1361 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1362 PyObject *decoded;
1363 if (bytes == NULL)
1364 goto fail;
1365 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1366 bytes, Py_True, NULL);
1367 Py_DECREF(bytes);
1368 if (decoded == NULL)
1369 goto fail;
1370
1371 result = TextIOWrapper_get_decoded_chars(self, -1);
1372
1373 if (result == NULL) {
1374 Py_DECREF(decoded);
1375 return NULL;
1376 }
1377
1378 PyUnicode_AppendAndDel(&result, decoded);
1379 if (result == NULL)
1380 goto fail;
1381
1382 Py_CLEAR(self->snapshot);
1383 return result;
1384 }
1385 else {
1386 int res = 1;
1387 Py_ssize_t remaining = n;
1388
1389 result = TextIOWrapper_get_decoded_chars(self, n);
1390 if (result == NULL)
1391 goto fail;
1392 remaining -= PyUnicode_GET_SIZE(result);
1393
1394 /* Keep reading chunks until we have n characters to return */
1395 while (remaining > 0) {
1396 res = TextIOWrapper_read_chunk(self);
1397 if (res < 0)
1398 goto fail;
1399 if (res == 0) /* EOF */
1400 break;
1401 if (chunks == NULL) {
1402 chunks = PyList_New(0);
1403 if (chunks == NULL)
1404 goto fail;
1405 }
1406 if (PyList_Append(chunks, result) < 0)
1407 goto fail;
1408 Py_DECREF(result);
1409 result = TextIOWrapper_get_decoded_chars(self, remaining);
1410 if (result == NULL)
1411 goto fail;
1412 remaining -= PyUnicode_GET_SIZE(result);
1413 }
1414 if (chunks != NULL) {
1415 if (result != NULL && PyList_Append(chunks, result) < 0)
1416 goto fail;
1417 Py_CLEAR(result);
1418 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1419 if (result == NULL)
1420 goto fail;
1421 Py_CLEAR(chunks);
1422 }
1423 return result;
1424 }
1425 fail:
1426 Py_XDECREF(result);
1427 Py_XDECREF(chunks);
1428 return NULL;
1429}
1430
1431
1432/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1433 that is to the NUL character. Otherwise the function will produce
1434 incorrect results. */
1435static Py_UNICODE *
1436find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1437{
1438 Py_UNICODE *s = start;
1439 for (;;) {
1440 while (*s > ch)
1441 s++;
1442 if (*s == ch)
1443 return s;
1444 if (s == end)
1445 return NULL;
1446 s++;
1447 }
1448}
1449
1450Py_ssize_t
1451_PyIO_find_line_ending(
1452 int translated, int universal, PyObject *readnl,
1453 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1454{
1455 Py_ssize_t len = end - start;
1456
1457 if (translated) {
1458 /* Newlines are already translated, only search for \n */
1459 Py_UNICODE *pos = find_control_char(start, end, '\n');
1460 if (pos != NULL)
1461 return pos - start + 1;
1462 else {
1463 *consumed = len;
1464 return -1;
1465 }
1466 }
1467 else if (universal) {
1468 /* Universal newline search. Find any of \r, \r\n, \n
1469 * The decoder ensures that \r\n are not split in two pieces
1470 */
1471 Py_UNICODE *s = start;
1472 for (;;) {
1473 Py_UNICODE ch;
1474 /* Fast path for non-control chars. The loop always ends
1475 since the Py_UNICODE storage is NUL-terminated. */
1476 while (*s > '\r')
1477 s++;
1478 if (s >= end) {
1479 *consumed = len;
1480 return -1;
1481 }
1482 ch = *s++;
1483 if (ch == '\n')
1484 return s - start;
1485 if (ch == '\r') {
1486 if (*s == '\n')
1487 return s - start + 1;
1488 else
1489 return s - start;
1490 }
1491 }
1492 }
1493 else {
1494 /* Non-universal mode. */
1495 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1496 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1497 if (readnl_len == 1) {
1498 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1499 if (pos != NULL)
1500 return pos - start + 1;
1501 *consumed = len;
1502 return -1;
1503 }
1504 else {
1505 Py_UNICODE *s = start;
1506 Py_UNICODE *e = end - readnl_len + 1;
1507 Py_UNICODE *pos;
1508 if (e < s)
1509 e = s;
1510 while (s < e) {
1511 Py_ssize_t i;
1512 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1513 if (pos == NULL || pos >= e)
1514 break;
1515 for (i = 1; i < readnl_len; i++) {
1516 if (pos[i] != nl[i])
1517 break;
1518 }
1519 if (i == readnl_len)
1520 return pos - start + readnl_len;
1521 s = pos + 1;
1522 }
1523 pos = find_control_char(e, end, nl[0]);
1524 if (pos == NULL)
1525 *consumed = len;
1526 else
1527 *consumed = pos - start;
1528 return -1;
1529 }
1530 }
1531}
1532
1533static PyObject *
1534_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
1535{
1536 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1537 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1538 int res;
1539
1540 CHECK_CLOSED(self);
1541
1542 if (_TextIOWrapper_writeflush(self) < 0)
1543 return NULL;
1544
1545 chunked = 0;
1546
1547 while (1) {
1548 Py_UNICODE *ptr;
1549 Py_ssize_t line_len;
1550 Py_ssize_t consumed = 0;
1551
1552 /* First, get some data if necessary */
1553 res = 1;
1554 while (!self->decoded_chars ||
1555 !PyUnicode_GET_SIZE(self->decoded_chars)) {
1556 res = TextIOWrapper_read_chunk(self);
1557 if (res < 0)
1558 goto error;
1559 if (res == 0)
1560 break;
1561 }
1562 if (res == 0) {
1563 /* end of file */
1564 TextIOWrapper_set_decoded_chars(self, NULL);
1565 Py_CLEAR(self->snapshot);
1566 start = endpos = offset_to_buffer = 0;
1567 break;
1568 }
1569
1570 if (remaining == NULL) {
1571 line = self->decoded_chars;
1572 start = self->decoded_chars_used;
1573 offset_to_buffer = 0;
1574 Py_INCREF(line);
1575 }
1576 else {
1577 assert(self->decoded_chars_used == 0);
1578 line = PyUnicode_Concat(remaining, self->decoded_chars);
1579 start = 0;
1580 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1581 Py_CLEAR(remaining);
1582 if (line == NULL)
1583 goto error;
1584 }
1585
1586 ptr = PyUnicode_AS_UNICODE(line);
1587 line_len = PyUnicode_GET_SIZE(line);
1588
1589 endpos = _PyIO_find_line_ending(
1590 self->readtranslate, self->readuniversal, self->readnl,
1591 ptr + start, ptr + line_len, &consumed);
1592 if (endpos >= 0) {
1593 endpos += start;
1594 if (limit >= 0 && (endpos - start) + chunked >= limit)
1595 endpos = start + limit - chunked;
1596 break;
1597 }
1598
1599 /* We can put aside up to `endpos` */
1600 endpos = consumed + start;
1601 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1602 /* Didn't find line ending, but reached length limit */
1603 endpos = start + limit - chunked;
1604 break;
1605 }
1606
1607 if (endpos > start) {
1608 /* No line ending seen yet - put aside current data */
1609 PyObject *s;
1610 if (chunks == NULL) {
1611 chunks = PyList_New(0);
1612 if (chunks == NULL)
1613 goto error;
1614 }
1615 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1616 if (s == NULL)
1617 goto error;
1618 if (PyList_Append(chunks, s) < 0) {
1619 Py_DECREF(s);
1620 goto error;
1621 }
1622 chunked += PyUnicode_GET_SIZE(s);
1623 Py_DECREF(s);
1624 }
1625 /* There may be some remaining bytes we'll have to prepend to the
1626 next chunk of data */
1627 if (endpos < line_len) {
1628 remaining = PyUnicode_FromUnicode(
1629 ptr + endpos, line_len - endpos);
1630 if (remaining == NULL)
1631 goto error;
1632 }
1633 Py_CLEAR(line);
1634 /* We have consumed the buffer */
1635 TextIOWrapper_set_decoded_chars(self, NULL);
1636 }
1637
1638 if (line != NULL) {
1639 /* Our line ends in the current buffer */
1640 self->decoded_chars_used = endpos - offset_to_buffer;
1641 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1642 if (start == 0 && Py_REFCNT(line) == 1) {
1643 if (PyUnicode_Resize(&line, endpos) < 0)
1644 goto error;
1645 }
1646 else {
1647 PyObject *s = PyUnicode_FromUnicode(
1648 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1649 Py_CLEAR(line);
1650 if (s == NULL)
1651 goto error;
1652 line = s;
1653 }
1654 }
1655 }
1656 if (remaining != NULL) {
1657 if (chunks == NULL) {
1658 chunks = PyList_New(0);
1659 if (chunks == NULL)
1660 goto error;
1661 }
1662 if (PyList_Append(chunks, remaining) < 0)
1663 goto error;
1664 Py_CLEAR(remaining);
1665 }
1666 if (chunks != NULL) {
1667 if (line != NULL && PyList_Append(chunks, line) < 0)
1668 goto error;
1669 Py_CLEAR(line);
1670 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1671 if (line == NULL)
1672 goto error;
1673 Py_DECREF(chunks);
1674 }
1675 if (line == NULL)
1676 line = PyUnicode_FromStringAndSize(NULL, 0);
1677
1678 return line;
1679
1680 error:
1681 Py_XDECREF(chunks);
1682 Py_XDECREF(remaining);
1683 Py_XDECREF(line);
1684 return NULL;
1685}
1686
1687static PyObject *
1688TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args)
1689{
1690 Py_ssize_t limit = -1;
1691
1692 CHECK_INITIALIZED(self);
1693 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1694 return NULL;
1695 }
1696 return _TextIOWrapper_readline(self, limit);
1697}
1698
1699/* Seek and Tell */
1700
1701typedef struct {
1702 Py_off_t start_pos;
1703 int dec_flags;
1704 int bytes_to_feed;
1705 int chars_to_skip;
1706 char need_eof;
1707} CookieStruct;
1708
1709/*
1710 To speed up cookie packing/unpacking, we store the fields in a temporary
1711 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1712 The following macros define at which offsets in the intermediary byte
1713 string the various CookieStruct fields will be stored.
1714 */
1715
1716#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1717
1718#if defined(WORDS_BIGENDIAN)
1719
1720# define IS_LITTLE_ENDIAN 0
1721
1722/* We want the least significant byte of start_pos to also be the least
1723 significant byte of the cookie, which means that in big-endian mode we
1724 must copy the fields in reverse order. */
1725
1726# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1727# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1728# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1729# define OFF_CHARS_TO_SKIP (sizeof(char))
1730# define OFF_NEED_EOF 0
1731
1732#else
1733
1734# define IS_LITTLE_ENDIAN 1
1735
1736/* Little-endian mode: the least significant byte of start_pos will
1737 naturally end up the least significant byte of the cookie. */
1738
1739# define OFF_START_POS 0
1740# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1741# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1742# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1743# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1744
1745#endif
1746
1747static int
1748TextIOWrapper_parseCookie(CookieStruct *cookie, PyObject *cookieObj)
1749{
1750 unsigned char buffer[COOKIE_BUF_LEN];
1751 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1752 if (cookieLong == NULL)
1753 return -1;
1754
1755 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1756 IS_LITTLE_ENDIAN, 0) < 0) {
1757 Py_DECREF(cookieLong);
1758 return -1;
1759 }
1760 Py_DECREF(cookieLong);
1761
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001762 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1763 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1764 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1765 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1766 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001767
1768 return 0;
1769}
1770
1771static PyObject *
1772TextIOWrapper_buildCookie(CookieStruct *cookie)
1773{
1774 unsigned char buffer[COOKIE_BUF_LEN];
1775
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001776 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1777 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1778 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1779 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1780 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001781
1782 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1783}
1784#undef IS_LITTLE_ENDIAN
1785
1786static int
1787_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
1788 CookieStruct *cookie)
1789{
1790 PyObject *res;
1791 /* When seeking to the start of the stream, we call decoder.reset()
1792 rather than decoder.getstate().
1793 This is for a few decoders such as utf-16 for which the state value
1794 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1795 utf-16, that we are expecting a BOM).
1796 */
1797 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1798 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1799 else
1800 res = PyObject_CallMethod(self->decoder, "setstate",
1801 "((yi))", "", cookie->dec_flags);
1802 if (res == NULL)
1803 return -1;
1804 Py_DECREF(res);
1805 return 0;
1806}
1807
1808static PyObject *
1809TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
1810{
1811 PyObject *cookieObj, *posobj;
1812 CookieStruct cookie;
1813 int whence = 0;
1814 static PyObject *zero = NULL;
1815 PyObject *res;
1816 int cmp;
1817
1818 CHECK_INITIALIZED(self);
1819
1820 if (zero == NULL) {
1821 zero = PyLong_FromLong(0L);
1822 if (zero == NULL)
1823 return NULL;
1824 }
1825
1826 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1827 return NULL;
1828 CHECK_CLOSED(self);
1829
1830 Py_INCREF(cookieObj);
1831
1832 if (!self->seekable) {
1833 PyErr_SetString(PyExc_IOError,
1834 "underlying stream is not seekable");
1835 goto fail;
1836 }
1837
1838 if (whence == 1) {
1839 /* seek relative to current position */
1840 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1841 if (cmp < 0)
1842 goto fail;
1843
1844 if (cmp == 0) {
1845 PyErr_SetString(PyExc_IOError,
1846 "can't do nonzero cur-relative seeks");
1847 goto fail;
1848 }
1849
1850 /* Seeking to the current position should attempt to
1851 * sync the underlying buffer with the current position.
1852 */
1853 Py_DECREF(cookieObj);
1854 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1855 if (cookieObj == NULL)
1856 goto fail;
1857 }
1858 else if (whence == 2) {
1859 /* seek relative to end of file */
1860
1861 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1862 if (cmp < 0)
1863 goto fail;
1864
1865 if (cmp == 0) {
1866 PyErr_SetString(PyExc_IOError,
1867 "can't do nonzero end-relative seeks");
1868 goto fail;
1869 }
1870
1871 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
1872 if (res == NULL)
1873 goto fail;
1874 Py_DECREF(res);
1875
1876 TextIOWrapper_set_decoded_chars(self, NULL);
1877 Py_CLEAR(self->snapshot);
1878 if (self->decoder) {
1879 res = PyObject_CallMethod(self->decoder, "reset", NULL);
1880 if (res == NULL)
1881 goto fail;
1882 Py_DECREF(res);
1883 }
1884
1885 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
1886 Py_XDECREF(cookieObj);
1887 return res;
1888 }
1889 else if (whence != 0) {
1890 PyErr_Format(PyExc_ValueError,
1891 "invalid whence (%d, should be 0, 1 or 2)", whence);
1892 goto fail;
1893 }
1894
1895 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
1896 if (cmp < 0)
1897 goto fail;
1898
1899 if (cmp == 1) {
1900 PyErr_Format(PyExc_ValueError,
1901 "negative seek position %R", cookieObj);
1902 goto fail;
1903 }
1904
1905 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1906 if (res == NULL)
1907 goto fail;
1908 Py_DECREF(res);
1909
1910 /* The strategy of seek() is to go back to the safe start point
1911 * and replay the effect of read(chars_to_skip) from there.
1912 */
1913 if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
1914 goto fail;
1915
1916 /* Seek back to the safe start point. */
1917 posobj = PyLong_FromOff_t(cookie.start_pos);
1918 if (posobj == NULL)
1919 goto fail;
1920 res = PyObject_CallMethodObjArgs(self->buffer,
1921 _PyIO_str_seek, posobj, NULL);
1922 Py_DECREF(posobj);
1923 if (res == NULL)
1924 goto fail;
1925 Py_DECREF(res);
1926
1927 TextIOWrapper_set_decoded_chars(self, NULL);
1928 Py_CLEAR(self->snapshot);
1929
1930 /* Restore the decoder to its state from the safe start point. */
1931 if (self->decoder) {
1932 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
1933 goto fail;
1934 }
1935
1936 if (cookie.chars_to_skip) {
1937 /* Just like _read_chunk, feed the decoder and save a snapshot. */
1938 PyObject *input_chunk = PyObject_CallMethod(
1939 self->buffer, "read", "i", cookie.bytes_to_feed);
1940 PyObject *decoded;
1941
1942 if (input_chunk == NULL)
1943 goto fail;
1944
1945 assert (PyBytes_Check(input_chunk));
1946
1947 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
1948 if (self->snapshot == NULL) {
1949 Py_DECREF(input_chunk);
1950 goto fail;
1951 }
1952
1953 decoded = PyObject_CallMethod(self->decoder, "decode",
1954 "Oi", input_chunk, (int)cookie.need_eof);
1955
1956 if (decoded == NULL)
1957 goto fail;
1958
1959 TextIOWrapper_set_decoded_chars(self, decoded);
1960
1961 /* Skip chars_to_skip of the decoded characters. */
1962 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
1963 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
1964 goto fail;
1965 }
1966 self->decoded_chars_used = cookie.chars_to_skip;
1967 }
1968 else {
1969 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
1970 if (self->snapshot == NULL)
1971 goto fail;
1972 }
1973
1974 return cookieObj;
1975 fail:
1976 Py_XDECREF(cookieObj);
1977 return NULL;
1978
1979}
1980
1981static PyObject *
1982TextIOWrapper_tell(PyTextIOWrapperObject *self, PyObject *args)
1983{
1984 PyObject *res;
1985 PyObject *posobj = NULL;
1986 CookieStruct cookie = {0,0,0,0,0};
1987 PyObject *next_input;
1988 Py_ssize_t chars_to_skip, chars_decoded;
1989 PyObject *saved_state = NULL;
1990 char *input, *input_end;
1991
1992 CHECK_INITIALIZED(self);
1993 CHECK_CLOSED(self);
1994
1995 if (!self->seekable) {
1996 PyErr_SetString(PyExc_IOError,
1997 "underlying stream is not seekable");
1998 goto fail;
1999 }
2000 if (!self->telling) {
2001 PyErr_SetString(PyExc_IOError,
2002 "telling position disabled by next() call");
2003 goto fail;
2004 }
2005
2006 if (_TextIOWrapper_writeflush(self) < 0)
2007 return NULL;
2008 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2009 if (res == NULL)
2010 goto fail;
2011 Py_DECREF(res);
2012
2013 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2014 if (posobj == NULL)
2015 goto fail;
2016
2017 if (self->decoder == NULL || self->snapshot == NULL) {
2018 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2019 return posobj;
2020 }
2021
2022#if defined(HAVE_LARGEFILE_SUPPORT)
2023 cookie.start_pos = PyLong_AsLongLong(posobj);
2024#else
2025 cookie.start_pos = PyLong_AsLong(posobj);
2026#endif
2027 if (PyErr_Occurred())
2028 goto fail;
2029
2030 /* Skip backward to the snapshot point (see _read_chunk). */
2031 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2032 goto fail;
2033
2034 assert (PyBytes_Check(next_input));
2035
2036 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2037
2038 /* How many decoded characters have been used up since the snapshot? */
2039 if (self->decoded_chars_used == 0) {
2040 /* We haven't moved from the snapshot point. */
2041 Py_DECREF(posobj);
2042 return TextIOWrapper_buildCookie(&cookie);
2043 }
2044
2045 chars_to_skip = self->decoded_chars_used;
2046
2047 /* Starting from the snapshot position, we will walk the decoder
2048 * forward until it gives us enough decoded characters.
2049 */
2050 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2051 _PyIO_str_getstate, NULL);
2052 if (saved_state == NULL)
2053 goto fail;
2054
2055 /* Note our initial start point. */
2056 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
2057 goto fail;
2058
2059 /* Feed the decoder one byte at a time. As we go, note the
2060 * nearest "safe start point" before the current location
2061 * (a point where the decoder has nothing buffered, so seek()
2062 * can safely start from there and advance to this location).
2063 */
2064 chars_decoded = 0;
2065 input = PyBytes_AS_STRING(next_input);
2066 input_end = input + PyBytes_GET_SIZE(next_input);
2067 while (input < input_end) {
2068 PyObject *state;
2069 char *dec_buffer;
2070 Py_ssize_t dec_buffer_len;
2071 int dec_flags;
2072
2073 PyObject *decoded = PyObject_CallMethod(
2074 self->decoder, "decode", "y#", input, 1);
2075 if (decoded == NULL)
2076 goto fail;
2077 assert (PyUnicode_Check(decoded));
2078 chars_decoded += PyUnicode_GET_SIZE(decoded);
2079 Py_DECREF(decoded);
2080
2081 cookie.bytes_to_feed += 1;
2082
2083 state = PyObject_CallMethodObjArgs(self->decoder,
2084 _PyIO_str_getstate, NULL);
2085 if (state == NULL)
2086 goto fail;
2087 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2088 Py_DECREF(state);
2089 goto fail;
2090 }
2091 Py_DECREF(state);
2092
2093 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2094 /* Decoder buffer is empty, so this is a safe start point. */
2095 cookie.start_pos += cookie.bytes_to_feed;
2096 chars_to_skip -= chars_decoded;
2097 cookie.dec_flags = dec_flags;
2098 cookie.bytes_to_feed = 0;
2099 chars_decoded = 0;
2100 }
2101 if (chars_decoded >= chars_to_skip)
2102 break;
2103 input++;
2104 }
2105 if (input == input_end) {
2106 /* We didn't get enough decoded data; signal EOF to get more. */
2107 PyObject *decoded = PyObject_CallMethod(
2108 self->decoder, "decode", "yi", "", /* final = */ 1);
2109 if (decoded == NULL)
2110 goto fail;
2111 assert (PyUnicode_Check(decoded));
2112 chars_decoded += PyUnicode_GET_SIZE(decoded);
2113 Py_DECREF(decoded);
2114 cookie.need_eof = 1;
2115
2116 if (chars_decoded < chars_to_skip) {
2117 PyErr_SetString(PyExc_IOError,
2118 "can't reconstruct logical file position");
2119 goto fail;
2120 }
2121 }
2122
2123 /* finally */
2124 Py_XDECREF(posobj);
2125 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2126 Py_DECREF(saved_state);
2127 if (res == NULL)
2128 return NULL;
2129 Py_DECREF(res);
2130
2131 /* The returned cookie corresponds to the last safe start point. */
2132 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2133 return TextIOWrapper_buildCookie(&cookie);
2134
2135 fail:
2136 Py_XDECREF(posobj);
2137 if (saved_state) {
2138 PyObject *type, *value, *traceback;
2139 PyErr_Fetch(&type, &value, &traceback);
2140
2141 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2142 Py_DECREF(saved_state);
2143 if (res == NULL)
2144 return NULL;
2145 Py_DECREF(res);
2146
2147 PyErr_Restore(type, value, traceback);
2148 }
2149 return NULL;
2150}
2151
2152static PyObject *
2153TextIOWrapper_truncate(PyTextIOWrapperObject *self, PyObject *args)
2154{
2155 PyObject *pos = Py_None;
2156 PyObject *res;
2157
2158 CHECK_INITIALIZED(self)
2159 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2160 return NULL;
2161 }
2162
2163 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2164 if (res == NULL)
2165 return NULL;
2166 Py_DECREF(res);
2167
2168 if (pos != Py_None) {
2169 res = PyObject_CallMethodObjArgs((PyObject *) self,
2170 _PyIO_str_seek, pos, NULL);
2171 if (res == NULL)
2172 return NULL;
2173 Py_DECREF(res);
2174 }
2175
2176 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2177}
2178
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002179static PyObject *
2180TextIOWrapper_repr(PyTextIOWrapperObject *self)
2181{
2182 CHECK_INITIALIZED(self);
2183 return PyUnicode_FromFormat("<TextIOWrapper encoding=%S>", self->encoding);
2184}
2185
2186
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002187/* Inquiries */
2188
2189static PyObject *
2190TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args)
2191{
2192 CHECK_INITIALIZED(self);
2193 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2194}
2195
2196static PyObject *
2197TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args)
2198{
2199 CHECK_INITIALIZED(self);
2200 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2201}
2202
2203static PyObject *
2204TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args)
2205{
2206 CHECK_INITIALIZED(self);
2207 return PyObject_CallMethod(self->buffer, "readable", NULL);
2208}
2209
2210static PyObject *
2211TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args)
2212{
2213 CHECK_INITIALIZED(self);
2214 return PyObject_CallMethod(self->buffer, "writable", NULL);
2215}
2216
2217static PyObject *
2218TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args)
2219{
2220 CHECK_INITIALIZED(self);
2221 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2222}
2223
2224static PyObject *
2225TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args)
2226{
2227 CHECK_INITIALIZED(self);
2228 CHECK_CLOSED(self);
2229 self->telling = self->seekable;
2230 if (_TextIOWrapper_writeflush(self) < 0)
2231 return NULL;
2232 return PyObject_CallMethod(self->buffer, "flush", NULL);
2233}
2234
2235static PyObject *
2236TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args)
2237{
2238 PyObject *res;
2239 CHECK_INITIALIZED(self);
2240 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2241 if (res == NULL) {
2242 /* If flush() fails, just give up */
2243 PyErr_Clear();
2244 }
2245 else
2246 Py_DECREF(res);
2247
2248 return PyObject_CallMethod(self->buffer, "close", NULL);
2249}
2250
2251static PyObject *
2252TextIOWrapper_iternext(PyTextIOWrapperObject *self)
2253{
2254 PyObject *line;
2255
2256 CHECK_INITIALIZED(self);
2257
2258 self->telling = 0;
2259 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2260 /* Skip method call overhead for speed */
2261 line = _TextIOWrapper_readline(self, -1);
2262 }
2263 else {
2264 line = PyObject_CallMethodObjArgs((PyObject *)self,
2265 _PyIO_str_readline, NULL);
2266 if (line && !PyUnicode_Check(line)) {
2267 PyErr_Format(PyExc_IOError,
2268 "readline() should have returned an str object, "
2269 "not '%.200s'", Py_TYPE(line)->tp_name);
2270 Py_DECREF(line);
2271 return NULL;
2272 }
2273 }
2274
2275 if (line == NULL)
2276 return NULL;
2277
2278 if (PyUnicode_GET_SIZE(line) == 0) {
2279 /* Reached EOF or would have blocked */
2280 Py_DECREF(line);
2281 Py_CLEAR(self->snapshot);
2282 self->telling = self->seekable;
2283 return NULL;
2284 }
2285
2286 return line;
2287}
2288
2289static PyObject *
2290TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context)
2291{
2292 CHECK_INITIALIZED(self);
2293 return PyObject_GetAttrString(self->buffer, "name");
2294}
2295
2296static PyObject *
2297TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context)
2298{
2299 CHECK_INITIALIZED(self);
2300 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2301}
2302
2303static PyObject *
2304TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context)
2305{
2306 PyObject *res;
2307 CHECK_INITIALIZED(self);
2308 if (self->decoder == NULL)
2309 Py_RETURN_NONE;
2310 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2311 if (res == NULL) {
2312 PyErr_Clear();
2313 Py_RETURN_NONE;
2314 }
2315 return res;
2316}
2317
2318static PyObject *
2319TextIOWrapper_chunk_size_get(PyTextIOWrapperObject *self, void *context)
2320{
2321 CHECK_INITIALIZED(self);
2322 return PyLong_FromSsize_t(self->chunk_size);
2323}
2324
2325static int
2326TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
2327 PyObject *arg, void *context)
2328{
2329 Py_ssize_t n;
2330 CHECK_INITIALIZED_INT(self);
2331 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2332 if (n == -1 && PyErr_Occurred())
2333 return -1;
2334 if (n <= 0) {
2335 PyErr_SetString(PyExc_ValueError,
2336 "a strictly positive integer is required");
2337 return -1;
2338 }
2339 self->chunk_size = n;
2340 return 0;
2341}
2342
2343static PyMethodDef TextIOWrapper_methods[] = {
2344 {"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
2345 {"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
2346 {"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
2347 {"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
2348 {"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
2349
2350 {"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
2351 {"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
2352 {"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
2353 {"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
2354 {"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
2355
2356 {"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
2357 {"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
2358 {"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
2359 {NULL, NULL}
2360};
2361
2362static PyMemberDef TextIOWrapper_members[] = {
2363 {"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
2364 {"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
2365 {"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
2366 {NULL}
2367};
2368
2369static PyGetSetDef TextIOWrapper_getset[] = {
2370 {"name", (getter)TextIOWrapper_name_get, NULL, NULL},
2371 {"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
2372/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2373*/
2374 {"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
2375 {"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
2376 (setter)TextIOWrapper_chunk_size_set, NULL},
2377 {0}
2378};
2379
2380PyTypeObject PyTextIOWrapper_Type = {
2381 PyVarObject_HEAD_INIT(NULL, 0)
2382 "_io.TextIOWrapper", /*tp_name*/
2383 sizeof(PyTextIOWrapperObject), /*tp_basicsize*/
2384 0, /*tp_itemsize*/
2385 (destructor)TextIOWrapper_dealloc, /*tp_dealloc*/
2386 0, /*tp_print*/
2387 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002388 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002389 0, /*tp_compare */
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002390 (reprfunc)TextIOWrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391 0, /*tp_as_number*/
2392 0, /*tp_as_sequence*/
2393 0, /*tp_as_mapping*/
2394 0, /*tp_hash */
2395 0, /*tp_call*/
2396 0, /*tp_str*/
2397 0, /*tp_getattro*/
2398 0, /*tp_setattro*/
2399 0, /*tp_as_buffer*/
2400 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2401 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2402 TextIOWrapper_doc, /* tp_doc */
2403 (traverseproc)TextIOWrapper_traverse, /* tp_traverse */
2404 (inquiry)TextIOWrapper_clear, /* tp_clear */
2405 0, /* tp_richcompare */
2406 offsetof(PyTextIOWrapperObject, weakreflist), /*tp_weaklistoffset*/
2407 0, /* tp_iter */
2408 (iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
2409 TextIOWrapper_methods, /* tp_methods */
2410 TextIOWrapper_members, /* tp_members */
2411 TextIOWrapper_getset, /* tp_getset */
2412 0, /* tp_base */
2413 0, /* tp_dict */
2414 0, /* tp_descr_get */
2415 0, /* tp_descr_set */
2416 offsetof(PyTextIOWrapperObject, dict), /*tp_dictoffset*/
2417 (initproc)TextIOWrapper_init, /* tp_init */
2418 0, /* tp_alloc */
2419 PyType_GenericNew, /* tp_new */
2420};