blob: f201ba732435fa12915c3aefc521da8aec88616b [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
/* Docstring installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(TextIOBase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Petersond2e0c792009-05-01 20:40:59 +000031PyDoc_STRVAR(TextIOBase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
39TextIOBase_detach(PyObject *self)
40{
41 return _unsupported("detach");
42}
43
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000044PyDoc_STRVAR(TextIOBase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
52TextIOBase_read(PyObject *self, PyObject *args)
53{
54 return _unsupported("read");
55}
56
57PyDoc_STRVAR(TextIOBase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
64TextIOBase_readline(PyObject *self, PyObject *args)
65{
66 return _unsupported("readline");
67}
68
69PyDoc_STRVAR(TextIOBase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
76TextIOBase_write(PyObject *self, PyObject *args)
77{
78 return _unsupported("write");
79}
80
81PyDoc_STRVAR(TextIOBase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
88TextIOBase_encoding_get(PyObject *self, void *context)
89{
90 Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(TextIOBase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
102TextIOBase_newlines_get(PyObject *self, void *context)
103{
104 Py_RETURN_NONE;
105}
106
107
108static PyMethodDef TextIOBase_methods[] = {
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000109 {"detach", (PyCFunction)TextIOBase_detach, METH_NOARGS, TextIOBase_detach_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110 {"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
111 {"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
112 {"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
113 {NULL, NULL}
114};
115
116static PyGetSetDef TextIOBase_getset[] = {
117 {"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
118 {"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000119 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120};
121
122PyTypeObject PyTextIOBase_Type = {
123 PyVarObject_HEAD_INIT(NULL, 0)
124 "_io._TextIOBase", /*tp_name*/
125 0, /*tp_basicsize*/
126 0, /*tp_itemsize*/
127 0, /*tp_dealloc*/
128 0, /*tp_print*/
129 0, /*tp_getattr*/
130 0, /*tp_setattr*/
131 0, /*tp_compare */
132 0, /*tp_repr*/
133 0, /*tp_as_number*/
134 0, /*tp_as_sequence*/
135 0, /*tp_as_mapping*/
136 0, /*tp_hash */
137 0, /*tp_call*/
138 0, /*tp_str*/
139 0, /*tp_getattro*/
140 0, /*tp_setattro*/
141 0, /*tp_as_buffer*/
142 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
143 TextIOBase_doc, /* tp_doc */
144 0, /* tp_traverse */
145 0, /* tp_clear */
146 0, /* tp_richcompare */
147 0, /* tp_weaklistoffset */
148 0, /* tp_iter */
149 0, /* tp_iternext */
150 TextIOBase_methods, /* tp_methods */
151 0, /* tp_members */
152 TextIOBase_getset, /* tp_getset */
153 &PyIOBase_Type, /* tp_base */
154 0, /* tp_dict */
155 0, /* tp_descr_get */
156 0, /* tp_descr_set */
157 0, /* tp_dictoffset */
158 0, /* tp_init */
159 0, /* tp_alloc */
160 0, /* tp_new */
161};
162
163
164/* IncrementalNewlineDecoder */
165
/* Docstring installed as tp_doc of PyIncrementalNewlineDecoder_Type. */
PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
    "Codec used when reading a file in universal newlines mode. It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
    "records the types of newlines encountered. When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
175
176typedef struct {
177 PyObject_HEAD
178 PyObject *decoder;
179 PyObject *errors;
180 int pendingcr:1;
181 int translate:1;
182 unsigned int seennl:3;
183} PyNewLineDecoderObject;
184
185static int
Antoine Pitrou24f36292009-03-28 22:16:42 +0000186IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 PyObject *args, PyObject *kwds)
188{
189 PyObject *decoder;
190 int translate;
191 PyObject *errors = NULL;
192 char *kwlist[] = {"decoder", "translate", "errors", NULL};
193
194 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
195 kwlist, &decoder, &translate, &errors))
196 return -1;
197
198 self->decoder = decoder;
199 Py_INCREF(decoder);
200
201 if (errors == NULL) {
202 self->errors = PyUnicode_FromString("strict");
203 if (self->errors == NULL)
204 return -1;
205 }
206 else {
207 Py_INCREF(errors);
208 self->errors = errors;
209 }
210
211 self->translate = translate;
212 self->seennl = 0;
213 self->pendingcr = 0;
214
215 return 0;
216}
217
218static void
219IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
220{
221 Py_CLEAR(self->decoder);
222 Py_CLEAR(self->errors);
223 Py_TYPE(self)->tp_free((PyObject *)self);
224}
225
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in the decoder's `seennl` field. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
230
/* Decode `input` with the wrapped decoder (or take it as-is when the
   decoder is None), then record and optionally translate the newlines.

   _self  -- a PyNewLineDecoderObject (cast without a type check)
   input  -- object handed to the wrapped decoder's decode method, or the
             text itself when the decoder is None
   final  -- non-zero when no more input follows; a held-back '\r' is then
             emitted instead of being retained

   Returns a new reference to a str, or NULL with an exception set.

   The scanning loops below read the element at index `len` (one past the
   last character) and use `s > end` / `in > end` to detect that they ran
   off the end -- presumably relying on the unicode buffer being
   NUL-terminated; confirm before touching them. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    PyNewLineDecoderObject *self = (PyNewLineDecoderObject *) _self;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (a '\r' held back by a previous call is prepended
       further down) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the text. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    output_len = PyUnicode_GET_SIZE(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Re-insert the '\r' that was withheld last time by building a new
           string one character longer. */
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            if (Py_REFCNT(output) == 1) {
                /* Sole owner: shrink in place. */
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                /* Shared string: make a shortened copy instead. */
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           (A 0x0D byte inside some other character only makes only_lf
           false, which selects one of the full scans below.)
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    /* s has moved past the element at index len */
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                /* Nothing new can be learnt once every flag is set. */
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            /* `in` reads from the original text while `out` writes into
               `translated`; out never gets ahead of in, so the in-place
               case (translated == output) is safe. */
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* c is some other character <= '\r'; stop if it was the
                   element at index len, otherwise copy it through. */
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* "\r\n" pairs collapsed into one character: trim the tail. */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
448
449static PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000450IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000451 PyObject *args, PyObject *kwds)
452{
453 char *kwlist[] = {"input", "final", NULL};
454 PyObject *input;
455 int final = 0;
456
457 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
458 kwlist, &input, &final))
459 return NULL;
460 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
461}
462
463static PyObject *
464IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
465{
466 PyObject *buffer;
467 unsigned PY_LONG_LONG flag;
468
469 if (self->decoder != Py_None) {
470 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
471 _PyIO_str_getstate, NULL);
472 if (state == NULL)
473 return NULL;
474 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
475 Py_DECREF(state);
476 return NULL;
477 }
478 Py_INCREF(buffer);
479 Py_DECREF(state);
480 }
481 else {
482 buffer = PyBytes_FromString("");
483 flag = 0;
484 }
485 flag <<= 1;
486 if (self->pendingcr)
487 flag |= 1;
488 return Py_BuildValue("NK", buffer, flag);
489}
490
491static PyObject *
492IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject *self, PyObject *state)
493{
494 PyObject *buffer;
495 unsigned PY_LONG_LONG flag;
496
497 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
498 return NULL;
499
500 self->pendingcr = (int) flag & 1;
501 flag >>= 1;
502
503 if (self->decoder != Py_None)
504 return PyObject_CallMethod(self->decoder,
505 "setstate", "((OK))", buffer, flag);
506 else
507 Py_RETURN_NONE;
508}
509
510static PyObject *
511IncrementalNewlineDecoder_reset(PyNewLineDecoderObject *self, PyObject *args)
512{
513 self->seennl = 0;
514 self->pendingcr = 0;
515 if (self->decoder != Py_None)
516 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
517 else
518 Py_RETURN_NONE;
519}
520
521static PyObject *
522IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject *self, void *context)
523{
524 switch (self->seennl) {
525 case SEEN_CR:
526 return PyUnicode_FromString("\r");
527 case SEEN_LF:
528 return PyUnicode_FromString("\n");
529 case SEEN_CRLF:
530 return PyUnicode_FromString("\r\n");
531 case SEEN_CR | SEEN_LF:
532 return Py_BuildValue("ss", "\r", "\n");
533 case SEEN_CR | SEEN_CRLF:
534 return Py_BuildValue("ss", "\r", "\r\n");
535 case SEEN_LF | SEEN_CRLF:
536 return Py_BuildValue("ss", "\n", "\r\n");
537 case SEEN_CR | SEEN_LF | SEEN_CRLF:
538 return Py_BuildValue("sss", "\r", "\n", "\r\n");
539 default:
540 Py_RETURN_NONE;
541 }
542
543}
544
545
546static PyMethodDef IncrementalNewlineDecoder_methods[] = {
547 {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS},
548 {"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
549 {"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
550 {"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000551 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000552};
553
554static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
555 {"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000556 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000557};
558
559PyTypeObject PyIncrementalNewlineDecoder_Type = {
560 PyVarObject_HEAD_INIT(NULL, 0)
561 "_io.IncrementalNewlineDecoder", /*tp_name*/
562 sizeof(PyNewLineDecoderObject), /*tp_basicsize*/
563 0, /*tp_itemsize*/
564 (destructor)IncrementalNewlineDecoder_dealloc, /*tp_dealloc*/
565 0, /*tp_print*/
566 0, /*tp_getattr*/
567 0, /*tp_setattr*/
568 0, /*tp_compare */
569 0, /*tp_repr*/
570 0, /*tp_as_number*/
571 0, /*tp_as_sequence*/
572 0, /*tp_as_mapping*/
573 0, /*tp_hash */
574 0, /*tp_call*/
575 0, /*tp_str*/
576 0, /*tp_getattro*/
577 0, /*tp_setattro*/
578 0, /*tp_as_buffer*/
579 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
580 IncrementalNewlineDecoder_doc, /* tp_doc */
581 0, /* tp_traverse */
582 0, /* tp_clear */
583 0, /* tp_richcompare */
584 0, /*tp_weaklistoffset*/
585 0, /* tp_iter */
586 0, /* tp_iternext */
587 IncrementalNewlineDecoder_methods, /* tp_methods */
588 0, /* tp_members */
589 IncrementalNewlineDecoder_getset, /* tp_getset */
590 0, /* tp_base */
591 0, /* tp_dict */
592 0, /* tp_descr_get */
593 0, /* tp_descr_set */
594 0, /* tp_dictoffset */
595 (initproc)IncrementalNewlineDecoder_init, /* tp_init */
596 0, /* tp_alloc */
597 PyType_GenericNew, /* tp_new */
598};
599
600
601/* TextIOWrapper */
602
603PyDoc_STRVAR(TextIOWrapper_doc,
604 "Character and line based layer over a BufferedIOBase object, buffer.\n"
605 "\n"
606 "encoding gives the name of the encoding that the stream will be\n"
607 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
608 "\n"
609 "errors determines the strictness of encoding and decoding (see the\n"
610 "codecs.register) and defaults to \"strict\".\n"
611 "\n"
612 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
613 "handling of line endings. If it is None, universal newlines is\n"
614 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
615 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
616 "caller. Conversely, on output, '\\n' is translated to the system\n"
617 "default line seperator, os.linesep. If newline is any other of its\n"
618 "legal values, that newline becomes the newline when the file is read\n"
619 "and it is returned untranslated. On output, '\\n' is converted to the\n"
620 "newline.\n"
621 "\n"
622 "If line_buffering is True, a call to flush is implied when a call to\n"
623 "write contains a newline character."
624 );
625
/* Signature of the specialized encoders defined below: takes the wrapper
   (passed as a plain PyObject *) and the text, returns the encoded result. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);

/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;               /* set to 1 once detach() has succeeded */
    Py_ssize_t chunk_size;      /* set to 8192 by __init__ */
    PyObject *buffer;           /* the wrapped buffer object */
    PyObject *encoding;         /* encoding name */
    PyObject *encoder;          /* incremental encoder; only created when
                                   the buffer reports writable() */
    PyObject *decoder;          /* incremental decoder; only created when
                                   the buffer reports readable() */
    PyObject *readnl;           /* the `newline` argument, when one is given */
    PyObject *errors;           /* error handler name, held as a bytes object */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} PyTextIOWrapperObject;
678
679
680/* A couple of specialized cases in order to bypass the slow incremental
681 encoding methods for the most popular encodings. */
682
683static PyObject *
684ascii_encode(PyTextIOWrapperObject *self, PyObject *text)
685{
686 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
687 PyUnicode_GET_SIZE(text),
688 PyBytes_AS_STRING(self->errors));
689}
690
691static PyObject *
692utf16be_encode(PyTextIOWrapperObject *self, PyObject *text)
693{
694 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
695 PyUnicode_GET_SIZE(text),
696 PyBytes_AS_STRING(self->errors), 1);
697}
698
699static PyObject *
700utf16le_encode(PyTextIOWrapperObject *self, PyObject *text)
701{
702 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
703 PyUnicode_GET_SIZE(text),
704 PyBytes_AS_STRING(self->errors), -1);
705}
706
707static PyObject *
708utf16_encode(PyTextIOWrapperObject *self, PyObject *text)
709{
710 PyObject *res;
711 res = PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
712 PyUnicode_GET_SIZE(text),
713 PyBytes_AS_STRING(self->errors), 0);
714 if (res == NULL)
715 return NULL;
716 /* Next writes will skip the BOM and use native byte ordering */
717#if defined(WORDS_BIGENDIAN)
718 self->encodefunc = (encodefunc_t) utf16be_encode;
719#else
720 self->encodefunc = (encodefunc_t) utf16le_encode;
721#endif
722 return res;
723}
724
725
726static PyObject *
727utf8_encode(PyTextIOWrapperObject *self, PyObject *text)
728{
729 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
730 PyUnicode_GET_SIZE(text),
731 PyBytes_AS_STRING(self->errors));
732}
733
734static PyObject *
735latin1_encode(PyTextIOWrapperObject *self, PyObject *text)
736{
737 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
738 PyUnicode_GET_SIZE(text),
739 PyBytes_AS_STRING(self->errors));
740}
741
/* Map normalized encoding names onto the specialized encoding funcs */

/* One table row: a codec name and the fast encoder to use for it. */
typedef struct {
    const char *name;
    encodefunc_t encodefunc;
} encodefuncentry;

/* Searched linearly by TextIOWrapper_init, which compares each name with
   the `name` attribute of the looked-up codec; a NULL name ends the table. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {NULL, NULL}
};
758
759
760static int
761TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds)
762{
763 char *kwlist[] = {"buffer", "encoding", "errors",
764 "newline", "line_buffering",
765 NULL};
766 PyObject *buffer, *raw;
767 char *encoding = NULL;
768 char *errors = NULL;
769 char *newline = NULL;
770 int line_buffering = 0;
771 _PyIO_State *state = IO_STATE;
772
773 PyObject *res;
774 int r;
775
776 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000777 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000778 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
779 kwlist, &buffer, &encoding, &errors,
780 &newline, &line_buffering))
781 return -1;
782
783 if (newline && newline[0] != '\0'
784 && !(newline[0] == '\n' && newline[1] == '\0')
785 && !(newline[0] == '\r' && newline[1] == '\0')
786 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
787 PyErr_Format(PyExc_ValueError,
788 "illegal newline value: %s", newline);
789 return -1;
790 }
791
792 Py_CLEAR(self->buffer);
793 Py_CLEAR(self->encoding);
794 Py_CLEAR(self->encoder);
795 Py_CLEAR(self->decoder);
796 Py_CLEAR(self->readnl);
797 Py_CLEAR(self->decoded_chars);
798 Py_CLEAR(self->pending_bytes);
799 Py_CLEAR(self->snapshot);
800 Py_CLEAR(self->errors);
801 Py_CLEAR(self->raw);
802 self->decoded_chars_used = 0;
803 self->pending_bytes_count = 0;
804 self->encodefunc = NULL;
805
806 if (encoding == NULL) {
807 /* Try os.device_encoding(fileno) */
808 PyObject *fileno;
809 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
810 /* Ignore only AttributeError and UnsupportedOperation */
811 if (fileno == NULL) {
812 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
813 PyErr_ExceptionMatches(state->unsupported_operation)) {
814 PyErr_Clear();
815 }
816 else {
817 goto error;
818 }
819 }
820 else {
821 self->encoding = PyObject_CallMethod(state->os_module,
822 "device_encoding",
823 "N", fileno);
824 if (self->encoding == NULL)
825 goto error;
826 else if (!PyUnicode_Check(self->encoding))
827 Py_CLEAR(self->encoding);
828 }
829 }
830 if (encoding == NULL && self->encoding == NULL) {
831 if (state->locale_module == NULL) {
832 state->locale_module = PyImport_ImportModule("locale");
833 if (state->locale_module == NULL)
834 goto catch_ImportError;
835 else
836 goto use_locale;
837 }
838 else {
839 use_locale:
840 self->encoding = PyObject_CallMethod(
841 state->locale_module, "getpreferredencoding", NULL);
842 if (self->encoding == NULL) {
843 catch_ImportError:
844 /*
845 Importing locale can raise a ImportError because of
846 _functools, and locale.getpreferredencoding can raise a
847 ImportError if _locale is not available. These will happen
848 during module building.
849 */
850 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
851 PyErr_Clear();
852 self->encoding = PyUnicode_FromString("ascii");
853 }
854 else
855 goto error;
856 }
857 else if (!PyUnicode_Check(self->encoding))
858 Py_CLEAR(self->encoding);
859 }
860 }
861 if (self->encoding != NULL)
862 encoding = _PyUnicode_AsString(self->encoding);
863 else if (encoding != NULL) {
864 self->encoding = PyUnicode_FromString(encoding);
865 if (self->encoding == NULL)
866 goto error;
867 }
868 else {
869 PyErr_SetString(PyExc_IOError,
870 "could not determine default encoding");
871 }
872
873 if (errors == NULL)
874 errors = "strict";
875 self->errors = PyBytes_FromString(errors);
876 if (self->errors == NULL)
877 goto error;
878
879 self->chunk_size = 8192;
880 self->readuniversal = (newline == NULL || newline[0] == '\0');
881 self->line_buffering = line_buffering;
882 self->readtranslate = (newline == NULL);
883 if (newline) {
884 self->readnl = PyUnicode_FromString(newline);
885 if (self->readnl == NULL)
886 return -1;
887 }
888 self->writetranslate = (newline == NULL || newline[0] != '\0');
889 if (!self->readuniversal && self->readnl) {
890 self->writenl = _PyUnicode_AsString(self->readnl);
891 if (!strcmp(self->writenl, "\n"))
892 self->writenl = NULL;
893 }
894#ifdef MS_WINDOWS
895 else
896 self->writenl = "\r\n";
897#endif
898
899 /* Build the decoder object */
900 res = PyObject_CallMethod(buffer, "readable", NULL);
901 if (res == NULL)
902 goto error;
903 r = PyObject_IsTrue(res);
904 Py_DECREF(res);
905 if (r == -1)
906 goto error;
907 if (r == 1) {
908 self->decoder = PyCodec_IncrementalDecoder(
909 encoding, errors);
910 if (self->decoder == NULL)
911 goto error;
912
913 if (self->readuniversal) {
914 PyObject *incrementalDecoder = PyObject_CallFunction(
915 (PyObject *)&PyIncrementalNewlineDecoder_Type,
916 "Oi", self->decoder, (int)self->readtranslate);
917 if (incrementalDecoder == NULL)
918 goto error;
919 Py_CLEAR(self->decoder);
920 self->decoder = incrementalDecoder;
921 }
922 }
923
924 /* Build the encoder object */
925 res = PyObject_CallMethod(buffer, "writable", NULL);
926 if (res == NULL)
927 goto error;
928 r = PyObject_IsTrue(res);
929 Py_DECREF(res);
930 if (r == -1)
931 goto error;
932 if (r == 1) {
933 PyObject *ci;
934 self->encoder = PyCodec_IncrementalEncoder(
935 encoding, errors);
936 if (self->encoder == NULL)
937 goto error;
938 /* Get the normalized named of the codec */
939 ci = _PyCodec_Lookup(encoding);
940 if (ci == NULL)
941 goto error;
942 res = PyObject_GetAttrString(ci, "name");
943 Py_DECREF(ci);
944 if (res == NULL)
945 PyErr_Clear();
946 else if (PyUnicode_Check(res)) {
947 encodefuncentry *e = encodefuncs;
948 while (e->name != NULL) {
949 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
950 self->encodefunc = e->encodefunc;
951 break;
952 }
953 e++;
954 }
955 }
956 Py_XDECREF(res);
957 }
958
959 self->buffer = buffer;
960 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +0000961
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000962 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
963 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
964 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
965 raw = PyObject_GetAttrString(buffer, "raw");
966 /* Cache the raw FileIO object to speed up 'closed' checks */
967 if (raw == NULL)
968 PyErr_Clear();
969 else if (Py_TYPE(raw) == &PyFileIO_Type)
970 self->raw = raw;
971 else
972 Py_DECREF(raw);
973 }
974
975 res = PyObject_CallMethod(buffer, "seekable", NULL);
976 if (res == NULL)
977 goto error;
978 self->seekable = self->telling = PyObject_IsTrue(res);
979 Py_DECREF(res);
980
981 self->ok = 1;
982 return 0;
983
984 error:
985 return -1;
986}
987
988static int
989_TextIOWrapper_clear(PyTextIOWrapperObject *self)
990{
991 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
992 return -1;
993 self->ok = 0;
994 Py_CLEAR(self->buffer);
995 Py_CLEAR(self->encoding);
996 Py_CLEAR(self->encoder);
997 Py_CLEAR(self->decoder);
998 Py_CLEAR(self->readnl);
999 Py_CLEAR(self->decoded_chars);
1000 Py_CLEAR(self->pending_bytes);
1001 Py_CLEAR(self->snapshot);
1002 Py_CLEAR(self->errors);
1003 Py_CLEAR(self->raw);
1004 return 0;
1005}
1006
1007static void
1008TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
1009{
1010 if (_TextIOWrapper_clear(self) < 0)
1011 return;
1012 _PyObject_GC_UNTRACK(self);
1013 if (self->weakreflist != NULL)
1014 PyObject_ClearWeakRefs((PyObject *)self);
1015 Py_CLEAR(self->dict);
1016 Py_TYPE(self)->tp_free((PyObject *)self);
1017}
1018
/* tp_traverse: report every object reference held by the wrapper to the
   garbage collector.  Py_VISIT returns early from this function when the
   visitor returns a non-zero value. */
static int
TextIOWrapper_traverse(PyTextIOWrapperObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    Py_VISIT(self->dict);
    return 0;
}
1036
1037static int
1038TextIOWrapper_clear(PyTextIOWrapperObject *self)
1039{
1040 if (_TextIOWrapper_clear(self) < 0)
1041 return -1;
1042 Py_CLEAR(self->dict);
1043 return 0;
1044}
1045
/* Declared here because the CHECK_CLOSED macro below calls it. */
static PyObject *
TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context);

/* This macro takes some shortcuts to make the common case faster.

   It makes the enclosing function `return NULL` when the stream is closed
   or when the check itself fails, so it may only be used in functions
   returning a pointer.

   - For exact TextIOWrapper instances with a cached `raw` object, the
     answer comes straight from _PyFileIO_closed().
   - For exact instances without one, the `closed` getter is called and its
     result is tested for truth.
   - For any other type (i.e. subclasses), the check is delegated to
     _PyIOBase_checkClosed().

   `self` is expanded several times and unparenthesized, so pass a plain
   variable name. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = TextIOWrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1075
/* Guard for functions that return a pointer: when the wrapper is not
   usable (self->ok <= 0), set ValueError and `return NULL` from the
   enclosing function.  The message distinguishes a detached buffer from an
   object whose __init__ did not complete.

   The expansion is a bare `if` statement, not a do { } while (0) block,
   so do not use it as the body of an unbraced if/else.  `self` is expanded
   more than once and unparenthesized; pass a plain variable name. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1087
/* Same check as CHECK_INITIALIZED, for functions that return an int:
   on an unusable object (self->ok <= 0) ValueError is set and the enclosing
   function returns -1.  Like its sibling, this expands to a bare `if`
   block, so do not use it as the unbraced body of an if/else. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1099
1100
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001101static PyObject *
1102TextIOWrapper_detach(PyTextIOWrapperObject *self)
1103{
1104 PyObject *buffer, *res;
1105 CHECK_INITIALIZED(self);
1106 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1107 if (res == NULL)
1108 return NULL;
1109 Py_DECREF(res);
1110 buffer = self->buffer;
1111 self->buffer = NULL;
1112 self->detached = 1;
1113 self->ok = 0;
1114 return buffer;
1115}
1116
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001117Py_LOCAL_INLINE(const Py_UNICODE *)
1118findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1119{
1120 /* like wcschr, but doesn't stop at NULL characters */
1121 while (size-- > 0) {
1122 if (*s == ch)
1123 return s;
1124 s++;
1125 }
1126 return NULL;
1127}
1128
Antoine Pitrou24f36292009-03-28 22:16:42 +00001129/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001130 underlying buffered object, though. */
1131static int
1132_TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
1133{
1134 PyObject *b, *ret;
1135
1136 if (self->pending_bytes == NULL)
1137 return 0;
1138 b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1139 if (b == NULL)
1140 return -1;
1141 ret = PyObject_CallMethodObjArgs(self->buffer,
1142 _PyIO_str_write, b, NULL);
1143 Py_DECREF(b);
1144 if (ret == NULL)
1145 return -1;
1146 Py_DECREF(ret);
1147 Py_CLEAR(self->pending_bytes);
1148 self->pending_bytes_count = 0;
1149 return 0;
1150}
1151
1152static PyObject *
1153TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
1154{
1155 PyObject *ret;
1156 PyObject *text; /* owned reference */
1157 PyObject *b;
1158 Py_ssize_t textlen;
1159 int haslf = 0;
1160 int needflush = 0;
1161
1162 CHECK_INITIALIZED(self);
1163
1164 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1165 return NULL;
1166 }
1167
1168 CHECK_CLOSED(self);
1169
1170 Py_INCREF(text);
1171
1172 textlen = PyUnicode_GetSize(text);
1173
1174 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1175 if (findchar(PyUnicode_AS_UNICODE(text),
1176 PyUnicode_GET_SIZE(text), '\n'))
1177 haslf = 1;
1178
1179 if (haslf && self->writetranslate && self->writenl != NULL) {
1180 PyObject *newtext = PyObject_CallMethod(
1181 text, "replace", "ss", "\n", self->writenl);
1182 Py_DECREF(text);
1183 if (newtext == NULL)
1184 return NULL;
1185 text = newtext;
1186 }
1187
1188 if (self->line_buffering &&
1189 (haslf ||
1190 findchar(PyUnicode_AS_UNICODE(text),
1191 PyUnicode_GET_SIZE(text), '\r')))
1192 needflush = 1;
1193
1194 /* XXX What if we were just reading? */
1195 if (self->encodefunc != NULL)
1196 b = (*self->encodefunc)((PyObject *) self, text);
1197 else
1198 b = PyObject_CallMethodObjArgs(self->encoder,
1199 _PyIO_str_encode, text, NULL);
1200 Py_DECREF(text);
1201 if (b == NULL)
1202 return NULL;
1203
1204 if (self->pending_bytes == NULL) {
1205 self->pending_bytes = PyList_New(0);
1206 if (self->pending_bytes == NULL) {
1207 Py_DECREF(b);
1208 return NULL;
1209 }
1210 self->pending_bytes_count = 0;
1211 }
1212 if (PyList_Append(self->pending_bytes, b) < 0) {
1213 Py_DECREF(b);
1214 return NULL;
1215 }
1216 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1217 Py_DECREF(b);
1218 if (self->pending_bytes_count > self->chunk_size || needflush) {
1219 if (_TextIOWrapper_writeflush(self) < 0)
1220 return NULL;
1221 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001222
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001223 if (needflush) {
1224 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1225 if (ret == NULL)
1226 return NULL;
1227 Py_DECREF(ret);
1228 }
1229
1230 Py_CLEAR(self->snapshot);
1231
1232 if (self->decoder) {
1233 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1234 if (ret == NULL)
1235 return NULL;
1236 Py_DECREF(ret);
1237 }
1238
1239 return PyLong_FromSsize_t(textlen);
1240}
1241
/* Replace the decoded-characters buffer.

   Steals a reference to `chars` (which may be NULL to empty the buffer),
   releases the previously stored buffer, and rewinds the "used" offset to
   the start of the new buffer. */
static void
TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars)
{
    Py_CLEAR(self->decoded_chars);
    self->decoded_chars = chars;
    self->decoded_chars_used = 0;
}
1251
1252static PyObject *
1253TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
1254{
1255 PyObject *chars;
1256 Py_ssize_t avail;
1257
1258 if (self->decoded_chars == NULL)
1259 return PyUnicode_FromStringAndSize(NULL, 0);
1260
1261 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1262 - self->decoded_chars_used);
1263
1264 assert(avail >= 0);
1265
1266 if (n < 0 || n > avail)
1267 n = avail;
1268
1269 if (self->decoded_chars_used > 0 || n < avail) {
1270 chars = PyUnicode_FromUnicode(
1271 PyUnicode_AS_UNICODE(self->decoded_chars)
1272 + self->decoded_chars_used, n);
1273 if (chars == NULL)
1274 return NULL;
1275 }
1276 else {
1277 chars = self->decoded_chars;
1278 Py_INCREF(chars);
1279 }
1280
1281 self->decoded_chars_used += n;
1282 return chars;
1283}
1284
1285/* Read and decode the next chunk of data from the BufferedReader.
1286 */
1287static int
1288TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
1289{
1290 PyObject *dec_buffer = NULL;
1291 PyObject *dec_flags = NULL;
1292 PyObject *input_chunk = NULL;
1293 PyObject *decoded_chars, *chunk_size;
1294 int eof;
1295
1296 /* The return value is True unless EOF was reached. The decoded string is
1297 * placed in self._decoded_chars (replacing its previous value). The
1298 * entire input chunk is sent to the decoder, though some of it may remain
1299 * buffered in the decoder, yet to be converted.
1300 */
1301
1302 if (self->decoder == NULL) {
1303 PyErr_SetString(PyExc_ValueError, "no decoder");
1304 return -1;
1305 }
1306
1307 if (self->telling) {
1308 /* To prepare for tell(), we need to snapshot a point in the file
1309 * where the decoder's input buffer is empty.
1310 */
1311
1312 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1313 _PyIO_str_getstate, NULL);
1314 if (state == NULL)
1315 return -1;
1316 /* Given this, we know there was a valid snapshot point
1317 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1318 */
1319 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1320 Py_DECREF(state);
1321 return -1;
1322 }
1323 Py_INCREF(dec_buffer);
1324 Py_INCREF(dec_flags);
1325 Py_DECREF(state);
1326 }
1327
1328 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1329 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1330 if (chunk_size == NULL)
1331 goto fail;
1332 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1333 _PyIO_str_read1, chunk_size, NULL);
1334 Py_DECREF(chunk_size);
1335 if (input_chunk == NULL)
1336 goto fail;
1337 assert(PyBytes_Check(input_chunk));
1338
1339 eof = (PyBytes_Size(input_chunk) == 0);
1340
1341 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1342 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1343 self->decoder, input_chunk, eof);
1344 }
1345 else {
1346 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1347 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1348 }
1349
1350 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1351 if (decoded_chars == NULL)
1352 goto fail;
1353 TextIOWrapper_set_decoded_chars(self, decoded_chars);
1354 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1355 eof = 0;
1356
1357 if (self->telling) {
1358 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1359 * next input to be decoded is dec_buffer + input_chunk.
1360 */
1361 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1362 if (next_input == NULL)
1363 goto fail;
1364 assert (PyBytes_Check(next_input));
1365 Py_DECREF(dec_buffer);
1366 Py_CLEAR(self->snapshot);
1367 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1368 }
1369 Py_DECREF(input_chunk);
1370
1371 return (eof == 0);
1372
1373 fail:
1374 Py_XDECREF(dec_buffer);
1375 Py_XDECREF(dec_flags);
1376 Py_XDECREF(input_chunk);
1377 return -1;
1378}
1379
1380static PyObject *
1381TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
1382{
1383 Py_ssize_t n = -1;
1384 PyObject *result = NULL, *chunks = NULL;
1385
1386 CHECK_INITIALIZED(self);
1387
1388 if (!PyArg_ParseTuple(args, "|n:read", &n))
1389 return NULL;
1390
1391 CHECK_CLOSED(self);
1392
Benjamin Petersona1b49012009-03-31 23:11:32 +00001393 if (self->decoder == NULL) {
1394 PyErr_SetString(PyExc_IOError, "not readable");
1395 return NULL;
1396 }
1397
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001398 if (_TextIOWrapper_writeflush(self) < 0)
1399 return NULL;
1400
1401 if (n < 0) {
1402 /* Read everything */
1403 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1404 PyObject *decoded;
1405 if (bytes == NULL)
1406 goto fail;
1407 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1408 bytes, Py_True, NULL);
1409 Py_DECREF(bytes);
1410 if (decoded == NULL)
1411 goto fail;
1412
1413 result = TextIOWrapper_get_decoded_chars(self, -1);
1414
1415 if (result == NULL) {
1416 Py_DECREF(decoded);
1417 return NULL;
1418 }
1419
1420 PyUnicode_AppendAndDel(&result, decoded);
1421 if (result == NULL)
1422 goto fail;
1423
1424 Py_CLEAR(self->snapshot);
1425 return result;
1426 }
1427 else {
1428 int res = 1;
1429 Py_ssize_t remaining = n;
1430
1431 result = TextIOWrapper_get_decoded_chars(self, n);
1432 if (result == NULL)
1433 goto fail;
1434 remaining -= PyUnicode_GET_SIZE(result);
1435
1436 /* Keep reading chunks until we have n characters to return */
1437 while (remaining > 0) {
1438 res = TextIOWrapper_read_chunk(self);
1439 if (res < 0)
1440 goto fail;
1441 if (res == 0) /* EOF */
1442 break;
1443 if (chunks == NULL) {
1444 chunks = PyList_New(0);
1445 if (chunks == NULL)
1446 goto fail;
1447 }
1448 if (PyList_Append(chunks, result) < 0)
1449 goto fail;
1450 Py_DECREF(result);
1451 result = TextIOWrapper_get_decoded_chars(self, remaining);
1452 if (result == NULL)
1453 goto fail;
1454 remaining -= PyUnicode_GET_SIZE(result);
1455 }
1456 if (chunks != NULL) {
1457 if (result != NULL && PyList_Append(chunks, result) < 0)
1458 goto fail;
1459 Py_CLEAR(result);
1460 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1461 if (result == NULL)
1462 goto fail;
1463 Py_CLEAR(chunks);
1464 }
1465 return result;
1466 }
1467 fail:
1468 Py_XDECREF(result);
1469 Py_XDECREF(chunks);
1470 return NULL;
1471}
1472
1473
1474/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1475 that is to the NUL character. Otherwise the function will produce
1476 incorrect results. */
1477static Py_UNICODE *
1478find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1479{
1480 Py_UNICODE *s = start;
1481 for (;;) {
1482 while (*s > ch)
1483 s++;
1484 if (*s == ch)
1485 return s;
1486 if (s == end)
1487 return NULL;
1488 s++;
1489 }
1490}
1491
1492Py_ssize_t
1493_PyIO_find_line_ending(
1494 int translated, int universal, PyObject *readnl,
1495 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1496{
1497 Py_ssize_t len = end - start;
1498
1499 if (translated) {
1500 /* Newlines are already translated, only search for \n */
1501 Py_UNICODE *pos = find_control_char(start, end, '\n');
1502 if (pos != NULL)
1503 return pos - start + 1;
1504 else {
1505 *consumed = len;
1506 return -1;
1507 }
1508 }
1509 else if (universal) {
1510 /* Universal newline search. Find any of \r, \r\n, \n
1511 * The decoder ensures that \r\n are not split in two pieces
1512 */
1513 Py_UNICODE *s = start;
1514 for (;;) {
1515 Py_UNICODE ch;
1516 /* Fast path for non-control chars. The loop always ends
1517 since the Py_UNICODE storage is NUL-terminated. */
1518 while (*s > '\r')
1519 s++;
1520 if (s >= end) {
1521 *consumed = len;
1522 return -1;
1523 }
1524 ch = *s++;
1525 if (ch == '\n')
1526 return s - start;
1527 if (ch == '\r') {
1528 if (*s == '\n')
1529 return s - start + 1;
1530 else
1531 return s - start;
1532 }
1533 }
1534 }
1535 else {
1536 /* Non-universal mode. */
1537 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1538 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1539 if (readnl_len == 1) {
1540 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1541 if (pos != NULL)
1542 return pos - start + 1;
1543 *consumed = len;
1544 return -1;
1545 }
1546 else {
1547 Py_UNICODE *s = start;
1548 Py_UNICODE *e = end - readnl_len + 1;
1549 Py_UNICODE *pos;
1550 if (e < s)
1551 e = s;
1552 while (s < e) {
1553 Py_ssize_t i;
1554 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1555 if (pos == NULL || pos >= e)
1556 break;
1557 for (i = 1; i < readnl_len; i++) {
1558 if (pos[i] != nl[i])
1559 break;
1560 }
1561 if (i == readnl_len)
1562 return pos - start + readnl_len;
1563 s = pos + 1;
1564 }
1565 pos = find_control_char(e, end, nl[0]);
1566 if (pos == NULL)
1567 *consumed = len;
1568 else
1569 *consumed = pos - start;
1570 return -1;
1571 }
1572 }
1573}
1574
/* Read one line, at most `limit` characters long (no limit if negative).

   Pending writes are flushed first.  The function then loops: make sure
   decoded characters are available, search them for a line ending with
   _PyIO_find_line_ending(), and either stop (ending found, limit reached,
   or EOF) or put the scanned characters aside and read more.

   Local bookkeeping:
     line             - string currently being scanned (owned reference)
     chunks           - list of pieces already put aside, or NULL
     remaining        - unscanned tail of the previous buffer that must be
                        prepended to the next one, or NULL
     chunked          - total number of characters stored in `chunks`
     start / endpos   - offsets into `line` of the current piece
     offset_to_buffer - length of the `remaining` prefix inside `line`, used
                        to translate `endpos` back into an offset in
                        self->decoded_chars

   Returns a new str (empty at EOF) or NULL with an exception set. */
static PyObject *
_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_TextIOWrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = TextIOWrapper_read_chunk(self);
            if (res < 0)
                goto error;
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            TextIOWrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer directly, from the unread offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Glue the leftover of the previous buffer in front of the
               freshly read one and scan the combination from its start. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Line ending found; truncate to `limit` if that comes first. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining characters we'll have to prepend to
           the next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        TextIOWrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            if (start == 0 && Py_REFCNT(line) == 1) {
                /* We hold the only reference: shrink in place. */
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                /* Otherwise copy out the [start, endpos) slice. */
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* Leftover characters with no following data (EOF was hit while
           they were pending): they belong to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the pieces put aside plus the final piece, if any. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_CLEAR(line);
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1728
1729static PyObject *
1730TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args)
1731{
1732 Py_ssize_t limit = -1;
1733
1734 CHECK_INITIALIZED(self);
1735 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1736 return NULL;
1737 }
1738 return _TextIOWrapper_readline(self, limit);
1739}
1740
1741/* Seek and Tell */
1742
/* Unpacked form of the opaque integer "cookie" exchanged by tell() and
   seek().  Field meanings as used by those two functions: */
typedef struct {
    Py_off_t start_pos;     /* position in the underlying buffer of the
                               last "safe start point" */
    int dec_flags;          /* decoder flags to restore at that point */
    int bytes_to_feed;      /* bytes to read from start_pos and feed to
                               the decoder */
    int chars_to_skip;      /* decoded characters to skip afterwards */
    char need_eof;          /* non-zero if the decoder must be told the
                               input is final to reproduce the position */
} CookieStruct;

/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various CookieStruct fields will be stored.
 */

/* Total size of the packed cookie: the five fields with no padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1788
1789static int
1790TextIOWrapper_parseCookie(CookieStruct *cookie, PyObject *cookieObj)
1791{
1792 unsigned char buffer[COOKIE_BUF_LEN];
1793 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1794 if (cookieLong == NULL)
1795 return -1;
1796
1797 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1798 IS_LITTLE_ENDIAN, 0) < 0) {
1799 Py_DECREF(cookieLong);
1800 return -1;
1801 }
1802 Py_DECREF(cookieLong);
1803
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001804 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1805 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1806 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1807 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1808 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001809
1810 return 0;
1811}
1812
1813static PyObject *
1814TextIOWrapper_buildCookie(CookieStruct *cookie)
1815{
1816 unsigned char buffer[COOKIE_BUF_LEN];
1817
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001818 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1819 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1820 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1821 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1822 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001823
1824 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1825}
1826#undef IS_LITTLE_ENDIAN
1827
1828static int
1829_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
1830 CookieStruct *cookie)
1831{
1832 PyObject *res;
1833 /* When seeking to the start of the stream, we call decoder.reset()
1834 rather than decoder.getstate().
1835 This is for a few decoders such as utf-16 for which the state value
1836 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1837 utf-16, that we are expecting a BOM).
1838 */
1839 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1840 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1841 else
1842 res = PyObject_CallMethod(self->decoder, "setstate",
1843 "((yi))", "", cookie->dec_flags);
1844 if (res == NULL)
1845 return -1;
1846 Py_DECREF(res);
1847 return 0;
1848}
1849
1850static PyObject *
1851TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
1852{
1853 PyObject *cookieObj, *posobj;
1854 CookieStruct cookie;
1855 int whence = 0;
1856 static PyObject *zero = NULL;
1857 PyObject *res;
1858 int cmp;
1859
1860 CHECK_INITIALIZED(self);
1861
1862 if (zero == NULL) {
1863 zero = PyLong_FromLong(0L);
1864 if (zero == NULL)
1865 return NULL;
1866 }
1867
1868 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1869 return NULL;
1870 CHECK_CLOSED(self);
1871
1872 Py_INCREF(cookieObj);
1873
1874 if (!self->seekable) {
1875 PyErr_SetString(PyExc_IOError,
1876 "underlying stream is not seekable");
1877 goto fail;
1878 }
1879
1880 if (whence == 1) {
1881 /* seek relative to current position */
1882 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1883 if (cmp < 0)
1884 goto fail;
1885
1886 if (cmp == 0) {
1887 PyErr_SetString(PyExc_IOError,
1888 "can't do nonzero cur-relative seeks");
1889 goto fail;
1890 }
1891
1892 /* Seeking to the current position should attempt to
1893 * sync the underlying buffer with the current position.
1894 */
1895 Py_DECREF(cookieObj);
1896 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1897 if (cookieObj == NULL)
1898 goto fail;
1899 }
1900 else if (whence == 2) {
1901 /* seek relative to end of file */
1902
1903 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
1904 if (cmp < 0)
1905 goto fail;
1906
1907 if (cmp == 0) {
1908 PyErr_SetString(PyExc_IOError,
1909 "can't do nonzero end-relative seeks");
1910 goto fail;
1911 }
1912
1913 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
1914 if (res == NULL)
1915 goto fail;
1916 Py_DECREF(res);
1917
1918 TextIOWrapper_set_decoded_chars(self, NULL);
1919 Py_CLEAR(self->snapshot);
1920 if (self->decoder) {
1921 res = PyObject_CallMethod(self->decoder, "reset", NULL);
1922 if (res == NULL)
1923 goto fail;
1924 Py_DECREF(res);
1925 }
1926
1927 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
1928 Py_XDECREF(cookieObj);
1929 return res;
1930 }
1931 else if (whence != 0) {
1932 PyErr_Format(PyExc_ValueError,
1933 "invalid whence (%d, should be 0, 1 or 2)", whence);
1934 goto fail;
1935 }
1936
1937 cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
1938 if (cmp < 0)
1939 goto fail;
1940
1941 if (cmp == 1) {
1942 PyErr_Format(PyExc_ValueError,
1943 "negative seek position %R", cookieObj);
1944 goto fail;
1945 }
1946
1947 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1948 if (res == NULL)
1949 goto fail;
1950 Py_DECREF(res);
1951
1952 /* The strategy of seek() is to go back to the safe start point
1953 * and replay the effect of read(chars_to_skip) from there.
1954 */
1955 if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
1956 goto fail;
1957
1958 /* Seek back to the safe start point. */
1959 posobj = PyLong_FromOff_t(cookie.start_pos);
1960 if (posobj == NULL)
1961 goto fail;
1962 res = PyObject_CallMethodObjArgs(self->buffer,
1963 _PyIO_str_seek, posobj, NULL);
1964 Py_DECREF(posobj);
1965 if (res == NULL)
1966 goto fail;
1967 Py_DECREF(res);
1968
1969 TextIOWrapper_set_decoded_chars(self, NULL);
1970 Py_CLEAR(self->snapshot);
1971
1972 /* Restore the decoder to its state from the safe start point. */
1973 if (self->decoder) {
1974 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
1975 goto fail;
1976 }
1977
1978 if (cookie.chars_to_skip) {
1979 /* Just like _read_chunk, feed the decoder and save a snapshot. */
1980 PyObject *input_chunk = PyObject_CallMethod(
1981 self->buffer, "read", "i", cookie.bytes_to_feed);
1982 PyObject *decoded;
1983
1984 if (input_chunk == NULL)
1985 goto fail;
1986
1987 assert (PyBytes_Check(input_chunk));
1988
1989 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
1990 if (self->snapshot == NULL) {
1991 Py_DECREF(input_chunk);
1992 goto fail;
1993 }
1994
1995 decoded = PyObject_CallMethod(self->decoder, "decode",
1996 "Oi", input_chunk, (int)cookie.need_eof);
1997
1998 if (decoded == NULL)
1999 goto fail;
2000
2001 TextIOWrapper_set_decoded_chars(self, decoded);
2002
2003 /* Skip chars_to_skip of the decoded characters. */
2004 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2005 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2006 goto fail;
2007 }
2008 self->decoded_chars_used = cookie.chars_to_skip;
2009 }
2010 else {
2011 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2012 if (self->snapshot == NULL)
2013 goto fail;
2014 }
2015
2016 return cookieObj;
2017 fail:
2018 Py_XDECREF(cookieObj);
2019 return NULL;
2020
2021}
2022
2023static PyObject *
2024TextIOWrapper_tell(PyTextIOWrapperObject *self, PyObject *args)
2025{
2026 PyObject *res;
2027 PyObject *posobj = NULL;
2028 CookieStruct cookie = {0,0,0,0,0};
2029 PyObject *next_input;
2030 Py_ssize_t chars_to_skip, chars_decoded;
2031 PyObject *saved_state = NULL;
2032 char *input, *input_end;
2033
2034 CHECK_INITIALIZED(self);
2035 CHECK_CLOSED(self);
2036
2037 if (!self->seekable) {
2038 PyErr_SetString(PyExc_IOError,
2039 "underlying stream is not seekable");
2040 goto fail;
2041 }
2042 if (!self->telling) {
2043 PyErr_SetString(PyExc_IOError,
2044 "telling position disabled by next() call");
2045 goto fail;
2046 }
2047
2048 if (_TextIOWrapper_writeflush(self) < 0)
2049 return NULL;
2050 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2051 if (res == NULL)
2052 goto fail;
2053 Py_DECREF(res);
2054
2055 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2056 if (posobj == NULL)
2057 goto fail;
2058
2059 if (self->decoder == NULL || self->snapshot == NULL) {
2060 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2061 return posobj;
2062 }
2063
2064#if defined(HAVE_LARGEFILE_SUPPORT)
2065 cookie.start_pos = PyLong_AsLongLong(posobj);
2066#else
2067 cookie.start_pos = PyLong_AsLong(posobj);
2068#endif
2069 if (PyErr_Occurred())
2070 goto fail;
2071
2072 /* Skip backward to the snapshot point (see _read_chunk). */
2073 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2074 goto fail;
2075
2076 assert (PyBytes_Check(next_input));
2077
2078 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2079
2080 /* How many decoded characters have been used up since the snapshot? */
2081 if (self->decoded_chars_used == 0) {
2082 /* We haven't moved from the snapshot point. */
2083 Py_DECREF(posobj);
2084 return TextIOWrapper_buildCookie(&cookie);
2085 }
2086
2087 chars_to_skip = self->decoded_chars_used;
2088
2089 /* Starting from the snapshot position, we will walk the decoder
2090 * forward until it gives us enough decoded characters.
2091 */
2092 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2093 _PyIO_str_getstate, NULL);
2094 if (saved_state == NULL)
2095 goto fail;
2096
2097 /* Note our initial start point. */
2098 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
2099 goto fail;
2100
2101 /* Feed the decoder one byte at a time. As we go, note the
2102 * nearest "safe start point" before the current location
2103 * (a point where the decoder has nothing buffered, so seek()
2104 * can safely start from there and advance to this location).
2105 */
2106 chars_decoded = 0;
2107 input = PyBytes_AS_STRING(next_input);
2108 input_end = input + PyBytes_GET_SIZE(next_input);
2109 while (input < input_end) {
2110 PyObject *state;
2111 char *dec_buffer;
2112 Py_ssize_t dec_buffer_len;
2113 int dec_flags;
2114
2115 PyObject *decoded = PyObject_CallMethod(
2116 self->decoder, "decode", "y#", input, 1);
2117 if (decoded == NULL)
2118 goto fail;
2119 assert (PyUnicode_Check(decoded));
2120 chars_decoded += PyUnicode_GET_SIZE(decoded);
2121 Py_DECREF(decoded);
2122
2123 cookie.bytes_to_feed += 1;
2124
2125 state = PyObject_CallMethodObjArgs(self->decoder,
2126 _PyIO_str_getstate, NULL);
2127 if (state == NULL)
2128 goto fail;
2129 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2130 Py_DECREF(state);
2131 goto fail;
2132 }
2133 Py_DECREF(state);
2134
2135 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2136 /* Decoder buffer is empty, so this is a safe start point. */
2137 cookie.start_pos += cookie.bytes_to_feed;
2138 chars_to_skip -= chars_decoded;
2139 cookie.dec_flags = dec_flags;
2140 cookie.bytes_to_feed = 0;
2141 chars_decoded = 0;
2142 }
2143 if (chars_decoded >= chars_to_skip)
2144 break;
2145 input++;
2146 }
2147 if (input == input_end) {
2148 /* We didn't get enough decoded data; signal EOF to get more. */
2149 PyObject *decoded = PyObject_CallMethod(
2150 self->decoder, "decode", "yi", "", /* final = */ 1);
2151 if (decoded == NULL)
2152 goto fail;
2153 assert (PyUnicode_Check(decoded));
2154 chars_decoded += PyUnicode_GET_SIZE(decoded);
2155 Py_DECREF(decoded);
2156 cookie.need_eof = 1;
2157
2158 if (chars_decoded < chars_to_skip) {
2159 PyErr_SetString(PyExc_IOError,
2160 "can't reconstruct logical file position");
2161 goto fail;
2162 }
2163 }
2164
2165 /* finally */
2166 Py_XDECREF(posobj);
2167 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2168 Py_DECREF(saved_state);
2169 if (res == NULL)
2170 return NULL;
2171 Py_DECREF(res);
2172
2173 /* The returned cookie corresponds to the last safe start point. */
2174 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2175 return TextIOWrapper_buildCookie(&cookie);
2176
2177 fail:
2178 Py_XDECREF(posobj);
2179 if (saved_state) {
2180 PyObject *type, *value, *traceback;
2181 PyErr_Fetch(&type, &value, &traceback);
2182
2183 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2184 Py_DECREF(saved_state);
2185 if (res == NULL)
2186 return NULL;
2187 Py_DECREF(res);
2188
2189 PyErr_Restore(type, value, traceback);
2190 }
2191 return NULL;
2192}
2193
2194static PyObject *
2195TextIOWrapper_truncate(PyTextIOWrapperObject *self, PyObject *args)
2196{
2197 PyObject *pos = Py_None;
2198 PyObject *res;
2199
2200 CHECK_INITIALIZED(self)
2201 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2202 return NULL;
2203 }
2204
2205 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2206 if (res == NULL)
2207 return NULL;
2208 Py_DECREF(res);
2209
2210 if (pos != Py_None) {
2211 res = PyObject_CallMethodObjArgs((PyObject *) self,
2212 _PyIO_str_seek, pos, NULL);
2213 if (res == NULL)
2214 return NULL;
2215 Py_DECREF(res);
2216 }
2217
2218 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2219}
2220
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002221static PyObject *
2222TextIOWrapper_repr(PyTextIOWrapperObject *self)
2223{
2224 CHECK_INITIALIZED(self);
2225 return PyUnicode_FromFormat("<TextIOWrapper encoding=%S>", self->encoding);
2226}
2227
2228
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002229/* Inquiries */
2230
2231static PyObject *
2232TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args)
2233{
2234 CHECK_INITIALIZED(self);
2235 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2236}
2237
2238static PyObject *
2239TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args)
2240{
2241 CHECK_INITIALIZED(self);
2242 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2243}
2244
2245static PyObject *
2246TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args)
2247{
2248 CHECK_INITIALIZED(self);
2249 return PyObject_CallMethod(self->buffer, "readable", NULL);
2250}
2251
2252static PyObject *
2253TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args)
2254{
2255 CHECK_INITIALIZED(self);
2256 return PyObject_CallMethod(self->buffer, "writable", NULL);
2257}
2258
2259static PyObject *
2260TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args)
2261{
2262 CHECK_INITIALIZED(self);
2263 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2264}
2265
2266static PyObject *
2267TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args)
2268{
2269 CHECK_INITIALIZED(self);
2270 CHECK_CLOSED(self);
2271 self->telling = self->seekable;
2272 if (_TextIOWrapper_writeflush(self) < 0)
2273 return NULL;
2274 return PyObject_CallMethod(self->buffer, "flush", NULL);
2275}
2276
2277static PyObject *
2278TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args)
2279{
2280 PyObject *res;
2281 CHECK_INITIALIZED(self);
2282 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2283 if (res == NULL) {
2284 /* If flush() fails, just give up */
2285 PyErr_Clear();
2286 }
2287 else
2288 Py_DECREF(res);
2289
2290 return PyObject_CallMethod(self->buffer, "close", NULL);
2291}
2292
2293static PyObject *
2294TextIOWrapper_iternext(PyTextIOWrapperObject *self)
2295{
2296 PyObject *line;
2297
2298 CHECK_INITIALIZED(self);
2299
2300 self->telling = 0;
2301 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2302 /* Skip method call overhead for speed */
2303 line = _TextIOWrapper_readline(self, -1);
2304 }
2305 else {
2306 line = PyObject_CallMethodObjArgs((PyObject *)self,
2307 _PyIO_str_readline, NULL);
2308 if (line && !PyUnicode_Check(line)) {
2309 PyErr_Format(PyExc_IOError,
2310 "readline() should have returned an str object, "
2311 "not '%.200s'", Py_TYPE(line)->tp_name);
2312 Py_DECREF(line);
2313 return NULL;
2314 }
2315 }
2316
2317 if (line == NULL)
2318 return NULL;
2319
2320 if (PyUnicode_GET_SIZE(line) == 0) {
2321 /* Reached EOF or would have blocked */
2322 Py_DECREF(line);
2323 Py_CLEAR(self->snapshot);
2324 self->telling = self->seekable;
2325 return NULL;
2326 }
2327
2328 return line;
2329}
2330
2331static PyObject *
2332TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context)
2333{
2334 CHECK_INITIALIZED(self);
2335 return PyObject_GetAttrString(self->buffer, "name");
2336}
2337
2338static PyObject *
2339TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context)
2340{
2341 CHECK_INITIALIZED(self);
2342 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2343}
2344
2345static PyObject *
2346TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context)
2347{
2348 PyObject *res;
2349 CHECK_INITIALIZED(self);
2350 if (self->decoder == NULL)
2351 Py_RETURN_NONE;
2352 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2353 if (res == NULL) {
2354 PyErr_Clear();
2355 Py_RETURN_NONE;
2356 }
2357 return res;
2358}
2359
2360static PyObject *
2361TextIOWrapper_chunk_size_get(PyTextIOWrapperObject *self, void *context)
2362{
2363 CHECK_INITIALIZED(self);
2364 return PyLong_FromSsize_t(self->chunk_size);
2365}
2366
2367static int
2368TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
2369 PyObject *arg, void *context)
2370{
2371 Py_ssize_t n;
2372 CHECK_INITIALIZED_INT(self);
2373 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2374 if (n == -1 && PyErr_Occurred())
2375 return -1;
2376 if (n <= 0) {
2377 PyErr_SetString(PyExc_ValueError,
2378 "a strictly positive integer is required");
2379 return -1;
2380 }
2381 self->chunk_size = n;
2382 return 0;
2383}
2384
2385static PyMethodDef TextIOWrapper_methods[] = {
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002386 {"detach", (PyCFunction)TextIOWrapper_detach, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002387 {"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
2388 {"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
2389 {"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
2390 {"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
2391 {"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
2392
2393 {"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
2394 {"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
2395 {"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
2396 {"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
2397 {"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
2398
2399 {"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
2400 {"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
2401 {"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
2402 {NULL, NULL}
2403};
2404
2405static PyMemberDef TextIOWrapper_members[] = {
2406 {"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
2407 {"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
2408 {"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
2409 {NULL}
2410};
2411
2412static PyGetSetDef TextIOWrapper_getset[] = {
2413 {"name", (getter)TextIOWrapper_name_get, NULL, NULL},
2414 {"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
2415/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2416*/
2417 {"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
2418 {"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
2419 (setter)TextIOWrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002420 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002421};
2422
/* Type object for _io.TextIOWrapper.

   The initializer is positional, so the order of the slots below must
   match the PyTypeObject layout exactly.  The type is subclassable
   (Py_TPFLAGS_BASETYPE) and participates in cyclic GC
   (Py_TPFLAGS_HAVE_GC, with traverse/clear supplied).  Instances carry a
   weakref list and an instance dict at the offsets given.  tp_iternext is
   provided while tp_iter and tp_base are left 0 here -- presumably they
   are inherited or filled in when the module initializes the type;
   confirm against the module init code. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(PyTextIOWrapperObject), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)TextIOWrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)TextIOWrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    TextIOWrapper_doc,          /* tp_doc */
    (traverseproc)TextIOWrapper_traverse, /* tp_traverse */
    (inquiry)TextIOWrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(PyTextIOWrapperObject, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
    TextIOWrapper_methods,      /* tp_methods */
    TextIOWrapper_members,      /* tp_members */
    TextIOWrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(PyTextIOWrapperObject, dict), /*tp_dictoffset*/
    (initproc)TextIOWrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};