blob: c8d28334c1456c7cfdc6f0fa328585bb03c4e49d [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
16PyDoc_STRVAR(TextIOBase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Petersond2e0c792009-05-01 20:40:59 +000031PyDoc_STRVAR(TextIOBase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
39TextIOBase_detach(PyObject *self)
40{
41 return _unsupported("detach");
42}
43
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000044PyDoc_STRVAR(TextIOBase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
52TextIOBase_read(PyObject *self, PyObject *args)
53{
54 return _unsupported("read");
55}
56
57PyDoc_STRVAR(TextIOBase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
64TextIOBase_readline(PyObject *self, PyObject *args)
65{
66 return _unsupported("readline");
67}
68
69PyDoc_STRVAR(TextIOBase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
76TextIOBase_write(PyObject *self, PyObject *args)
77{
78 return _unsupported("write");
79}
80
81PyDoc_STRVAR(TextIOBase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
88TextIOBase_encoding_get(PyObject *self, void *context)
89{
90 Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(TextIOBase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
102TextIOBase_newlines_get(PyObject *self, void *context)
103{
104 Py_RETURN_NONE;
105}
106
107
108static PyMethodDef TextIOBase_methods[] = {
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000109 {"detach", (PyCFunction)TextIOBase_detach, METH_NOARGS, TextIOBase_detach_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110 {"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
111 {"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
112 {"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
113 {NULL, NULL}
114};
115
116static PyGetSetDef TextIOBase_getset[] = {
117 {"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
118 {"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000119 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120};
121
122PyTypeObject PyTextIOBase_Type = {
123 PyVarObject_HEAD_INIT(NULL, 0)
124 "_io._TextIOBase", /*tp_name*/
125 0, /*tp_basicsize*/
126 0, /*tp_itemsize*/
127 0, /*tp_dealloc*/
128 0, /*tp_print*/
129 0, /*tp_getattr*/
130 0, /*tp_setattr*/
131 0, /*tp_compare */
132 0, /*tp_repr*/
133 0, /*tp_as_number*/
134 0, /*tp_as_sequence*/
135 0, /*tp_as_mapping*/
136 0, /*tp_hash */
137 0, /*tp_call*/
138 0, /*tp_str*/
139 0, /*tp_getattro*/
140 0, /*tp_setattro*/
141 0, /*tp_as_buffer*/
142 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
143 TextIOBase_doc, /* tp_doc */
144 0, /* tp_traverse */
145 0, /* tp_clear */
146 0, /* tp_richcompare */
147 0, /* tp_weaklistoffset */
148 0, /* tp_iter */
149 0, /* tp_iternext */
150 TextIOBase_methods, /* tp_methods */
151 0, /* tp_members */
152 TextIOBase_getset, /* tp_getset */
153 &PyIOBase_Type, /* tp_base */
154 0, /* tp_dict */
155 0, /* tp_descr_get */
156 0, /* tp_descr_set */
157 0, /* tp_dictoffset */
158 0, /* tp_init */
159 0, /* tp_alloc */
160 0, /* tp_new */
161};
162
163
164/* IncrementalNewlineDecoder */
165
166PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
167 "Codec used when reading a file in universal newlines mode. It wraps\n"
168 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
169 "records the types of newlines encountered. When used with\n"
170 "translate=False, it ensures that the newline sequence is returned in\n"
171 "one piece. When used with decoder=None, it expects unicode strings as\n"
172 "decode input and translates newlines without first invoking an external\n"
173 "decoder.\n"
174 );
175
176typedef struct {
177 PyObject_HEAD
178 PyObject *decoder;
179 PyObject *errors;
180 int pendingcr:1;
181 int translate:1;
182 unsigned int seennl:3;
183} PyNewLineDecoderObject;
184
185static int
Antoine Pitrou24f36292009-03-28 22:16:42 +0000186IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 PyObject *args, PyObject *kwds)
188{
189 PyObject *decoder;
190 int translate;
191 PyObject *errors = NULL;
192 char *kwlist[] = {"decoder", "translate", "errors", NULL};
193
194 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
195 kwlist, &decoder, &translate, &errors))
196 return -1;
197
198 self->decoder = decoder;
199 Py_INCREF(decoder);
200
201 if (errors == NULL) {
202 self->errors = PyUnicode_FromString("strict");
203 if (self->errors == NULL)
204 return -1;
205 }
206 else {
207 Py_INCREF(errors);
208 self->errors = errors;
209 }
210
211 self->translate = translate;
212 self->seennl = 0;
213 self->pendingcr = 0;
214
215 return 0;
216}
217
218static void
219IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
220{
221 Py_CLEAR(self->decoder);
222 Py_CLEAR(self->errors);
223 Py_TYPE(self)->tp_free((PyObject *)self);
224}
225
226#define SEEN_CR 1
227#define SEEN_LF 2
228#define SEEN_CRLF 4
229#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
230
231PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000232_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233 PyObject *input, int final)
234{
235 PyObject *output;
236 Py_ssize_t output_len;
237 PyNewLineDecoderObject *self = (PyNewLineDecoderObject *) _self;
238
239 if (self->decoder == NULL) {
240 PyErr_SetString(PyExc_ValueError,
241 "IncrementalNewlineDecoder.__init__ not called");
242 return NULL;
243 }
244
245 /* decode input (with the eventual \r from a previous pass) */
246 if (self->decoder != Py_None) {
247 output = PyObject_CallMethodObjArgs(self->decoder,
248 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
249 }
250 else {
251 output = input;
252 Py_INCREF(output);
253 }
254
255 if (output == NULL)
256 return NULL;
257
258 if (!PyUnicode_Check(output)) {
259 PyErr_SetString(PyExc_TypeError,
260 "decoder should return a string result");
261 goto error;
262 }
263
264 output_len = PyUnicode_GET_SIZE(output);
265 if (self->pendingcr && (final || output_len > 0)) {
266 Py_UNICODE *out;
267 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
268 if (modified == NULL)
269 goto error;
270 out = PyUnicode_AS_UNICODE(modified);
271 out[0] = '\r';
272 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
273 output_len * sizeof(Py_UNICODE));
274 Py_DECREF(output);
275 output = modified;
276 self->pendingcr = 0;
277 output_len++;
278 }
279
280 /* retain last \r even when not translating data:
281 * then readline() is sure to get \r\n in one pass
282 */
283 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000284 if (output_len > 0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000285 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
286
287 if (Py_REFCNT(output) == 1) {
288 if (PyUnicode_Resize(&output, output_len - 1) < 0)
289 goto error;
290 }
291 else {
292 PyObject *modified = PyUnicode_FromUnicode(
293 PyUnicode_AS_UNICODE(output),
294 output_len - 1);
295 if (modified == NULL)
296 goto error;
297 Py_DECREF(output);
298 output = modified;
299 }
300 self->pendingcr = 1;
301 }
302 }
303
304 /* Record which newlines are read and do newline translation if desired,
305 all in one pass. */
306 {
307 Py_UNICODE *in_str;
308 Py_ssize_t len;
309 int seennl = self->seennl;
310 int only_lf = 0;
311
312 in_str = PyUnicode_AS_UNICODE(output);
313 len = PyUnicode_GET_SIZE(output);
314
315 if (len == 0)
316 return output;
317
318 /* If, up to now, newlines are consistently \n, do a quick check
319 for the \r *byte* with the libc's optimized memchr.
320 */
321 if (seennl == SEEN_LF || seennl == 0) {
Antoine Pitrou0e941892009-03-06 23:57:20 +0000322 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000323 }
324
Antoine Pitrou66913e22009-03-06 23:40:56 +0000325 if (only_lf) {
326 /* If not already seen, quick scan for a possible "\n" character.
327 (there's nothing else to be done, even when in translation mode)
328 */
329 if (seennl == 0 &&
330 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
331 Py_UNICODE *s, *end;
332 s = in_str;
333 end = in_str + len;
334 for (;;) {
335 Py_UNICODE c;
336 /* Fast loop for non-control characters */
337 while (*s > '\n')
338 s++;
339 c = *s++;
340 if (c == '\n') {
341 seennl |= SEEN_LF;
342 break;
343 }
344 if (s > end)
345 break;
346 }
347 }
348 /* Finished: we have scanned for newlines, and none of them
349 need translating */
350 }
351 else if (!self->translate) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000352 Py_UNICODE *s, *end;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000353 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000354 if (seennl == SEEN_ALL)
355 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000356 s = in_str;
357 end = in_str + len;
358 for (;;) {
359 Py_UNICODE c;
360 /* Fast loop for non-control characters */
361 while (*s > '\r')
362 s++;
363 c = *s++;
364 if (c == '\n')
365 seennl |= SEEN_LF;
366 else if (c == '\r') {
367 if (*s == '\n') {
368 seennl |= SEEN_CRLF;
369 s++;
370 }
371 else
372 seennl |= SEEN_CR;
373 }
374 if (s > end)
375 break;
376 if (seennl == SEEN_ALL)
377 break;
378 }
379 endscan:
380 ;
381 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000382 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000383 PyObject *translated = NULL;
384 Py_UNICODE *out_str;
385 Py_UNICODE *in, *out, *end;
386 if (Py_REFCNT(output) != 1) {
387 /* We could try to optimize this so that we only do a copy
388 when there is something to translate. On the other hand,
389 most decoders should only output non-shared strings, i.e.
390 translation is done in place. */
391 translated = PyUnicode_FromUnicode(NULL, len);
392 if (translated == NULL)
393 goto error;
394 assert(Py_REFCNT(translated) == 1);
395 memcpy(PyUnicode_AS_UNICODE(translated),
396 PyUnicode_AS_UNICODE(output),
397 len * sizeof(Py_UNICODE));
398 }
399 else {
400 translated = output;
401 }
402 out_str = PyUnicode_AS_UNICODE(translated);
403 in = in_str;
404 out = out_str;
405 end = in_str + len;
406 for (;;) {
407 Py_UNICODE c;
408 /* Fast loop for non-control characters */
409 while ((c = *in++) > '\r')
410 *out++ = c;
411 if (c == '\n') {
412 *out++ = c;
413 seennl |= SEEN_LF;
414 continue;
415 }
416 if (c == '\r') {
417 if (*in == '\n') {
418 in++;
419 seennl |= SEEN_CRLF;
420 }
421 else
422 seennl |= SEEN_CR;
423 *out++ = '\n';
424 continue;
425 }
426 if (in > end)
427 break;
428 *out++ = c;
429 }
430 if (translated != output) {
431 Py_DECREF(output);
432 output = translated;
433 }
434 if (out - out_str != len) {
435 if (PyUnicode_Resize(&output, out - out_str) < 0)
436 goto error;
437 }
438 }
439 self->seennl |= seennl;
440 }
441
442 return output;
443
444 error:
445 Py_DECREF(output);
446 return NULL;
447}
448
449static PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000450IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000451 PyObject *args, PyObject *kwds)
452{
453 char *kwlist[] = {"input", "final", NULL};
454 PyObject *input;
455 int final = 0;
456
457 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
458 kwlist, &input, &final))
459 return NULL;
460 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
461}
462
463static PyObject *
464IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
465{
466 PyObject *buffer;
467 unsigned PY_LONG_LONG flag;
468
469 if (self->decoder != Py_None) {
470 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
471 _PyIO_str_getstate, NULL);
472 if (state == NULL)
473 return NULL;
474 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
475 Py_DECREF(state);
476 return NULL;
477 }
478 Py_INCREF(buffer);
479 Py_DECREF(state);
480 }
481 else {
482 buffer = PyBytes_FromString("");
483 flag = 0;
484 }
485 flag <<= 1;
486 if (self->pendingcr)
487 flag |= 1;
488 return Py_BuildValue("NK", buffer, flag);
489}
490
491static PyObject *
492IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject *self, PyObject *state)
493{
494 PyObject *buffer;
495 unsigned PY_LONG_LONG flag;
496
497 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
498 return NULL;
499
500 self->pendingcr = (int) flag & 1;
501 flag >>= 1;
502
503 if (self->decoder != Py_None)
504 return PyObject_CallMethod(self->decoder,
505 "setstate", "((OK))", buffer, flag);
506 else
507 Py_RETURN_NONE;
508}
509
510static PyObject *
511IncrementalNewlineDecoder_reset(PyNewLineDecoderObject *self, PyObject *args)
512{
513 self->seennl = 0;
514 self->pendingcr = 0;
515 if (self->decoder != Py_None)
516 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
517 else
518 Py_RETURN_NONE;
519}
520
521static PyObject *
522IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject *self, void *context)
523{
524 switch (self->seennl) {
525 case SEEN_CR:
526 return PyUnicode_FromString("\r");
527 case SEEN_LF:
528 return PyUnicode_FromString("\n");
529 case SEEN_CRLF:
530 return PyUnicode_FromString("\r\n");
531 case SEEN_CR | SEEN_LF:
532 return Py_BuildValue("ss", "\r", "\n");
533 case SEEN_CR | SEEN_CRLF:
534 return Py_BuildValue("ss", "\r", "\r\n");
535 case SEEN_LF | SEEN_CRLF:
536 return Py_BuildValue("ss", "\n", "\r\n");
537 case SEEN_CR | SEEN_LF | SEEN_CRLF:
538 return Py_BuildValue("sss", "\r", "\n", "\r\n");
539 default:
540 Py_RETURN_NONE;
541 }
542
543}
544
545
546static PyMethodDef IncrementalNewlineDecoder_methods[] = {
547 {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS},
548 {"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
549 {"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
550 {"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000551 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000552};
553
554static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
555 {"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000556 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000557};
558
559PyTypeObject PyIncrementalNewlineDecoder_Type = {
560 PyVarObject_HEAD_INIT(NULL, 0)
561 "_io.IncrementalNewlineDecoder", /*tp_name*/
562 sizeof(PyNewLineDecoderObject), /*tp_basicsize*/
563 0, /*tp_itemsize*/
564 (destructor)IncrementalNewlineDecoder_dealloc, /*tp_dealloc*/
565 0, /*tp_print*/
566 0, /*tp_getattr*/
567 0, /*tp_setattr*/
568 0, /*tp_compare */
569 0, /*tp_repr*/
570 0, /*tp_as_number*/
571 0, /*tp_as_sequence*/
572 0, /*tp_as_mapping*/
573 0, /*tp_hash */
574 0, /*tp_call*/
575 0, /*tp_str*/
576 0, /*tp_getattro*/
577 0, /*tp_setattro*/
578 0, /*tp_as_buffer*/
579 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
580 IncrementalNewlineDecoder_doc, /* tp_doc */
581 0, /* tp_traverse */
582 0, /* tp_clear */
583 0, /* tp_richcompare */
584 0, /*tp_weaklistoffset*/
585 0, /* tp_iter */
586 0, /* tp_iternext */
587 IncrementalNewlineDecoder_methods, /* tp_methods */
588 0, /* tp_members */
589 IncrementalNewlineDecoder_getset, /* tp_getset */
590 0, /* tp_base */
591 0, /* tp_dict */
592 0, /* tp_descr_get */
593 0, /* tp_descr_set */
594 0, /* tp_dictoffset */
595 (initproc)IncrementalNewlineDecoder_init, /* tp_init */
596 0, /* tp_alloc */
597 PyType_GenericNew, /* tp_new */
598};
599
600
601/* TextIOWrapper */
602
603PyDoc_STRVAR(TextIOWrapper_doc,
604 "Character and line based layer over a BufferedIOBase object, buffer.\n"
605 "\n"
606 "encoding gives the name of the encoding that the stream will be\n"
607 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
608 "\n"
609 "errors determines the strictness of encoding and decoding (see the\n"
610 "codecs.register) and defaults to \"strict\".\n"
611 "\n"
612 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
613 "handling of line endings. If it is None, universal newlines is\n"
614 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
615 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
616 "caller. Conversely, on output, '\\n' is translated to the system\n"
617 "default line seperator, os.linesep. If newline is any other of its\n"
618 "legal values, that newline becomes the newline when the file is read\n"
619 "and it is returned untranslated. On output, '\\n' is converted to the\n"
620 "newline.\n"
621 "\n"
622 "If line_buffering is True, a call to flush is implied when a call to\n"
623 "write contains a newline character."
624 );
625
626typedef PyObject *
627 (*encodefunc_t)(PyObject *, PyObject *);
628
629typedef struct
630{
631 PyObject_HEAD
632 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000633 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634 Py_ssize_t chunk_size;
635 PyObject *buffer;
636 PyObject *encoding;
637 PyObject *encoder;
638 PyObject *decoder;
639 PyObject *readnl;
640 PyObject *errors;
641 const char *writenl; /* utf-8 encoded, NULL stands for \n */
642 char line_buffering;
643 char readuniversal;
644 char readtranslate;
645 char writetranslate;
646 char seekable;
647 char telling;
648 /* Specialized encoding func (see below) */
649 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000650 /* Whether or not it's the start of the stream */
651 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652
653 /* Reads and writes are internally buffered in order to speed things up.
654 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000655
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000656 Please also note that text to be written is first encoded before being
657 buffered. This is necessary so that encoding errors are immediately
658 reported to the caller, but it unfortunately means that the
659 IncrementalEncoder (whose encode() method is always written in Python)
660 becomes a bottleneck for small writes.
661 */
662 PyObject *decoded_chars; /* buffer for text returned from decoder */
663 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
664 PyObject *pending_bytes; /* list of bytes objects waiting to be
665 written, or NULL */
666 Py_ssize_t pending_bytes_count;
667 PyObject *snapshot;
668 /* snapshot is either None, or a tuple (dec_flags, next_input) where
669 * dec_flags is the second (integer) item of the decoder state and
670 * next_input is the chunk of input bytes that comes next after the
671 * snapshot point. We use this to reconstruct decoder states in tell().
672 */
673
674 /* Cache raw object if it's a FileIO object */
675 PyObject *raw;
676
677 PyObject *weakreflist;
678 PyObject *dict;
679} PyTextIOWrapperObject;
680
681
682/* A couple of specialized cases in order to bypass the slow incremental
683 encoding methods for the most popular encodings. */
684
685static PyObject *
686ascii_encode(PyTextIOWrapperObject *self, PyObject *text)
687{
688 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
689 PyUnicode_GET_SIZE(text),
690 PyBytes_AS_STRING(self->errors));
691}
692
693static PyObject *
694utf16be_encode(PyTextIOWrapperObject *self, PyObject *text)
695{
696 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
697 PyUnicode_GET_SIZE(text),
698 PyBytes_AS_STRING(self->errors), 1);
699}
700
701static PyObject *
702utf16le_encode(PyTextIOWrapperObject *self, PyObject *text)
703{
704 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
705 PyUnicode_GET_SIZE(text),
706 PyBytes_AS_STRING(self->errors), -1);
707}
708
709static PyObject *
710utf16_encode(PyTextIOWrapperObject *self, PyObject *text)
711{
Antoine Pitroue4501852009-05-14 18:55:55 +0000712 if (!self->encoding_start_of_stream) {
713 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000715 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000717 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000719 }
720 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
721 PyUnicode_GET_SIZE(text),
722 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723}
724
Antoine Pitroue4501852009-05-14 18:55:55 +0000725static PyObject *
726utf32be_encode(PyTextIOWrapperObject *self, PyObject *text)
727{
728 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
729 PyUnicode_GET_SIZE(text),
730 PyBytes_AS_STRING(self->errors), 1);
731}
732
733static PyObject *
734utf32le_encode(PyTextIOWrapperObject *self, PyObject *text)
735{
736 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
737 PyUnicode_GET_SIZE(text),
738 PyBytes_AS_STRING(self->errors), -1);
739}
740
741static PyObject *
742utf32_encode(PyTextIOWrapperObject *self, PyObject *text)
743{
744 if (!self->encoding_start_of_stream) {
745 /* Skip the BOM and use native byte ordering */
746#if defined(WORDS_BIGENDIAN)
747 return utf32be_encode(self, text);
748#else
749 return utf32le_encode(self, text);
750#endif
751 }
752 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
753 PyUnicode_GET_SIZE(text),
754 PyBytes_AS_STRING(self->errors), 0);
755}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000756
757static PyObject *
758utf8_encode(PyTextIOWrapperObject *self, PyObject *text)
759{
760 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
761 PyUnicode_GET_SIZE(text),
762 PyBytes_AS_STRING(self->errors));
763}
764
765static PyObject *
766latin1_encode(PyTextIOWrapperObject *self, PyObject *text)
767{
768 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
769 PyUnicode_GET_SIZE(text),
770 PyBytes_AS_STRING(self->errors));
771}
772
773/* Map normalized encoding names onto the specialized encoding funcs */
774
775typedef struct {
776 const char *name;
777 encodefunc_t encodefunc;
778} encodefuncentry;
779
Antoine Pitrou24f36292009-03-28 22:16:42 +0000780static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000781 {"ascii", (encodefunc_t) ascii_encode},
782 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000783 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784 {"utf-16-be", (encodefunc_t) utf16be_encode},
785 {"utf-16-le", (encodefunc_t) utf16le_encode},
786 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000787 {"utf-32-be", (encodefunc_t) utf32be_encode},
788 {"utf-32-le", (encodefunc_t) utf32le_encode},
789 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000790 {NULL, NULL}
791};
792
793
794static int
795TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds)
796{
797 char *kwlist[] = {"buffer", "encoding", "errors",
798 "newline", "line_buffering",
799 NULL};
800 PyObject *buffer, *raw;
801 char *encoding = NULL;
802 char *errors = NULL;
803 char *newline = NULL;
804 int line_buffering = 0;
805 _PyIO_State *state = IO_STATE;
806
807 PyObject *res;
808 int r;
809
810 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000811 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
813 kwlist, &buffer, &encoding, &errors,
814 &newline, &line_buffering))
815 return -1;
816
817 if (newline && newline[0] != '\0'
818 && !(newline[0] == '\n' && newline[1] == '\0')
819 && !(newline[0] == '\r' && newline[1] == '\0')
820 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
821 PyErr_Format(PyExc_ValueError,
822 "illegal newline value: %s", newline);
823 return -1;
824 }
825
826 Py_CLEAR(self->buffer);
827 Py_CLEAR(self->encoding);
828 Py_CLEAR(self->encoder);
829 Py_CLEAR(self->decoder);
830 Py_CLEAR(self->readnl);
831 Py_CLEAR(self->decoded_chars);
832 Py_CLEAR(self->pending_bytes);
833 Py_CLEAR(self->snapshot);
834 Py_CLEAR(self->errors);
835 Py_CLEAR(self->raw);
836 self->decoded_chars_used = 0;
837 self->pending_bytes_count = 0;
838 self->encodefunc = NULL;
839
840 if (encoding == NULL) {
841 /* Try os.device_encoding(fileno) */
842 PyObject *fileno;
843 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
844 /* Ignore only AttributeError and UnsupportedOperation */
845 if (fileno == NULL) {
846 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
847 PyErr_ExceptionMatches(state->unsupported_operation)) {
848 PyErr_Clear();
849 }
850 else {
851 goto error;
852 }
853 }
854 else {
855 self->encoding = PyObject_CallMethod(state->os_module,
856 "device_encoding",
857 "N", fileno);
858 if (self->encoding == NULL)
859 goto error;
860 else if (!PyUnicode_Check(self->encoding))
861 Py_CLEAR(self->encoding);
862 }
863 }
864 if (encoding == NULL && self->encoding == NULL) {
865 if (state->locale_module == NULL) {
866 state->locale_module = PyImport_ImportModule("locale");
867 if (state->locale_module == NULL)
868 goto catch_ImportError;
869 else
870 goto use_locale;
871 }
872 else {
873 use_locale:
874 self->encoding = PyObject_CallMethod(
875 state->locale_module, "getpreferredencoding", NULL);
876 if (self->encoding == NULL) {
877 catch_ImportError:
878 /*
879 Importing locale can raise a ImportError because of
880 _functools, and locale.getpreferredencoding can raise a
881 ImportError if _locale is not available. These will happen
882 during module building.
883 */
884 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
885 PyErr_Clear();
886 self->encoding = PyUnicode_FromString("ascii");
887 }
888 else
889 goto error;
890 }
891 else if (!PyUnicode_Check(self->encoding))
892 Py_CLEAR(self->encoding);
893 }
894 }
895 if (self->encoding != NULL)
896 encoding = _PyUnicode_AsString(self->encoding);
897 else if (encoding != NULL) {
898 self->encoding = PyUnicode_FromString(encoding);
899 if (self->encoding == NULL)
900 goto error;
901 }
902 else {
903 PyErr_SetString(PyExc_IOError,
904 "could not determine default encoding");
905 }
906
907 if (errors == NULL)
908 errors = "strict";
909 self->errors = PyBytes_FromString(errors);
910 if (self->errors == NULL)
911 goto error;
912
913 self->chunk_size = 8192;
914 self->readuniversal = (newline == NULL || newline[0] == '\0');
915 self->line_buffering = line_buffering;
916 self->readtranslate = (newline == NULL);
917 if (newline) {
918 self->readnl = PyUnicode_FromString(newline);
919 if (self->readnl == NULL)
920 return -1;
921 }
922 self->writetranslate = (newline == NULL || newline[0] != '\0');
923 if (!self->readuniversal && self->readnl) {
924 self->writenl = _PyUnicode_AsString(self->readnl);
925 if (!strcmp(self->writenl, "\n"))
926 self->writenl = NULL;
927 }
928#ifdef MS_WINDOWS
929 else
930 self->writenl = "\r\n";
931#endif
932
933 /* Build the decoder object */
934 res = PyObject_CallMethod(buffer, "readable", NULL);
935 if (res == NULL)
936 goto error;
937 r = PyObject_IsTrue(res);
938 Py_DECREF(res);
939 if (r == -1)
940 goto error;
941 if (r == 1) {
942 self->decoder = PyCodec_IncrementalDecoder(
943 encoding, errors);
944 if (self->decoder == NULL)
945 goto error;
946
947 if (self->readuniversal) {
948 PyObject *incrementalDecoder = PyObject_CallFunction(
949 (PyObject *)&PyIncrementalNewlineDecoder_Type,
950 "Oi", self->decoder, (int)self->readtranslate);
951 if (incrementalDecoder == NULL)
952 goto error;
953 Py_CLEAR(self->decoder);
954 self->decoder = incrementalDecoder;
955 }
956 }
957
958 /* Build the encoder object */
959 res = PyObject_CallMethod(buffer, "writable", NULL);
960 if (res == NULL)
961 goto error;
962 r = PyObject_IsTrue(res);
963 Py_DECREF(res);
964 if (r == -1)
965 goto error;
966 if (r == 1) {
967 PyObject *ci;
968 self->encoder = PyCodec_IncrementalEncoder(
969 encoding, errors);
970 if (self->encoder == NULL)
971 goto error;
972 /* Get the normalized named of the codec */
973 ci = _PyCodec_Lookup(encoding);
974 if (ci == NULL)
975 goto error;
976 res = PyObject_GetAttrString(ci, "name");
977 Py_DECREF(ci);
978 if (res == NULL)
979 PyErr_Clear();
980 else if (PyUnicode_Check(res)) {
981 encodefuncentry *e = encodefuncs;
982 while (e->name != NULL) {
983 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
984 self->encodefunc = e->encodefunc;
985 break;
986 }
987 e++;
988 }
989 }
990 Py_XDECREF(res);
991 }
992
993 self->buffer = buffer;
994 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +0000995
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000996 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
997 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
998 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
999 raw = PyObject_GetAttrString(buffer, "raw");
1000 /* Cache the raw FileIO object to speed up 'closed' checks */
1001 if (raw == NULL)
1002 PyErr_Clear();
1003 else if (Py_TYPE(raw) == &PyFileIO_Type)
1004 self->raw = raw;
1005 else
1006 Py_DECREF(raw);
1007 }
1008
1009 res = PyObject_CallMethod(buffer, "seekable", NULL);
1010 if (res == NULL)
1011 goto error;
1012 self->seekable = self->telling = PyObject_IsTrue(res);
1013 Py_DECREF(res);
1014
Antoine Pitroue4501852009-05-14 18:55:55 +00001015 self->encoding_start_of_stream = 0;
1016 if (self->seekable && self->encoder) {
1017 PyObject *cookieObj;
1018 int cmp;
1019
1020 self->encoding_start_of_stream = 1;
1021
1022 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1023 if (cookieObj == NULL)
1024 goto error;
1025
1026 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1027 Py_DECREF(cookieObj);
1028 if (cmp < 0) {
1029 goto error;
1030 }
1031
1032 if (cmp == 0) {
1033 self->encoding_start_of_stream = 0;
1034 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1035 _PyIO_zero, NULL);
1036 if (res == NULL)
1037 goto error;
1038 Py_DECREF(res);
1039 }
1040 }
1041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042 self->ok = 1;
1043 return 0;
1044
1045 error:
1046 return -1;
1047}
1048
1049static int
1050_TextIOWrapper_clear(PyTextIOWrapperObject *self)
1051{
1052 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1053 return -1;
1054 self->ok = 0;
1055 Py_CLEAR(self->buffer);
1056 Py_CLEAR(self->encoding);
1057 Py_CLEAR(self->encoder);
1058 Py_CLEAR(self->decoder);
1059 Py_CLEAR(self->readnl);
1060 Py_CLEAR(self->decoded_chars);
1061 Py_CLEAR(self->pending_bytes);
1062 Py_CLEAR(self->snapshot);
1063 Py_CLEAR(self->errors);
1064 Py_CLEAR(self->raw);
1065 return 0;
1066}
1067
1068static void
1069TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
1070{
1071 if (_TextIOWrapper_clear(self) < 0)
1072 return;
1073 _PyObject_GC_UNTRACK(self);
1074 if (self->weakreflist != NULL)
1075 PyObject_ClearWeakRefs((PyObject *)self);
1076 Py_CLEAR(self->dict);
1077 Py_TYPE(self)->tp_free((PyObject *)self);
1078}
1079
1080static int
1081TextIOWrapper_traverse(PyTextIOWrapperObject *self, visitproc visit, void *arg)
1082{
1083 Py_VISIT(self->buffer);
1084 Py_VISIT(self->encoding);
1085 Py_VISIT(self->encoder);
1086 Py_VISIT(self->decoder);
1087 Py_VISIT(self->readnl);
1088 Py_VISIT(self->decoded_chars);
1089 Py_VISIT(self->pending_bytes);
1090 Py_VISIT(self->snapshot);
1091 Py_VISIT(self->errors);
1092 Py_VISIT(self->raw);
1093
1094 Py_VISIT(self->dict);
1095 return 0;
1096}
1097
1098static int
1099TextIOWrapper_clear(PyTextIOWrapperObject *self)
1100{
1101 if (_TextIOWrapper_clear(self) < 0)
1102 return -1;
1103 Py_CLEAR(self->dict);
1104 return 0;
1105}
1106
1107static PyObject *
1108TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context);
1109
/* Return NULL from the enclosing function, with an exception set, if the
   stream is closed.

   This macro takes some shortcuts to make the common case faster: for an
   exact TextIOWrapper instance the cached raw FileIO object (if any) is
   queried directly, otherwise the `closed` getter is called.  Any other
   type goes through the generic _PyIOBase_checkClosed().

   `self` is evaluated several times, so it must be a side-effect free
   expression.  Every use is parenthesized so that arbitrary pointer
   expressions are accepted. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = TextIOWrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_checkClosed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1136
/* Return NULL from the enclosing function, with ValueError set, when the
   object is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized.

   The expansion is a bare `if` statement (not do/while), so do not use it
   as the body of an unbraced if/else.  `self` is parenthesized so that any
   pointer expression may be passed. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1148
/* Variant of CHECK_INITIALIZED for functions returning int: returns -1,
   with ValueError set, when the object is not usable (self->ok <= 0).

   The expansion is a bare `if` statement (not do/while), so do not use it
   as the body of an unbraced if/else.  `self` is parenthesized so that any
   pointer expression may be passed. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1160
1161
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001162static PyObject *
1163TextIOWrapper_detach(PyTextIOWrapperObject *self)
1164{
1165 PyObject *buffer, *res;
1166 CHECK_INITIALIZED(self);
1167 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1168 if (res == NULL)
1169 return NULL;
1170 Py_DECREF(res);
1171 buffer = self->buffer;
1172 self->buffer = NULL;
1173 self->detached = 1;
1174 self->ok = 0;
1175 return buffer;
1176}
1177
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001178Py_LOCAL_INLINE(const Py_UNICODE *)
1179findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1180{
1181 /* like wcschr, but doesn't stop at NULL characters */
1182 while (size-- > 0) {
1183 if (*s == ch)
1184 return s;
1185 s++;
1186 }
1187 return NULL;
1188}
1189
Antoine Pitrou24f36292009-03-28 22:16:42 +00001190/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001191 underlying buffered object, though. */
1192static int
1193_TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
1194{
1195 PyObject *b, *ret;
1196
1197 if (self->pending_bytes == NULL)
1198 return 0;
1199 b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1200 if (b == NULL)
1201 return -1;
1202 ret = PyObject_CallMethodObjArgs(self->buffer,
1203 _PyIO_str_write, b, NULL);
1204 Py_DECREF(b);
1205 if (ret == NULL)
1206 return -1;
1207 Py_DECREF(ret);
1208 Py_CLEAR(self->pending_bytes);
1209 self->pending_bytes_count = 0;
1210 return 0;
1211}
1212
1213static PyObject *
1214TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
1215{
1216 PyObject *ret;
1217 PyObject *text; /* owned reference */
1218 PyObject *b;
1219 Py_ssize_t textlen;
1220 int haslf = 0;
1221 int needflush = 0;
1222
1223 CHECK_INITIALIZED(self);
1224
1225 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1226 return NULL;
1227 }
1228
1229 CHECK_CLOSED(self);
1230
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001231 if (self->encoder == NULL) {
1232 PyErr_SetString(PyExc_IOError, "not writable");
1233 return NULL;
1234 }
1235
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001236 Py_INCREF(text);
1237
1238 textlen = PyUnicode_GetSize(text);
1239
1240 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1241 if (findchar(PyUnicode_AS_UNICODE(text),
1242 PyUnicode_GET_SIZE(text), '\n'))
1243 haslf = 1;
1244
1245 if (haslf && self->writetranslate && self->writenl != NULL) {
1246 PyObject *newtext = PyObject_CallMethod(
1247 text, "replace", "ss", "\n", self->writenl);
1248 Py_DECREF(text);
1249 if (newtext == NULL)
1250 return NULL;
1251 text = newtext;
1252 }
1253
1254 if (self->line_buffering &&
1255 (haslf ||
1256 findchar(PyUnicode_AS_UNICODE(text),
1257 PyUnicode_GET_SIZE(text), '\r')))
1258 needflush = 1;
1259
1260 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001261 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001262 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001263 self->encoding_start_of_stream = 0;
1264 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265 else
1266 b = PyObject_CallMethodObjArgs(self->encoder,
1267 _PyIO_str_encode, text, NULL);
1268 Py_DECREF(text);
1269 if (b == NULL)
1270 return NULL;
1271
1272 if (self->pending_bytes == NULL) {
1273 self->pending_bytes = PyList_New(0);
1274 if (self->pending_bytes == NULL) {
1275 Py_DECREF(b);
1276 return NULL;
1277 }
1278 self->pending_bytes_count = 0;
1279 }
1280 if (PyList_Append(self->pending_bytes, b) < 0) {
1281 Py_DECREF(b);
1282 return NULL;
1283 }
1284 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1285 Py_DECREF(b);
1286 if (self->pending_bytes_count > self->chunk_size || needflush) {
1287 if (_TextIOWrapper_writeflush(self) < 0)
1288 return NULL;
1289 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001290
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001291 if (needflush) {
1292 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1293 if (ret == NULL)
1294 return NULL;
1295 Py_DECREF(ret);
1296 }
1297
1298 Py_CLEAR(self->snapshot);
1299
1300 if (self->decoder) {
1301 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1302 if (ret == NULL)
1303 return NULL;
1304 Py_DECREF(ret);
1305 }
1306
1307 return PyLong_FromSsize_t(textlen);
1308}
1309
1310/* Steal a reference to chars and store it in the decoded_char buffer;
1311 */
1312static void
1313TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars)
1314{
1315 Py_CLEAR(self->decoded_chars);
1316 self->decoded_chars = chars;
1317 self->decoded_chars_used = 0;
1318}
1319
1320static PyObject *
1321TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
1322{
1323 PyObject *chars;
1324 Py_ssize_t avail;
1325
1326 if (self->decoded_chars == NULL)
1327 return PyUnicode_FromStringAndSize(NULL, 0);
1328
1329 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1330 - self->decoded_chars_used);
1331
1332 assert(avail >= 0);
1333
1334 if (n < 0 || n > avail)
1335 n = avail;
1336
1337 if (self->decoded_chars_used > 0 || n < avail) {
1338 chars = PyUnicode_FromUnicode(
1339 PyUnicode_AS_UNICODE(self->decoded_chars)
1340 + self->decoded_chars_used, n);
1341 if (chars == NULL)
1342 return NULL;
1343 }
1344 else {
1345 chars = self->decoded_chars;
1346 Py_INCREF(chars);
1347 }
1348
1349 self->decoded_chars_used += n;
1350 return chars;
1351}
1352
1353/* Read and decode the next chunk of data from the BufferedReader.
1354 */
1355static int
1356TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
1357{
1358 PyObject *dec_buffer = NULL;
1359 PyObject *dec_flags = NULL;
1360 PyObject *input_chunk = NULL;
1361 PyObject *decoded_chars, *chunk_size;
1362 int eof;
1363
1364 /* The return value is True unless EOF was reached. The decoded string is
1365 * placed in self._decoded_chars (replacing its previous value). The
1366 * entire input chunk is sent to the decoder, though some of it may remain
1367 * buffered in the decoder, yet to be converted.
1368 */
1369
1370 if (self->decoder == NULL) {
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001371 PyErr_SetString(PyExc_IOError, "not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001372 return -1;
1373 }
1374
1375 if (self->telling) {
1376 /* To prepare for tell(), we need to snapshot a point in the file
1377 * where the decoder's input buffer is empty.
1378 */
1379
1380 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1381 _PyIO_str_getstate, NULL);
1382 if (state == NULL)
1383 return -1;
1384 /* Given this, we know there was a valid snapshot point
1385 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1386 */
1387 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1388 Py_DECREF(state);
1389 return -1;
1390 }
1391 Py_INCREF(dec_buffer);
1392 Py_INCREF(dec_flags);
1393 Py_DECREF(state);
1394 }
1395
1396 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1397 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1398 if (chunk_size == NULL)
1399 goto fail;
1400 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1401 _PyIO_str_read1, chunk_size, NULL);
1402 Py_DECREF(chunk_size);
1403 if (input_chunk == NULL)
1404 goto fail;
1405 assert(PyBytes_Check(input_chunk));
1406
1407 eof = (PyBytes_Size(input_chunk) == 0);
1408
1409 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1410 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1411 self->decoder, input_chunk, eof);
1412 }
1413 else {
1414 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1415 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1416 }
1417
1418 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1419 if (decoded_chars == NULL)
1420 goto fail;
1421 TextIOWrapper_set_decoded_chars(self, decoded_chars);
1422 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1423 eof = 0;
1424
1425 if (self->telling) {
1426 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1427 * next input to be decoded is dec_buffer + input_chunk.
1428 */
1429 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1430 if (next_input == NULL)
1431 goto fail;
1432 assert (PyBytes_Check(next_input));
1433 Py_DECREF(dec_buffer);
1434 Py_CLEAR(self->snapshot);
1435 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1436 }
1437 Py_DECREF(input_chunk);
1438
1439 return (eof == 0);
1440
1441 fail:
1442 Py_XDECREF(dec_buffer);
1443 Py_XDECREF(dec_flags);
1444 Py_XDECREF(input_chunk);
1445 return -1;
1446}
1447
1448static PyObject *
1449TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
1450{
1451 Py_ssize_t n = -1;
1452 PyObject *result = NULL, *chunks = NULL;
1453
1454 CHECK_INITIALIZED(self);
1455
1456 if (!PyArg_ParseTuple(args, "|n:read", &n))
1457 return NULL;
1458
1459 CHECK_CLOSED(self);
1460
Benjamin Petersona1b49012009-03-31 23:11:32 +00001461 if (self->decoder == NULL) {
1462 PyErr_SetString(PyExc_IOError, "not readable");
1463 return NULL;
1464 }
1465
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001466 if (_TextIOWrapper_writeflush(self) < 0)
1467 return NULL;
1468
1469 if (n < 0) {
1470 /* Read everything */
1471 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1472 PyObject *decoded;
1473 if (bytes == NULL)
1474 goto fail;
1475 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1476 bytes, Py_True, NULL);
1477 Py_DECREF(bytes);
1478 if (decoded == NULL)
1479 goto fail;
1480
1481 result = TextIOWrapper_get_decoded_chars(self, -1);
1482
1483 if (result == NULL) {
1484 Py_DECREF(decoded);
1485 return NULL;
1486 }
1487
1488 PyUnicode_AppendAndDel(&result, decoded);
1489 if (result == NULL)
1490 goto fail;
1491
1492 Py_CLEAR(self->snapshot);
1493 return result;
1494 }
1495 else {
1496 int res = 1;
1497 Py_ssize_t remaining = n;
1498
1499 result = TextIOWrapper_get_decoded_chars(self, n);
1500 if (result == NULL)
1501 goto fail;
1502 remaining -= PyUnicode_GET_SIZE(result);
1503
1504 /* Keep reading chunks until we have n characters to return */
1505 while (remaining > 0) {
1506 res = TextIOWrapper_read_chunk(self);
1507 if (res < 0)
1508 goto fail;
1509 if (res == 0) /* EOF */
1510 break;
1511 if (chunks == NULL) {
1512 chunks = PyList_New(0);
1513 if (chunks == NULL)
1514 goto fail;
1515 }
1516 if (PyList_Append(chunks, result) < 0)
1517 goto fail;
1518 Py_DECREF(result);
1519 result = TextIOWrapper_get_decoded_chars(self, remaining);
1520 if (result == NULL)
1521 goto fail;
1522 remaining -= PyUnicode_GET_SIZE(result);
1523 }
1524 if (chunks != NULL) {
1525 if (result != NULL && PyList_Append(chunks, result) < 0)
1526 goto fail;
1527 Py_CLEAR(result);
1528 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1529 if (result == NULL)
1530 goto fail;
1531 Py_CLEAR(chunks);
1532 }
1533 return result;
1534 }
1535 fail:
1536 Py_XDECREF(result);
1537 Py_XDECREF(chunks);
1538 return NULL;
1539}
1540
1541
1542/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1543 that is to the NUL character. Otherwise the function will produce
1544 incorrect results. */
1545static Py_UNICODE *
1546find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1547{
1548 Py_UNICODE *s = start;
1549 for (;;) {
1550 while (*s > ch)
1551 s++;
1552 if (*s == ch)
1553 return s;
1554 if (s == end)
1555 return NULL;
1556 s++;
1557 }
1558}
1559
1560Py_ssize_t
1561_PyIO_find_line_ending(
1562 int translated, int universal, PyObject *readnl,
1563 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1564{
1565 Py_ssize_t len = end - start;
1566
1567 if (translated) {
1568 /* Newlines are already translated, only search for \n */
1569 Py_UNICODE *pos = find_control_char(start, end, '\n');
1570 if (pos != NULL)
1571 return pos - start + 1;
1572 else {
1573 *consumed = len;
1574 return -1;
1575 }
1576 }
1577 else if (universal) {
1578 /* Universal newline search. Find any of \r, \r\n, \n
1579 * The decoder ensures that \r\n are not split in two pieces
1580 */
1581 Py_UNICODE *s = start;
1582 for (;;) {
1583 Py_UNICODE ch;
1584 /* Fast path for non-control chars. The loop always ends
1585 since the Py_UNICODE storage is NUL-terminated. */
1586 while (*s > '\r')
1587 s++;
1588 if (s >= end) {
1589 *consumed = len;
1590 return -1;
1591 }
1592 ch = *s++;
1593 if (ch == '\n')
1594 return s - start;
1595 if (ch == '\r') {
1596 if (*s == '\n')
1597 return s - start + 1;
1598 else
1599 return s - start;
1600 }
1601 }
1602 }
1603 else {
1604 /* Non-universal mode. */
1605 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1606 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1607 if (readnl_len == 1) {
1608 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1609 if (pos != NULL)
1610 return pos - start + 1;
1611 *consumed = len;
1612 return -1;
1613 }
1614 else {
1615 Py_UNICODE *s = start;
1616 Py_UNICODE *e = end - readnl_len + 1;
1617 Py_UNICODE *pos;
1618 if (e < s)
1619 e = s;
1620 while (s < e) {
1621 Py_ssize_t i;
1622 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1623 if (pos == NULL || pos >= e)
1624 break;
1625 for (i = 1; i < readnl_len; i++) {
1626 if (pos[i] != nl[i])
1627 break;
1628 }
1629 if (i == readnl_len)
1630 return pos - start + readnl_len;
1631 s = pos + 1;
1632 }
1633 pos = find_control_char(e, end, nl[0]);
1634 if (pos == NULL)
1635 *consumed = len;
1636 else
1637 *consumed = pos - start;
1638 return -1;
1639 }
1640 }
1641}
1642
/* Read one line and return it as a new string object.

   limit: maximum number of characters to return; a negative value means
          no limit.

   Pending writes are flushed first.  Decoded chunks are then scanned with
   _PyIO_find_line_ending() until a line ending is found, the limit is
   reached, or EOF is hit.  Text that was scanned without finding a line
   ending is put aside in `chunks`; characters that might be the beginning
   of a split line ending are kept in `remaining` and prepended to the next
   chunk.  Returns an empty string at EOF and NULL with an exception set on
   error. */
static PyObject *
_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_TextIOWrapper_writeflush(self) < 0)
        return NULL;

    /* Number of characters already put aside in `chunks`. */
    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = TextIOWrapper_read_chunk(self);
            if (res < 0)
                goto error;
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            TextIOWrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Scan the decoded buffer in place, from the first unused
               character. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Prepend the leftover of the previous chunk; positions in
               `line` are then shifted by len(remaining) relative to the
               decoded buffer. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Line ending found: convert to an index into `line` and
               truncate to the limit if needed. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining characters we'll have to prepend to
           the next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        TextIOWrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            if (start == 0 && Py_REFCNT(line) == 1) {
                /* We hold the only reference: shrink in place. */
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                /* Otherwise copy out the [start, endpos) slice. */
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* Leftover characters that were never followed by more data (the
           loop exited at EOF) belong to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Join the pieces put aside with the final piece, if any. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_CLEAR(line);
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    /* Nothing was read at all: return an empty string. */
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1796
1797static PyObject *
1798TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args)
1799{
1800 Py_ssize_t limit = -1;
1801
1802 CHECK_INITIALIZED(self);
1803 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1804 return NULL;
1805 }
1806 return _TextIOWrapper_readline(self, limit);
1807}
1808
1809/* Seek and Tell */
1810
/* Unpacked form of the opaque integer "cookie" exchanged by tell() and
   seek().  The field descriptions below reflect how seek() uses them. */
typedef struct {
    Py_off_t start_pos;     /* position to seek the underlying buffer to */
    int dec_flags;          /* flags given to decoder.setstate() at start_pos */
    int bytes_to_feed;      /* byte count read from the buffer after seeking */
    int chars_to_skip;      /* decoded characters to mark as already used */
    char need_eof;          /* passed as the `final` argument of decode() */
} CookieStruct;
1818
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various CookieStruct fields will be stored.
 */

/* Total size of the intermediary byte string: one Py_off_t, three ints
   and one char, stored back to back without padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

/* Byte-order flag passed to the _PyLong byte-array conversions. */
# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

/* Byte-order flag passed to the _PyLong byte-array conversions. */
# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1856
1857static int
1858TextIOWrapper_parseCookie(CookieStruct *cookie, PyObject *cookieObj)
1859{
1860 unsigned char buffer[COOKIE_BUF_LEN];
1861 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1862 if (cookieLong == NULL)
1863 return -1;
1864
1865 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1866 IS_LITTLE_ENDIAN, 0) < 0) {
1867 Py_DECREF(cookieLong);
1868 return -1;
1869 }
1870 Py_DECREF(cookieLong);
1871
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001872 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1873 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1874 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1875 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1876 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001877
1878 return 0;
1879}
1880
1881static PyObject *
1882TextIOWrapper_buildCookie(CookieStruct *cookie)
1883{
1884 unsigned char buffer[COOKIE_BUF_LEN];
1885
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001886 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1887 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1888 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1889 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1890 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001891
1892 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1893}
1894#undef IS_LITTLE_ENDIAN
1895
1896static int
1897_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
1898 CookieStruct *cookie)
1899{
1900 PyObject *res;
1901 /* When seeking to the start of the stream, we call decoder.reset()
1902 rather than decoder.getstate().
1903 This is for a few decoders such as utf-16 for which the state value
1904 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1905 utf-16, that we are expecting a BOM).
1906 */
1907 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1908 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1909 else
1910 res = PyObject_CallMethod(self->decoder, "setstate",
1911 "((yi))", "", cookie->dec_flags);
1912 if (res == NULL)
1913 return -1;
1914 Py_DECREF(res);
1915 return 0;
1916}
1917
Antoine Pitroue4501852009-05-14 18:55:55 +00001918static int
1919_TextIOWrapper_encoder_setstate(PyTextIOWrapperObject *self,
1920 CookieStruct *cookie)
1921{
1922 PyObject *res;
1923 /* Same as _TextIOWrapper_decoder_setstate() above. */
1924 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1925 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1926 self->encoding_start_of_stream = 1;
1927 }
1928 else {
1929 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1930 _PyIO_zero, NULL);
1931 self->encoding_start_of_stream = 0;
1932 }
1933 if (res == NULL)
1934 return -1;
1935 Py_DECREF(res);
1936 return 0;
1937}
1938
/* seek(cookie[, whence]): reposition the text stream.

   whence == 0: `cookie` is a value built by tell(); the stream goes back
                to the recorded safe start point and the decoding up to
                the recorded character is replayed.
   whence == 1: only a zero offset is accepted; the cookie is replaced by
                the result of self.tell() and processed as for whence 0.
   whence == 2: only a zero offset is accepted; the stream is flushed, the
                read state dropped, and the result of buffer.seek(0, 2)
                is returned.

   Returns a new reference to the cookie (or the buffer's seek() result
   for whence 2), or NULL with an exception set. */
static PyObject *
TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
{
    PyObject *cookieObj, *posobj;
    CookieStruct cookie;
    int whence = 0;
    PyObject *res;
    int cmp;

    CHECK_INITIALIZED(self);

    if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
        return NULL;
    CHECK_CLOSED(self);

    /* From here on cookieObj is an owned reference: it is either returned
       to the caller or released on every exit path. */
    Py_INCREF(cookieObj);

    if (!self->seekable) {
        PyErr_SetString(PyExc_IOError,
                        "underlying stream is not seekable");
        goto fail;
    }

    if (whence == 1) {
        /* seek relative to current position */
        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            PyErr_SetString(PyExc_IOError,
                            "can't do nonzero cur-relative seeks");
            goto fail;
        }

        /* Seeking to the current position should attempt to
         * sync the underlying buffer with the current position.
         */
        Py_DECREF(cookieObj);
        cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
        if (cookieObj == NULL)
            goto fail;
    }
    else if (whence == 2) {
        /* seek relative to end of file */

        cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            PyErr_SetString(PyExc_IOError,
                            "can't do nonzero end-relative seeks");
            goto fail;
        }

        res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
        if (res == NULL)
            goto fail;
        Py_DECREF(res);

        /* Drop all read state before moving to the end. */
        TextIOWrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
        if (self->decoder) {
            res = PyObject_CallMethod(self->decoder, "reset", NULL);
            if (res == NULL)
                goto fail;
            Py_DECREF(res);
        }

        res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
        Py_XDECREF(cookieObj);
        return res;
    }
    else if (whence != 0) {
        PyErr_Format(PyExc_ValueError,
                     "invalid whence (%d, should be 0, 1 or 2)", whence);
        goto fail;
    }

    /* Absolute seek (also reached from whence == 1 with the tell()
       cookie): reject negative cookies. */
    cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
    if (cmp < 0)
        goto fail;

    if (cmp == 1) {
        PyErr_Format(PyExc_ValueError,
                     "negative seek position %R", cookieObj);
        goto fail;
    }

    res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    /* The strategy of seek() is to go back to the safe start point
     * and replay the effect of read(chars_to_skip) from there.
     */
    if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
        goto fail;

    /* Seek back to the safe start point. */
    posobj = PyLong_FromOff_t(cookie.start_pos);
    if (posobj == NULL)
        goto fail;
    res = PyObject_CallMethodObjArgs(self->buffer,
                                     _PyIO_str_seek, posobj, NULL);
    Py_DECREF(posobj);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    TextIOWrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    /* Restore the decoder to its state from the safe start point. */
    if (self->decoder) {
        if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    if (cookie.chars_to_skip) {
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
        PyObject *input_chunk = PyObject_CallMethod(
            self->buffer, "read", "i", cookie.bytes_to_feed);
        PyObject *decoded;

        if (input_chunk == NULL)
            goto fail;

        assert (PyBytes_Check(input_chunk));

        /* The "N" format hands our reference to input_chunk over to the
           snapshot tuple; the object stays alive through self->snapshot
           for the decode() call below. */
        self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
        if (self->snapshot == NULL) {
            /* NOTE(review): whether "N" already released input_chunk on
               failure depends on the Python version; confirm that this
               DECREF cannot release it twice. */
            Py_DECREF(input_chunk);
            goto fail;
        }

        decoded = PyObject_CallMethod(self->decoder, "decode",
                                      "Oi", input_chunk, (int)cookie.need_eof);

        if (decoded == NULL)
            goto fail;

        TextIOWrapper_set_decoded_chars(self, decoded);

        /* Skip chars_to_skip of the decoded characters. */
        if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
            PyErr_SetString(PyExc_IOError, "can't restore logical file position");
            goto fail;
        }
        self->decoded_chars_used = cookie.chars_to_skip;
    }
    else {
        /* Nothing to replay: the snapshot has an empty input buffer. */
        self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
        if (self->snapshot == NULL)
            goto fail;
    }

    /* Finally, reset the encoder (merely useful for proper BOM handling) */
    if (self->encoder) {
        if (_TextIOWrapper_encoder_setstate(self, &cookie) < 0)
            goto fail;
    }
    return cookieObj;
  fail:
    Py_XDECREF(cookieObj);
    return NULL;

}
2109
2110static PyObject *
2111TextIOWrapper_tell(PyTextIOWrapperObject *self, PyObject *args)
2112{
2113 PyObject *res;
2114 PyObject *posobj = NULL;
2115 CookieStruct cookie = {0,0,0,0,0};
2116 PyObject *next_input;
2117 Py_ssize_t chars_to_skip, chars_decoded;
2118 PyObject *saved_state = NULL;
2119 char *input, *input_end;
2120
2121 CHECK_INITIALIZED(self);
2122 CHECK_CLOSED(self);
2123
2124 if (!self->seekable) {
2125 PyErr_SetString(PyExc_IOError,
2126 "underlying stream is not seekable");
2127 goto fail;
2128 }
2129 if (!self->telling) {
2130 PyErr_SetString(PyExc_IOError,
2131 "telling position disabled by next() call");
2132 goto fail;
2133 }
2134
2135 if (_TextIOWrapper_writeflush(self) < 0)
2136 return NULL;
2137 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2138 if (res == NULL)
2139 goto fail;
2140 Py_DECREF(res);
2141
2142 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2143 if (posobj == NULL)
2144 goto fail;
2145
2146 if (self->decoder == NULL || self->snapshot == NULL) {
2147 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2148 return posobj;
2149 }
2150
2151#if defined(HAVE_LARGEFILE_SUPPORT)
2152 cookie.start_pos = PyLong_AsLongLong(posobj);
2153#else
2154 cookie.start_pos = PyLong_AsLong(posobj);
2155#endif
2156 if (PyErr_Occurred())
2157 goto fail;
2158
2159 /* Skip backward to the snapshot point (see _read_chunk). */
2160 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2161 goto fail;
2162
2163 assert (PyBytes_Check(next_input));
2164
2165 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2166
2167 /* How many decoded characters have been used up since the snapshot? */
2168 if (self->decoded_chars_used == 0) {
2169 /* We haven't moved from the snapshot point. */
2170 Py_DECREF(posobj);
2171 return TextIOWrapper_buildCookie(&cookie);
2172 }
2173
2174 chars_to_skip = self->decoded_chars_used;
2175
2176 /* Starting from the snapshot position, we will walk the decoder
2177 * forward until it gives us enough decoded characters.
2178 */
2179 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2180 _PyIO_str_getstate, NULL);
2181 if (saved_state == NULL)
2182 goto fail;
2183
2184 /* Note our initial start point. */
2185 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
2186 goto fail;
2187
2188 /* Feed the decoder one byte at a time. As we go, note the
2189 * nearest "safe start point" before the current location
2190 * (a point where the decoder has nothing buffered, so seek()
2191 * can safely start from there and advance to this location).
2192 */
2193 chars_decoded = 0;
2194 input = PyBytes_AS_STRING(next_input);
2195 input_end = input + PyBytes_GET_SIZE(next_input);
2196 while (input < input_end) {
2197 PyObject *state;
2198 char *dec_buffer;
2199 Py_ssize_t dec_buffer_len;
2200 int dec_flags;
2201
2202 PyObject *decoded = PyObject_CallMethod(
2203 self->decoder, "decode", "y#", input, 1);
2204 if (decoded == NULL)
2205 goto fail;
2206 assert (PyUnicode_Check(decoded));
2207 chars_decoded += PyUnicode_GET_SIZE(decoded);
2208 Py_DECREF(decoded);
2209
2210 cookie.bytes_to_feed += 1;
2211
2212 state = PyObject_CallMethodObjArgs(self->decoder,
2213 _PyIO_str_getstate, NULL);
2214 if (state == NULL)
2215 goto fail;
2216 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2217 Py_DECREF(state);
2218 goto fail;
2219 }
2220 Py_DECREF(state);
2221
2222 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2223 /* Decoder buffer is empty, so this is a safe start point. */
2224 cookie.start_pos += cookie.bytes_to_feed;
2225 chars_to_skip -= chars_decoded;
2226 cookie.dec_flags = dec_flags;
2227 cookie.bytes_to_feed = 0;
2228 chars_decoded = 0;
2229 }
2230 if (chars_decoded >= chars_to_skip)
2231 break;
2232 input++;
2233 }
2234 if (input == input_end) {
2235 /* We didn't get enough decoded data; signal EOF to get more. */
2236 PyObject *decoded = PyObject_CallMethod(
2237 self->decoder, "decode", "yi", "", /* final = */ 1);
2238 if (decoded == NULL)
2239 goto fail;
2240 assert (PyUnicode_Check(decoded));
2241 chars_decoded += PyUnicode_GET_SIZE(decoded);
2242 Py_DECREF(decoded);
2243 cookie.need_eof = 1;
2244
2245 if (chars_decoded < chars_to_skip) {
2246 PyErr_SetString(PyExc_IOError,
2247 "can't reconstruct logical file position");
2248 goto fail;
2249 }
2250 }
2251
2252 /* finally */
2253 Py_XDECREF(posobj);
2254 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2255 Py_DECREF(saved_state);
2256 if (res == NULL)
2257 return NULL;
2258 Py_DECREF(res);
2259
2260 /* The returned cookie corresponds to the last safe start point. */
2261 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2262 return TextIOWrapper_buildCookie(&cookie);
2263
2264 fail:
2265 Py_XDECREF(posobj);
2266 if (saved_state) {
2267 PyObject *type, *value, *traceback;
2268 PyErr_Fetch(&type, &value, &traceback);
2269
2270 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2271 Py_DECREF(saved_state);
2272 if (res == NULL)
2273 return NULL;
2274 Py_DECREF(res);
2275
2276 PyErr_Restore(type, value, traceback);
2277 }
2278 return NULL;
2279}
2280
2281static PyObject *
2282TextIOWrapper_truncate(PyTextIOWrapperObject *self, PyObject *args)
2283{
2284 PyObject *pos = Py_None;
2285 PyObject *res;
2286
2287 CHECK_INITIALIZED(self)
2288 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2289 return NULL;
2290 }
2291
2292 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2293 if (res == NULL)
2294 return NULL;
2295 Py_DECREF(res);
2296
2297 if (pos != Py_None) {
2298 res = PyObject_CallMethodObjArgs((PyObject *) self,
2299 _PyIO_str_seek, pos, NULL);
2300 if (res == NULL)
2301 return NULL;
2302 Py_DECREF(res);
2303 }
2304
2305 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2306}
2307
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002308static PyObject *
2309TextIOWrapper_repr(PyTextIOWrapperObject *self)
2310{
Antoine Pitrou716c4442009-05-23 19:04:03 +00002311 PyObject *nameobj, *res;
2312
2313 CHECK_INITIALIZED(self);
2314
2315 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2316 if (nameobj == NULL) {
2317 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2318 PyErr_Clear();
2319 else
2320 return NULL;
2321 res = PyUnicode_FromFormat("<_io.TextIOWrapper encoding=%R>",
2322 self->encoding);
2323 }
2324 else {
2325 res = PyUnicode_FromFormat("<_io.TextIOWrapper name=%R encoding=%R>",
2326 nameobj, self->encoding);
2327 Py_DECREF(nameobj);
2328 }
2329 return res;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002330}
2331
2332
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002333/* Inquiries */
2334
2335static PyObject *
2336TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args)
2337{
2338 CHECK_INITIALIZED(self);
2339 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2340}
2341
2342static PyObject *
2343TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args)
2344{
2345 CHECK_INITIALIZED(self);
2346 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2347}
2348
2349static PyObject *
2350TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args)
2351{
2352 CHECK_INITIALIZED(self);
2353 return PyObject_CallMethod(self->buffer, "readable", NULL);
2354}
2355
2356static PyObject *
2357TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args)
2358{
2359 CHECK_INITIALIZED(self);
2360 return PyObject_CallMethod(self->buffer, "writable", NULL);
2361}
2362
2363static PyObject *
2364TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args)
2365{
2366 CHECK_INITIALIZED(self);
2367 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2368}
2369
2370static PyObject *
2371TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args)
2372{
2373 CHECK_INITIALIZED(self);
2374 CHECK_CLOSED(self);
2375 self->telling = self->seekable;
2376 if (_TextIOWrapper_writeflush(self) < 0)
2377 return NULL;
2378 return PyObject_CallMethod(self->buffer, "flush", NULL);
2379}
2380
2381static PyObject *
2382TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args)
2383{
2384 PyObject *res;
2385 CHECK_INITIALIZED(self);
2386 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2387 if (res == NULL) {
2388 /* If flush() fails, just give up */
2389 PyErr_Clear();
2390 }
2391 else
2392 Py_DECREF(res);
2393
2394 return PyObject_CallMethod(self->buffer, "close", NULL);
2395}
2396
2397static PyObject *
2398TextIOWrapper_iternext(PyTextIOWrapperObject *self)
2399{
2400 PyObject *line;
2401
2402 CHECK_INITIALIZED(self);
2403
2404 self->telling = 0;
2405 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2406 /* Skip method call overhead for speed */
2407 line = _TextIOWrapper_readline(self, -1);
2408 }
2409 else {
2410 line = PyObject_CallMethodObjArgs((PyObject *)self,
2411 _PyIO_str_readline, NULL);
2412 if (line && !PyUnicode_Check(line)) {
2413 PyErr_Format(PyExc_IOError,
2414 "readline() should have returned an str object, "
2415 "not '%.200s'", Py_TYPE(line)->tp_name);
2416 Py_DECREF(line);
2417 return NULL;
2418 }
2419 }
2420
2421 if (line == NULL)
2422 return NULL;
2423
2424 if (PyUnicode_GET_SIZE(line) == 0) {
2425 /* Reached EOF or would have blocked */
2426 Py_DECREF(line);
2427 Py_CLEAR(self->snapshot);
2428 self->telling = self->seekable;
2429 return NULL;
2430 }
2431
2432 return line;
2433}
2434
2435static PyObject *
2436TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context)
2437{
2438 CHECK_INITIALIZED(self);
2439 return PyObject_GetAttrString(self->buffer, "name");
2440}
2441
2442static PyObject *
2443TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context)
2444{
2445 CHECK_INITIALIZED(self);
2446 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2447}
2448
2449static PyObject *
2450TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context)
2451{
2452 PyObject *res;
2453 CHECK_INITIALIZED(self);
2454 if (self->decoder == NULL)
2455 Py_RETURN_NONE;
2456 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2457 if (res == NULL) {
2458 PyErr_Clear();
2459 Py_RETURN_NONE;
2460 }
2461 return res;
2462}
2463
2464static PyObject *
2465TextIOWrapper_chunk_size_get(PyTextIOWrapperObject *self, void *context)
2466{
2467 CHECK_INITIALIZED(self);
2468 return PyLong_FromSsize_t(self->chunk_size);
2469}
2470
2471static int
2472TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
2473 PyObject *arg, void *context)
2474{
2475 Py_ssize_t n;
2476 CHECK_INITIALIZED_INT(self);
2477 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2478 if (n == -1 && PyErr_Occurred())
2479 return -1;
2480 if (n <= 0) {
2481 PyErr_SetString(PyExc_ValueError,
2482 "a strictly positive integer is required");
2483 return -1;
2484 }
2485 self->chunk_size = n;
2486 return 0;
2487}
2488
2489static PyMethodDef TextIOWrapper_methods[] = {
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002490 {"detach", (PyCFunction)TextIOWrapper_detach, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002491 {"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
2492 {"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
2493 {"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
2494 {"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
2495 {"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
2496
2497 {"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
2498 {"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
2499 {"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
2500 {"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
2501 {"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
2502
2503 {"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
2504 {"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
2505 {"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
2506 {NULL, NULL}
2507};
2508
2509static PyMemberDef TextIOWrapper_members[] = {
2510 {"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
2511 {"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
2512 {"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
2513 {NULL}
2514};
2515
2516static PyGetSetDef TextIOWrapper_getset[] = {
2517 {"name", (getter)TextIOWrapper_name_get, NULL, NULL},
2518 {"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
2519/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2520*/
2521 {"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
2522 {"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
2523 (setter)TextIOWrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002524 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002525};
2526
/* Type object for _io.TextIOWrapper.
 *
 * Slots are initialized positionally, so the order of the entries below
 * must not be changed.  The type can be subclassed and takes part in
 * garbage collection (see tp_flags, tp_traverse and tp_clear); it also
 * supports weak references and a per-instance __dict__ through the two
 * offsetof() entries.
 */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(PyTextIOWrapperObject), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)TextIOWrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)TextIOWrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    TextIOWrapper_doc,          /* tp_doc */
    (traverseproc)TextIOWrapper_traverse, /* tp_traverse */
    (inquiry)TextIOWrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(PyTextIOWrapperObject, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
    TextIOWrapper_methods,      /* tp_methods */
    TextIOWrapper_members,      /* tp_members */
    TextIOWrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(PyTextIOWrapperObject, dict), /*tp_dictoffset*/
    (initproc)TextIOWrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};