blob: 8d2a686d80a86481bf1f0824bbc044712f8384bd [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
16PyDoc_STRVAR(TextIOBase_doc,
17 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Petersond2e0c792009-05-01 20:40:59 +000031PyDoc_STRVAR(TextIOBase_detach_doc,
32 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
39TextIOBase_detach(PyObject *self)
40{
41 return _unsupported("detach");
42}
43
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000044PyDoc_STRVAR(TextIOBase_read_doc,
45 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
52TextIOBase_read(PyObject *self, PyObject *args)
53{
54 return _unsupported("read");
55}
56
57PyDoc_STRVAR(TextIOBase_readline_doc,
58 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
64TextIOBase_readline(PyObject *self, PyObject *args)
65{
66 return _unsupported("readline");
67}
68
69PyDoc_STRVAR(TextIOBase_write_doc,
70 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
76TextIOBase_write(PyObject *self, PyObject *args)
77{
78 return _unsupported("write");
79}
80
81PyDoc_STRVAR(TextIOBase_encoding_doc,
82 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
88TextIOBase_encoding_get(PyObject *self, void *context)
89{
90 Py_RETURN_NONE;
91}
92
93PyDoc_STRVAR(TextIOBase_newlines_doc,
94 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
102TextIOBase_newlines_get(PyObject *self, void *context)
103{
104 Py_RETURN_NONE;
105}
106
107
108static PyMethodDef TextIOBase_methods[] = {
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000109 {"detach", (PyCFunction)TextIOBase_detach, METH_NOARGS, TextIOBase_detach_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110 {"read", TextIOBase_read, METH_VARARGS, TextIOBase_read_doc},
111 {"readline", TextIOBase_readline, METH_VARARGS, TextIOBase_readline_doc},
112 {"write", TextIOBase_write, METH_VARARGS, TextIOBase_write_doc},
113 {NULL, NULL}
114};
115
116static PyGetSetDef TextIOBase_getset[] = {
117 {"encoding", (getter)TextIOBase_encoding_get, NULL, TextIOBase_encoding_doc},
118 {"newlines", (getter)TextIOBase_newlines_get, NULL, TextIOBase_newlines_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000119 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000120};
121
122PyTypeObject PyTextIOBase_Type = {
123 PyVarObject_HEAD_INIT(NULL, 0)
124 "_io._TextIOBase", /*tp_name*/
125 0, /*tp_basicsize*/
126 0, /*tp_itemsize*/
127 0, /*tp_dealloc*/
128 0, /*tp_print*/
129 0, /*tp_getattr*/
130 0, /*tp_setattr*/
131 0, /*tp_compare */
132 0, /*tp_repr*/
133 0, /*tp_as_number*/
134 0, /*tp_as_sequence*/
135 0, /*tp_as_mapping*/
136 0, /*tp_hash */
137 0, /*tp_call*/
138 0, /*tp_str*/
139 0, /*tp_getattro*/
140 0, /*tp_setattro*/
141 0, /*tp_as_buffer*/
142 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
143 TextIOBase_doc, /* tp_doc */
144 0, /* tp_traverse */
145 0, /* tp_clear */
146 0, /* tp_richcompare */
147 0, /* tp_weaklistoffset */
148 0, /* tp_iter */
149 0, /* tp_iternext */
150 TextIOBase_methods, /* tp_methods */
151 0, /* tp_members */
152 TextIOBase_getset, /* tp_getset */
153 &PyIOBase_Type, /* tp_base */
154 0, /* tp_dict */
155 0, /* tp_descr_get */
156 0, /* tp_descr_set */
157 0, /* tp_dictoffset */
158 0, /* tp_init */
159 0, /* tp_alloc */
160 0, /* tp_new */
161};
162
163
164/* IncrementalNewlineDecoder */
165
166PyDoc_STRVAR(IncrementalNewlineDecoder_doc,
167 "Codec used when reading a file in universal newlines mode. It wraps\n"
168 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
169 "records the types of newlines encountered. When used with\n"
170 "translate=False, it ensures that the newline sequence is returned in\n"
171 "one piece. When used with decoder=None, it expects unicode strings as\n"
172 "decode input and translates newlines without first invoking an external\n"
173 "decoder.\n"
174 );
175
176typedef struct {
177 PyObject_HEAD
178 PyObject *decoder;
179 PyObject *errors;
180 int pendingcr:1;
181 int translate:1;
182 unsigned int seennl:3;
183} PyNewLineDecoderObject;
184
185static int
Antoine Pitrou24f36292009-03-28 22:16:42 +0000186IncrementalNewlineDecoder_init(PyNewLineDecoderObject *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 PyObject *args, PyObject *kwds)
188{
189 PyObject *decoder;
190 int translate;
191 PyObject *errors = NULL;
192 char *kwlist[] = {"decoder", "translate", "errors", NULL};
193
194 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
195 kwlist, &decoder, &translate, &errors))
196 return -1;
197
198 self->decoder = decoder;
199 Py_INCREF(decoder);
200
201 if (errors == NULL) {
202 self->errors = PyUnicode_FromString("strict");
203 if (self->errors == NULL)
204 return -1;
205 }
206 else {
207 Py_INCREF(errors);
208 self->errors = errors;
209 }
210
211 self->translate = translate;
212 self->seennl = 0;
213 self->pendingcr = 0;
214
215 return 0;
216}
217
218static void
219IncrementalNewlineDecoder_dealloc(PyNewLineDecoderObject *self)
220{
221 Py_CLEAR(self->decoder);
222 Py_CLEAR(self->errors);
223 Py_TYPE(self)->tp_free((PyObject *)self);
224}
225
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in the decoder's `seennl` field. */
#define SEEN_CR     (1 << 0)
#define SEEN_LF     (1 << 1)
#define SEEN_CRLF   (1 << 2)
#define SEEN_ALL    (SEEN_CR | SEEN_LF | SEEN_CRLF)
230
231PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000232_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233 PyObject *input, int final)
234{
235 PyObject *output;
236 Py_ssize_t output_len;
237 PyNewLineDecoderObject *self = (PyNewLineDecoderObject *) _self;
238
239 if (self->decoder == NULL) {
240 PyErr_SetString(PyExc_ValueError,
241 "IncrementalNewlineDecoder.__init__ not called");
242 return NULL;
243 }
244
245 /* decode input (with the eventual \r from a previous pass) */
246 if (self->decoder != Py_None) {
247 output = PyObject_CallMethodObjArgs(self->decoder,
248 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
249 }
250 else {
251 output = input;
252 Py_INCREF(output);
253 }
254
255 if (output == NULL)
256 return NULL;
257
258 if (!PyUnicode_Check(output)) {
259 PyErr_SetString(PyExc_TypeError,
260 "decoder should return a string result");
261 goto error;
262 }
263
264 output_len = PyUnicode_GET_SIZE(output);
265 if (self->pendingcr && (final || output_len > 0)) {
266 Py_UNICODE *out;
267 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
268 if (modified == NULL)
269 goto error;
270 out = PyUnicode_AS_UNICODE(modified);
271 out[0] = '\r';
272 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
273 output_len * sizeof(Py_UNICODE));
274 Py_DECREF(output);
275 output = modified;
276 self->pendingcr = 0;
277 output_len++;
278 }
279
280 /* retain last \r even when not translating data:
281 * then readline() is sure to get \r\n in one pass
282 */
283 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000284 if (output_len > 0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000285 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
286
287 if (Py_REFCNT(output) == 1) {
288 if (PyUnicode_Resize(&output, output_len - 1) < 0)
289 goto error;
290 }
291 else {
292 PyObject *modified = PyUnicode_FromUnicode(
293 PyUnicode_AS_UNICODE(output),
294 output_len - 1);
295 if (modified == NULL)
296 goto error;
297 Py_DECREF(output);
298 output = modified;
299 }
300 self->pendingcr = 1;
301 }
302 }
303
304 /* Record which newlines are read and do newline translation if desired,
305 all in one pass. */
306 {
307 Py_UNICODE *in_str;
308 Py_ssize_t len;
309 int seennl = self->seennl;
310 int only_lf = 0;
311
312 in_str = PyUnicode_AS_UNICODE(output);
313 len = PyUnicode_GET_SIZE(output);
314
315 if (len == 0)
316 return output;
317
318 /* If, up to now, newlines are consistently \n, do a quick check
319 for the \r *byte* with the libc's optimized memchr.
320 */
321 if (seennl == SEEN_LF || seennl == 0) {
Antoine Pitrou0e941892009-03-06 23:57:20 +0000322 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000323 }
324
Antoine Pitrou66913e22009-03-06 23:40:56 +0000325 if (only_lf) {
326 /* If not already seen, quick scan for a possible "\n" character.
327 (there's nothing else to be done, even when in translation mode)
328 */
329 if (seennl == 0 &&
330 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
331 Py_UNICODE *s, *end;
332 s = in_str;
333 end = in_str + len;
334 for (;;) {
335 Py_UNICODE c;
336 /* Fast loop for non-control characters */
337 while (*s > '\n')
338 s++;
339 c = *s++;
340 if (c == '\n') {
341 seennl |= SEEN_LF;
342 break;
343 }
344 if (s > end)
345 break;
346 }
347 }
348 /* Finished: we have scanned for newlines, and none of them
349 need translating */
350 }
351 else if (!self->translate) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000352 Py_UNICODE *s, *end;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000353 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000354 if (seennl == SEEN_ALL)
355 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000356 s = in_str;
357 end = in_str + len;
358 for (;;) {
359 Py_UNICODE c;
360 /* Fast loop for non-control characters */
361 while (*s > '\r')
362 s++;
363 c = *s++;
364 if (c == '\n')
365 seennl |= SEEN_LF;
366 else if (c == '\r') {
367 if (*s == '\n') {
368 seennl |= SEEN_CRLF;
369 s++;
370 }
371 else
372 seennl |= SEEN_CR;
373 }
374 if (s > end)
375 break;
376 if (seennl == SEEN_ALL)
377 break;
378 }
379 endscan:
380 ;
381 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000382 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000383 PyObject *translated = NULL;
384 Py_UNICODE *out_str;
385 Py_UNICODE *in, *out, *end;
386 if (Py_REFCNT(output) != 1) {
387 /* We could try to optimize this so that we only do a copy
388 when there is something to translate. On the other hand,
389 most decoders should only output non-shared strings, i.e.
390 translation is done in place. */
391 translated = PyUnicode_FromUnicode(NULL, len);
392 if (translated == NULL)
393 goto error;
394 assert(Py_REFCNT(translated) == 1);
395 memcpy(PyUnicode_AS_UNICODE(translated),
396 PyUnicode_AS_UNICODE(output),
397 len * sizeof(Py_UNICODE));
398 }
399 else {
400 translated = output;
401 }
402 out_str = PyUnicode_AS_UNICODE(translated);
403 in = in_str;
404 out = out_str;
405 end = in_str + len;
406 for (;;) {
407 Py_UNICODE c;
408 /* Fast loop for non-control characters */
409 while ((c = *in++) > '\r')
410 *out++ = c;
411 if (c == '\n') {
412 *out++ = c;
413 seennl |= SEEN_LF;
414 continue;
415 }
416 if (c == '\r') {
417 if (*in == '\n') {
418 in++;
419 seennl |= SEEN_CRLF;
420 }
421 else
422 seennl |= SEEN_CR;
423 *out++ = '\n';
424 continue;
425 }
426 if (in > end)
427 break;
428 *out++ = c;
429 }
430 if (translated != output) {
431 Py_DECREF(output);
432 output = translated;
433 }
434 if (out - out_str != len) {
435 if (PyUnicode_Resize(&output, out - out_str) < 0)
436 goto error;
437 }
438 }
439 self->seennl |= seennl;
440 }
441
442 return output;
443
444 error:
445 Py_DECREF(output);
446 return NULL;
447}
448
449static PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000450IncrementalNewlineDecoder_decode(PyNewLineDecoderObject *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000451 PyObject *args, PyObject *kwds)
452{
453 char *kwlist[] = {"input", "final", NULL};
454 PyObject *input;
455 int final = 0;
456
457 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
458 kwlist, &input, &final))
459 return NULL;
460 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
461}
462
463static PyObject *
464IncrementalNewlineDecoder_getstate(PyNewLineDecoderObject *self, PyObject *args)
465{
466 PyObject *buffer;
467 unsigned PY_LONG_LONG flag;
468
469 if (self->decoder != Py_None) {
470 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
471 _PyIO_str_getstate, NULL);
472 if (state == NULL)
473 return NULL;
474 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
475 Py_DECREF(state);
476 return NULL;
477 }
478 Py_INCREF(buffer);
479 Py_DECREF(state);
480 }
481 else {
482 buffer = PyBytes_FromString("");
483 flag = 0;
484 }
485 flag <<= 1;
486 if (self->pendingcr)
487 flag |= 1;
488 return Py_BuildValue("NK", buffer, flag);
489}
490
491static PyObject *
492IncrementalNewlineDecoder_setstate(PyNewLineDecoderObject *self, PyObject *state)
493{
494 PyObject *buffer;
495 unsigned PY_LONG_LONG flag;
496
497 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
498 return NULL;
499
500 self->pendingcr = (int) flag & 1;
501 flag >>= 1;
502
503 if (self->decoder != Py_None)
504 return PyObject_CallMethod(self->decoder,
505 "setstate", "((OK))", buffer, flag);
506 else
507 Py_RETURN_NONE;
508}
509
510static PyObject *
511IncrementalNewlineDecoder_reset(PyNewLineDecoderObject *self, PyObject *args)
512{
513 self->seennl = 0;
514 self->pendingcr = 0;
515 if (self->decoder != Py_None)
516 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
517 else
518 Py_RETURN_NONE;
519}
520
521static PyObject *
522IncrementalNewlineDecoder_newlines_get(PyNewLineDecoderObject *self, void *context)
523{
524 switch (self->seennl) {
525 case SEEN_CR:
526 return PyUnicode_FromString("\r");
527 case SEEN_LF:
528 return PyUnicode_FromString("\n");
529 case SEEN_CRLF:
530 return PyUnicode_FromString("\r\n");
531 case SEEN_CR | SEEN_LF:
532 return Py_BuildValue("ss", "\r", "\n");
533 case SEEN_CR | SEEN_CRLF:
534 return Py_BuildValue("ss", "\r", "\r\n");
535 case SEEN_LF | SEEN_CRLF:
536 return Py_BuildValue("ss", "\n", "\r\n");
537 case SEEN_CR | SEEN_LF | SEEN_CRLF:
538 return Py_BuildValue("sss", "\r", "\n", "\r\n");
539 default:
540 Py_RETURN_NONE;
541 }
542
543}
544
545
546static PyMethodDef IncrementalNewlineDecoder_methods[] = {
547 {"decode", (PyCFunction)IncrementalNewlineDecoder_decode, METH_VARARGS|METH_KEYWORDS},
548 {"getstate", (PyCFunction)IncrementalNewlineDecoder_getstate, METH_NOARGS},
549 {"setstate", (PyCFunction)IncrementalNewlineDecoder_setstate, METH_O},
550 {"reset", (PyCFunction)IncrementalNewlineDecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000551 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000552};
553
554static PyGetSetDef IncrementalNewlineDecoder_getset[] = {
555 {"newlines", (getter)IncrementalNewlineDecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000556 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000557};
558
559PyTypeObject PyIncrementalNewlineDecoder_Type = {
560 PyVarObject_HEAD_INIT(NULL, 0)
561 "_io.IncrementalNewlineDecoder", /*tp_name*/
562 sizeof(PyNewLineDecoderObject), /*tp_basicsize*/
563 0, /*tp_itemsize*/
564 (destructor)IncrementalNewlineDecoder_dealloc, /*tp_dealloc*/
565 0, /*tp_print*/
566 0, /*tp_getattr*/
567 0, /*tp_setattr*/
568 0, /*tp_compare */
569 0, /*tp_repr*/
570 0, /*tp_as_number*/
571 0, /*tp_as_sequence*/
572 0, /*tp_as_mapping*/
573 0, /*tp_hash */
574 0, /*tp_call*/
575 0, /*tp_str*/
576 0, /*tp_getattro*/
577 0, /*tp_setattro*/
578 0, /*tp_as_buffer*/
579 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
580 IncrementalNewlineDecoder_doc, /* tp_doc */
581 0, /* tp_traverse */
582 0, /* tp_clear */
583 0, /* tp_richcompare */
584 0, /*tp_weaklistoffset*/
585 0, /* tp_iter */
586 0, /* tp_iternext */
587 IncrementalNewlineDecoder_methods, /* tp_methods */
588 0, /* tp_members */
589 IncrementalNewlineDecoder_getset, /* tp_getset */
590 0, /* tp_base */
591 0, /* tp_dict */
592 0, /* tp_descr_get */
593 0, /* tp_descr_set */
594 0, /* tp_dictoffset */
595 (initproc)IncrementalNewlineDecoder_init, /* tp_init */
596 0, /* tp_alloc */
597 PyType_GenericNew, /* tp_new */
598};
599
600
601/* TextIOWrapper */
602
603PyDoc_STRVAR(TextIOWrapper_doc,
604 "Character and line based layer over a BufferedIOBase object, buffer.\n"
605 "\n"
606 "encoding gives the name of the encoding that the stream will be\n"
607 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
608 "\n"
609 "errors determines the strictness of encoding and decoding (see the\n"
610 "codecs.register) and defaults to \"strict\".\n"
611 "\n"
612 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
613 "handling of line endings. If it is None, universal newlines is\n"
614 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
615 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
616 "caller. Conversely, on output, '\\n' is translated to the system\n"
617 "default line seperator, os.linesep. If newline is any other of its\n"
618 "legal values, that newline becomes the newline when the file is read\n"
619 "and it is returned untranslated. On output, '\\n' is converted to the\n"
620 "newline.\n"
621 "\n"
622 "If line_buffering is True, a call to flush is implied when a call to\n"
623 "write contains a newline character."
624 );
625
626typedef PyObject *
627 (*encodefunc_t)(PyObject *, PyObject *);
628
629typedef struct
630{
631 PyObject_HEAD
632 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000633 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634 Py_ssize_t chunk_size;
635 PyObject *buffer;
636 PyObject *encoding;
637 PyObject *encoder;
638 PyObject *decoder;
639 PyObject *readnl;
640 PyObject *errors;
641 const char *writenl; /* utf-8 encoded, NULL stands for \n */
642 char line_buffering;
643 char readuniversal;
644 char readtranslate;
645 char writetranslate;
646 char seekable;
647 char telling;
648 /* Specialized encoding func (see below) */
649 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000650 /* Whether or not it's the start of the stream */
651 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652
653 /* Reads and writes are internally buffered in order to speed things up.
654 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000655
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000656 Please also note that text to be written is first encoded before being
657 buffered. This is necessary so that encoding errors are immediately
658 reported to the caller, but it unfortunately means that the
659 IncrementalEncoder (whose encode() method is always written in Python)
660 becomes a bottleneck for small writes.
661 */
662 PyObject *decoded_chars; /* buffer for text returned from decoder */
663 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
664 PyObject *pending_bytes; /* list of bytes objects waiting to be
665 written, or NULL */
666 Py_ssize_t pending_bytes_count;
667 PyObject *snapshot;
668 /* snapshot is either None, or a tuple (dec_flags, next_input) where
669 * dec_flags is the second (integer) item of the decoder state and
670 * next_input is the chunk of input bytes that comes next after the
671 * snapshot point. We use this to reconstruct decoder states in tell().
672 */
673
674 /* Cache raw object if it's a FileIO object */
675 PyObject *raw;
676
677 PyObject *weakreflist;
678 PyObject *dict;
679} PyTextIOWrapperObject;
680
681
682/* A couple of specialized cases in order to bypass the slow incremental
683 encoding methods for the most popular encodings. */
684
685static PyObject *
686ascii_encode(PyTextIOWrapperObject *self, PyObject *text)
687{
688 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
689 PyUnicode_GET_SIZE(text),
690 PyBytes_AS_STRING(self->errors));
691}
692
693static PyObject *
694utf16be_encode(PyTextIOWrapperObject *self, PyObject *text)
695{
696 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
697 PyUnicode_GET_SIZE(text),
698 PyBytes_AS_STRING(self->errors), 1);
699}
700
701static PyObject *
702utf16le_encode(PyTextIOWrapperObject *self, PyObject *text)
703{
704 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
705 PyUnicode_GET_SIZE(text),
706 PyBytes_AS_STRING(self->errors), -1);
707}
708
709static PyObject *
710utf16_encode(PyTextIOWrapperObject *self, PyObject *text)
711{
Antoine Pitroue4501852009-05-14 18:55:55 +0000712 if (!self->encoding_start_of_stream) {
713 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000715 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000717 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000719 }
720 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
721 PyUnicode_GET_SIZE(text),
722 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723}
724
Antoine Pitroue4501852009-05-14 18:55:55 +0000725static PyObject *
726utf32be_encode(PyTextIOWrapperObject *self, PyObject *text)
727{
728 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
729 PyUnicode_GET_SIZE(text),
730 PyBytes_AS_STRING(self->errors), 1);
731}
732
733static PyObject *
734utf32le_encode(PyTextIOWrapperObject *self, PyObject *text)
735{
736 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
737 PyUnicode_GET_SIZE(text),
738 PyBytes_AS_STRING(self->errors), -1);
739}
740
741static PyObject *
742utf32_encode(PyTextIOWrapperObject *self, PyObject *text)
743{
744 if (!self->encoding_start_of_stream) {
745 /* Skip the BOM and use native byte ordering */
746#if defined(WORDS_BIGENDIAN)
747 return utf32be_encode(self, text);
748#else
749 return utf32le_encode(self, text);
750#endif
751 }
752 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
753 PyUnicode_GET_SIZE(text),
754 PyBytes_AS_STRING(self->errors), 0);
755}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000756
757static PyObject *
758utf8_encode(PyTextIOWrapperObject *self, PyObject *text)
759{
760 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
761 PyUnicode_GET_SIZE(text),
762 PyBytes_AS_STRING(self->errors));
763}
764
765static PyObject *
766latin1_encode(PyTextIOWrapperObject *self, PyObject *text)
767{
768 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
769 PyUnicode_GET_SIZE(text),
770 PyBytes_AS_STRING(self->errors));
771}
772
773/* Map normalized encoding names onto the specialized encoding funcs */
774
775typedef struct {
776 const char *name;
777 encodefunc_t encodefunc;
778} encodefuncentry;
779
Antoine Pitrou24f36292009-03-28 22:16:42 +0000780static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000781 {"ascii", (encodefunc_t) ascii_encode},
782 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000783 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784 {"utf-16-be", (encodefunc_t) utf16be_encode},
785 {"utf-16-le", (encodefunc_t) utf16le_encode},
786 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000787 {"utf-32-be", (encodefunc_t) utf32be_encode},
788 {"utf-32-le", (encodefunc_t) utf32le_encode},
789 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000790 {NULL, NULL}
791};
792
793
794static int
795TextIOWrapper_init(PyTextIOWrapperObject *self, PyObject *args, PyObject *kwds)
796{
797 char *kwlist[] = {"buffer", "encoding", "errors",
798 "newline", "line_buffering",
799 NULL};
800 PyObject *buffer, *raw;
801 char *encoding = NULL;
802 char *errors = NULL;
803 char *newline = NULL;
804 int line_buffering = 0;
805 _PyIO_State *state = IO_STATE;
806
807 PyObject *res;
808 int r;
809
810 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000811 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
813 kwlist, &buffer, &encoding, &errors,
814 &newline, &line_buffering))
815 return -1;
816
817 if (newline && newline[0] != '\0'
818 && !(newline[0] == '\n' && newline[1] == '\0')
819 && !(newline[0] == '\r' && newline[1] == '\0')
820 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
821 PyErr_Format(PyExc_ValueError,
822 "illegal newline value: %s", newline);
823 return -1;
824 }
825
826 Py_CLEAR(self->buffer);
827 Py_CLEAR(self->encoding);
828 Py_CLEAR(self->encoder);
829 Py_CLEAR(self->decoder);
830 Py_CLEAR(self->readnl);
831 Py_CLEAR(self->decoded_chars);
832 Py_CLEAR(self->pending_bytes);
833 Py_CLEAR(self->snapshot);
834 Py_CLEAR(self->errors);
835 Py_CLEAR(self->raw);
836 self->decoded_chars_used = 0;
837 self->pending_bytes_count = 0;
838 self->encodefunc = NULL;
839
840 if (encoding == NULL) {
841 /* Try os.device_encoding(fileno) */
842 PyObject *fileno;
843 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
844 /* Ignore only AttributeError and UnsupportedOperation */
845 if (fileno == NULL) {
846 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
847 PyErr_ExceptionMatches(state->unsupported_operation)) {
848 PyErr_Clear();
849 }
850 else {
851 goto error;
852 }
853 }
854 else {
855 self->encoding = PyObject_CallMethod(state->os_module,
856 "device_encoding",
857 "N", fileno);
858 if (self->encoding == NULL)
859 goto error;
860 else if (!PyUnicode_Check(self->encoding))
861 Py_CLEAR(self->encoding);
862 }
863 }
864 if (encoding == NULL && self->encoding == NULL) {
865 if (state->locale_module == NULL) {
866 state->locale_module = PyImport_ImportModule("locale");
867 if (state->locale_module == NULL)
868 goto catch_ImportError;
869 else
870 goto use_locale;
871 }
872 else {
873 use_locale:
874 self->encoding = PyObject_CallMethod(
875 state->locale_module, "getpreferredencoding", NULL);
876 if (self->encoding == NULL) {
877 catch_ImportError:
878 /*
879 Importing locale can raise a ImportError because of
880 _functools, and locale.getpreferredencoding can raise a
881 ImportError if _locale is not available. These will happen
882 during module building.
883 */
884 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
885 PyErr_Clear();
886 self->encoding = PyUnicode_FromString("ascii");
887 }
888 else
889 goto error;
890 }
891 else if (!PyUnicode_Check(self->encoding))
892 Py_CLEAR(self->encoding);
893 }
894 }
895 if (self->encoding != NULL)
896 encoding = _PyUnicode_AsString(self->encoding);
897 else if (encoding != NULL) {
898 self->encoding = PyUnicode_FromString(encoding);
899 if (self->encoding == NULL)
900 goto error;
901 }
902 else {
903 PyErr_SetString(PyExc_IOError,
904 "could not determine default encoding");
905 }
906
907 if (errors == NULL)
908 errors = "strict";
909 self->errors = PyBytes_FromString(errors);
910 if (self->errors == NULL)
911 goto error;
912
913 self->chunk_size = 8192;
914 self->readuniversal = (newline == NULL || newline[0] == '\0');
915 self->line_buffering = line_buffering;
916 self->readtranslate = (newline == NULL);
917 if (newline) {
918 self->readnl = PyUnicode_FromString(newline);
919 if (self->readnl == NULL)
920 return -1;
921 }
922 self->writetranslate = (newline == NULL || newline[0] != '\0');
923 if (!self->readuniversal && self->readnl) {
924 self->writenl = _PyUnicode_AsString(self->readnl);
925 if (!strcmp(self->writenl, "\n"))
926 self->writenl = NULL;
927 }
928#ifdef MS_WINDOWS
929 else
930 self->writenl = "\r\n";
931#endif
932
933 /* Build the decoder object */
934 res = PyObject_CallMethod(buffer, "readable", NULL);
935 if (res == NULL)
936 goto error;
937 r = PyObject_IsTrue(res);
938 Py_DECREF(res);
939 if (r == -1)
940 goto error;
941 if (r == 1) {
942 self->decoder = PyCodec_IncrementalDecoder(
943 encoding, errors);
944 if (self->decoder == NULL)
945 goto error;
946
947 if (self->readuniversal) {
948 PyObject *incrementalDecoder = PyObject_CallFunction(
949 (PyObject *)&PyIncrementalNewlineDecoder_Type,
950 "Oi", self->decoder, (int)self->readtranslate);
951 if (incrementalDecoder == NULL)
952 goto error;
953 Py_CLEAR(self->decoder);
954 self->decoder = incrementalDecoder;
955 }
956 }
957
958 /* Build the encoder object */
959 res = PyObject_CallMethod(buffer, "writable", NULL);
960 if (res == NULL)
961 goto error;
962 r = PyObject_IsTrue(res);
963 Py_DECREF(res);
964 if (r == -1)
965 goto error;
966 if (r == 1) {
967 PyObject *ci;
968 self->encoder = PyCodec_IncrementalEncoder(
969 encoding, errors);
970 if (self->encoder == NULL)
971 goto error;
972 /* Get the normalized named of the codec */
973 ci = _PyCodec_Lookup(encoding);
974 if (ci == NULL)
975 goto error;
976 res = PyObject_GetAttrString(ci, "name");
977 Py_DECREF(ci);
978 if (res == NULL)
979 PyErr_Clear();
980 else if (PyUnicode_Check(res)) {
981 encodefuncentry *e = encodefuncs;
982 while (e->name != NULL) {
983 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
984 self->encodefunc = e->encodefunc;
985 break;
986 }
987 e++;
988 }
989 }
990 Py_XDECREF(res);
991 }
992
993 self->buffer = buffer;
994 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +0000995
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000996 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
997 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
998 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
999 raw = PyObject_GetAttrString(buffer, "raw");
1000 /* Cache the raw FileIO object to speed up 'closed' checks */
1001 if (raw == NULL)
1002 PyErr_Clear();
1003 else if (Py_TYPE(raw) == &PyFileIO_Type)
1004 self->raw = raw;
1005 else
1006 Py_DECREF(raw);
1007 }
1008
1009 res = PyObject_CallMethod(buffer, "seekable", NULL);
1010 if (res == NULL)
1011 goto error;
1012 self->seekable = self->telling = PyObject_IsTrue(res);
1013 Py_DECREF(res);
1014
Antoine Pitroue4501852009-05-14 18:55:55 +00001015 self->encoding_start_of_stream = 0;
1016 if (self->seekable && self->encoder) {
1017 PyObject *cookieObj;
1018 int cmp;
1019
1020 self->encoding_start_of_stream = 1;
1021
1022 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1023 if (cookieObj == NULL)
1024 goto error;
1025
1026 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1027 Py_DECREF(cookieObj);
1028 if (cmp < 0) {
1029 goto error;
1030 }
1031
1032 if (cmp == 0) {
1033 self->encoding_start_of_stream = 0;
1034 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1035 _PyIO_zero, NULL);
1036 if (res == NULL)
1037 goto error;
1038 Py_DECREF(res);
1039 }
1040 }
1041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042 self->ok = 1;
1043 return 0;
1044
1045 error:
1046 return -1;
1047}
1048
1049static int
1050_TextIOWrapper_clear(PyTextIOWrapperObject *self)
1051{
1052 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1053 return -1;
1054 self->ok = 0;
1055 Py_CLEAR(self->buffer);
1056 Py_CLEAR(self->encoding);
1057 Py_CLEAR(self->encoder);
1058 Py_CLEAR(self->decoder);
1059 Py_CLEAR(self->readnl);
1060 Py_CLEAR(self->decoded_chars);
1061 Py_CLEAR(self->pending_bytes);
1062 Py_CLEAR(self->snapshot);
1063 Py_CLEAR(self->errors);
1064 Py_CLEAR(self->raw);
1065 return 0;
1066}
1067
1068static void
1069TextIOWrapper_dealloc(PyTextIOWrapperObject *self)
1070{
1071 if (_TextIOWrapper_clear(self) < 0)
1072 return;
1073 _PyObject_GC_UNTRACK(self);
1074 if (self->weakreflist != NULL)
1075 PyObject_ClearWeakRefs((PyObject *)self);
1076 Py_CLEAR(self->dict);
1077 Py_TYPE(self)->tp_free((PyObject *)self);
1078}
1079
1080static int
1081TextIOWrapper_traverse(PyTextIOWrapperObject *self, visitproc visit, void *arg)
1082{
1083 Py_VISIT(self->buffer);
1084 Py_VISIT(self->encoding);
1085 Py_VISIT(self->encoder);
1086 Py_VISIT(self->decoder);
1087 Py_VISIT(self->readnl);
1088 Py_VISIT(self->decoded_chars);
1089 Py_VISIT(self->pending_bytes);
1090 Py_VISIT(self->snapshot);
1091 Py_VISIT(self->errors);
1092 Py_VISIT(self->raw);
1093
1094 Py_VISIT(self->dict);
1095 return 0;
1096}
1097
1098static int
1099TextIOWrapper_clear(PyTextIOWrapperObject *self)
1100{
1101 if (_TextIOWrapper_clear(self) < 0)
1102 return -1;
1103 Py_CLEAR(self->dict);
1104 return 0;
1105}
1106
1107static PyObject *
1108TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context);
1109
1110/* This macro takes some shortcuts to make the common case faster. */
1111#define CHECK_CLOSED(self) \
1112 do { \
1113 int r; \
1114 PyObject *_res; \
1115 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1116 if (self->raw != NULL) \
1117 r = _PyFileIO_closed(self->raw); \
1118 else { \
1119 _res = TextIOWrapper_closed_get(self, NULL); \
1120 if (_res == NULL) \
1121 return NULL; \
1122 r = PyObject_IsTrue(_res); \
1123 Py_DECREF(_res); \
1124 if (r < 0) \
1125 return NULL; \
1126 } \
1127 if (r > 0) { \
1128 PyErr_SetString(PyExc_ValueError, \
1129 "I/O operation on closed file."); \
1130 return NULL; \
1131 } \
1132 } \
1133 else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
1134 return NULL; \
1135 } while (0)
1136
/* Raise ValueError and return NULL from the enclosing function when the
   object is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1148
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1160
1161
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001162static PyObject *
1163TextIOWrapper_detach(PyTextIOWrapperObject *self)
1164{
1165 PyObject *buffer, *res;
1166 CHECK_INITIALIZED(self);
1167 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1168 if (res == NULL)
1169 return NULL;
1170 Py_DECREF(res);
1171 buffer = self->buffer;
1172 self->buffer = NULL;
1173 self->detached = 1;
1174 self->ok = 0;
1175 return buffer;
1176}
1177
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001178Py_LOCAL_INLINE(const Py_UNICODE *)
1179findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1180{
1181 /* like wcschr, but doesn't stop at NULL characters */
1182 while (size-- > 0) {
1183 if (*s == ch)
1184 return s;
1185 s++;
1186 }
1187 return NULL;
1188}
1189
Antoine Pitrou24f36292009-03-28 22:16:42 +00001190/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001191 underlying buffered object, though. */
1192static int
1193_TextIOWrapper_writeflush(PyTextIOWrapperObject *self)
1194{
1195 PyObject *b, *ret;
1196
1197 if (self->pending_bytes == NULL)
1198 return 0;
1199 b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1200 if (b == NULL)
1201 return -1;
1202 ret = PyObject_CallMethodObjArgs(self->buffer,
1203 _PyIO_str_write, b, NULL);
1204 Py_DECREF(b);
1205 if (ret == NULL)
1206 return -1;
1207 Py_DECREF(ret);
1208 Py_CLEAR(self->pending_bytes);
1209 self->pending_bytes_count = 0;
1210 return 0;
1211}
1212
1213static PyObject *
1214TextIOWrapper_write(PyTextIOWrapperObject *self, PyObject *args)
1215{
1216 PyObject *ret;
1217 PyObject *text; /* owned reference */
1218 PyObject *b;
1219 Py_ssize_t textlen;
1220 int haslf = 0;
1221 int needflush = 0;
1222
1223 CHECK_INITIALIZED(self);
1224
1225 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1226 return NULL;
1227 }
1228
1229 CHECK_CLOSED(self);
1230
1231 Py_INCREF(text);
1232
1233 textlen = PyUnicode_GetSize(text);
1234
1235 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1236 if (findchar(PyUnicode_AS_UNICODE(text),
1237 PyUnicode_GET_SIZE(text), '\n'))
1238 haslf = 1;
1239
1240 if (haslf && self->writetranslate && self->writenl != NULL) {
1241 PyObject *newtext = PyObject_CallMethod(
1242 text, "replace", "ss", "\n", self->writenl);
1243 Py_DECREF(text);
1244 if (newtext == NULL)
1245 return NULL;
1246 text = newtext;
1247 }
1248
1249 if (self->line_buffering &&
1250 (haslf ||
1251 findchar(PyUnicode_AS_UNICODE(text),
1252 PyUnicode_GET_SIZE(text), '\r')))
1253 needflush = 1;
1254
1255 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001256 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001257 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001258 self->encoding_start_of_stream = 0;
1259 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001260 else
1261 b = PyObject_CallMethodObjArgs(self->encoder,
1262 _PyIO_str_encode, text, NULL);
1263 Py_DECREF(text);
1264 if (b == NULL)
1265 return NULL;
1266
1267 if (self->pending_bytes == NULL) {
1268 self->pending_bytes = PyList_New(0);
1269 if (self->pending_bytes == NULL) {
1270 Py_DECREF(b);
1271 return NULL;
1272 }
1273 self->pending_bytes_count = 0;
1274 }
1275 if (PyList_Append(self->pending_bytes, b) < 0) {
1276 Py_DECREF(b);
1277 return NULL;
1278 }
1279 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1280 Py_DECREF(b);
1281 if (self->pending_bytes_count > self->chunk_size || needflush) {
1282 if (_TextIOWrapper_writeflush(self) < 0)
1283 return NULL;
1284 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001285
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286 if (needflush) {
1287 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1288 if (ret == NULL)
1289 return NULL;
1290 Py_DECREF(ret);
1291 }
1292
1293 Py_CLEAR(self->snapshot);
1294
1295 if (self->decoder) {
1296 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1297 if (ret == NULL)
1298 return NULL;
1299 Py_DECREF(ret);
1300 }
1301
1302 return PyLong_FromSsize_t(textlen);
1303}
1304
1305/* Steal a reference to chars and store it in the decoded_char buffer;
1306 */
1307static void
1308TextIOWrapper_set_decoded_chars(PyTextIOWrapperObject *self, PyObject *chars)
1309{
1310 Py_CLEAR(self->decoded_chars);
1311 self->decoded_chars = chars;
1312 self->decoded_chars_used = 0;
1313}
1314
1315static PyObject *
1316TextIOWrapper_get_decoded_chars(PyTextIOWrapperObject *self, Py_ssize_t n)
1317{
1318 PyObject *chars;
1319 Py_ssize_t avail;
1320
1321 if (self->decoded_chars == NULL)
1322 return PyUnicode_FromStringAndSize(NULL, 0);
1323
1324 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1325 - self->decoded_chars_used);
1326
1327 assert(avail >= 0);
1328
1329 if (n < 0 || n > avail)
1330 n = avail;
1331
1332 if (self->decoded_chars_used > 0 || n < avail) {
1333 chars = PyUnicode_FromUnicode(
1334 PyUnicode_AS_UNICODE(self->decoded_chars)
1335 + self->decoded_chars_used, n);
1336 if (chars == NULL)
1337 return NULL;
1338 }
1339 else {
1340 chars = self->decoded_chars;
1341 Py_INCREF(chars);
1342 }
1343
1344 self->decoded_chars_used += n;
1345 return chars;
1346}
1347
1348/* Read and decode the next chunk of data from the BufferedReader.
1349 */
1350static int
1351TextIOWrapper_read_chunk(PyTextIOWrapperObject *self)
1352{
1353 PyObject *dec_buffer = NULL;
1354 PyObject *dec_flags = NULL;
1355 PyObject *input_chunk = NULL;
1356 PyObject *decoded_chars, *chunk_size;
1357 int eof;
1358
1359 /* The return value is True unless EOF was reached. The decoded string is
1360 * placed in self._decoded_chars (replacing its previous value). The
1361 * entire input chunk is sent to the decoder, though some of it may remain
1362 * buffered in the decoder, yet to be converted.
1363 */
1364
1365 if (self->decoder == NULL) {
1366 PyErr_SetString(PyExc_ValueError, "no decoder");
1367 return -1;
1368 }
1369
1370 if (self->telling) {
1371 /* To prepare for tell(), we need to snapshot a point in the file
1372 * where the decoder's input buffer is empty.
1373 */
1374
1375 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1376 _PyIO_str_getstate, NULL);
1377 if (state == NULL)
1378 return -1;
1379 /* Given this, we know there was a valid snapshot point
1380 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1381 */
1382 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1383 Py_DECREF(state);
1384 return -1;
1385 }
1386 Py_INCREF(dec_buffer);
1387 Py_INCREF(dec_flags);
1388 Py_DECREF(state);
1389 }
1390
1391 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1392 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1393 if (chunk_size == NULL)
1394 goto fail;
1395 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1396 _PyIO_str_read1, chunk_size, NULL);
1397 Py_DECREF(chunk_size);
1398 if (input_chunk == NULL)
1399 goto fail;
1400 assert(PyBytes_Check(input_chunk));
1401
1402 eof = (PyBytes_Size(input_chunk) == 0);
1403
1404 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1405 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1406 self->decoder, input_chunk, eof);
1407 }
1408 else {
1409 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1410 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1411 }
1412
1413 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1414 if (decoded_chars == NULL)
1415 goto fail;
1416 TextIOWrapper_set_decoded_chars(self, decoded_chars);
1417 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1418 eof = 0;
1419
1420 if (self->telling) {
1421 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1422 * next input to be decoded is dec_buffer + input_chunk.
1423 */
1424 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1425 if (next_input == NULL)
1426 goto fail;
1427 assert (PyBytes_Check(next_input));
1428 Py_DECREF(dec_buffer);
1429 Py_CLEAR(self->snapshot);
1430 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1431 }
1432 Py_DECREF(input_chunk);
1433
1434 return (eof == 0);
1435
1436 fail:
1437 Py_XDECREF(dec_buffer);
1438 Py_XDECREF(dec_flags);
1439 Py_XDECREF(input_chunk);
1440 return -1;
1441}
1442
1443static PyObject *
1444TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
1445{
1446 Py_ssize_t n = -1;
1447 PyObject *result = NULL, *chunks = NULL;
1448
1449 CHECK_INITIALIZED(self);
1450
1451 if (!PyArg_ParseTuple(args, "|n:read", &n))
1452 return NULL;
1453
1454 CHECK_CLOSED(self);
1455
Benjamin Petersona1b49012009-03-31 23:11:32 +00001456 if (self->decoder == NULL) {
1457 PyErr_SetString(PyExc_IOError, "not readable");
1458 return NULL;
1459 }
1460
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001461 if (_TextIOWrapper_writeflush(self) < 0)
1462 return NULL;
1463
1464 if (n < 0) {
1465 /* Read everything */
1466 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1467 PyObject *decoded;
1468 if (bytes == NULL)
1469 goto fail;
1470 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1471 bytes, Py_True, NULL);
1472 Py_DECREF(bytes);
1473 if (decoded == NULL)
1474 goto fail;
1475
1476 result = TextIOWrapper_get_decoded_chars(self, -1);
1477
1478 if (result == NULL) {
1479 Py_DECREF(decoded);
1480 return NULL;
1481 }
1482
1483 PyUnicode_AppendAndDel(&result, decoded);
1484 if (result == NULL)
1485 goto fail;
1486
1487 Py_CLEAR(self->snapshot);
1488 return result;
1489 }
1490 else {
1491 int res = 1;
1492 Py_ssize_t remaining = n;
1493
1494 result = TextIOWrapper_get_decoded_chars(self, n);
1495 if (result == NULL)
1496 goto fail;
1497 remaining -= PyUnicode_GET_SIZE(result);
1498
1499 /* Keep reading chunks until we have n characters to return */
1500 while (remaining > 0) {
1501 res = TextIOWrapper_read_chunk(self);
1502 if (res < 0)
1503 goto fail;
1504 if (res == 0) /* EOF */
1505 break;
1506 if (chunks == NULL) {
1507 chunks = PyList_New(0);
1508 if (chunks == NULL)
1509 goto fail;
1510 }
1511 if (PyList_Append(chunks, result) < 0)
1512 goto fail;
1513 Py_DECREF(result);
1514 result = TextIOWrapper_get_decoded_chars(self, remaining);
1515 if (result == NULL)
1516 goto fail;
1517 remaining -= PyUnicode_GET_SIZE(result);
1518 }
1519 if (chunks != NULL) {
1520 if (result != NULL && PyList_Append(chunks, result) < 0)
1521 goto fail;
1522 Py_CLEAR(result);
1523 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1524 if (result == NULL)
1525 goto fail;
1526 Py_CLEAR(chunks);
1527 }
1528 return result;
1529 }
1530 fail:
1531 Py_XDECREF(result);
1532 Py_XDECREF(chunks);
1533 return NULL;
1534}
1535
1536
1537/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1538 that is to the NUL character. Otherwise the function will produce
1539 incorrect results. */
1540static Py_UNICODE *
1541find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1542{
1543 Py_UNICODE *s = start;
1544 for (;;) {
1545 while (*s > ch)
1546 s++;
1547 if (*s == ch)
1548 return s;
1549 if (s == end)
1550 return NULL;
1551 s++;
1552 }
1553}
1554
1555Py_ssize_t
1556_PyIO_find_line_ending(
1557 int translated, int universal, PyObject *readnl,
1558 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1559{
1560 Py_ssize_t len = end - start;
1561
1562 if (translated) {
1563 /* Newlines are already translated, only search for \n */
1564 Py_UNICODE *pos = find_control_char(start, end, '\n');
1565 if (pos != NULL)
1566 return pos - start + 1;
1567 else {
1568 *consumed = len;
1569 return -1;
1570 }
1571 }
1572 else if (universal) {
1573 /* Universal newline search. Find any of \r, \r\n, \n
1574 * The decoder ensures that \r\n are not split in two pieces
1575 */
1576 Py_UNICODE *s = start;
1577 for (;;) {
1578 Py_UNICODE ch;
1579 /* Fast path for non-control chars. The loop always ends
1580 since the Py_UNICODE storage is NUL-terminated. */
1581 while (*s > '\r')
1582 s++;
1583 if (s >= end) {
1584 *consumed = len;
1585 return -1;
1586 }
1587 ch = *s++;
1588 if (ch == '\n')
1589 return s - start;
1590 if (ch == '\r') {
1591 if (*s == '\n')
1592 return s - start + 1;
1593 else
1594 return s - start;
1595 }
1596 }
1597 }
1598 else {
1599 /* Non-universal mode. */
1600 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1601 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1602 if (readnl_len == 1) {
1603 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1604 if (pos != NULL)
1605 return pos - start + 1;
1606 *consumed = len;
1607 return -1;
1608 }
1609 else {
1610 Py_UNICODE *s = start;
1611 Py_UNICODE *e = end - readnl_len + 1;
1612 Py_UNICODE *pos;
1613 if (e < s)
1614 e = s;
1615 while (s < e) {
1616 Py_ssize_t i;
1617 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1618 if (pos == NULL || pos >= e)
1619 break;
1620 for (i = 1; i < readnl_len; i++) {
1621 if (pos[i] != nl[i])
1622 break;
1623 }
1624 if (i == readnl_len)
1625 return pos - start + readnl_len;
1626 s = pos + 1;
1627 }
1628 pos = find_control_char(e, end, nl[0]);
1629 if (pos == NULL)
1630 *consumed = len;
1631 else
1632 *consumed = pos - start;
1633 return -1;
1634 }
1635 }
1636}
1637
1638static PyObject *
1639_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
1640{
1641 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1642 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1643 int res;
1644
1645 CHECK_CLOSED(self);
1646
1647 if (_TextIOWrapper_writeflush(self) < 0)
1648 return NULL;
1649
1650 chunked = 0;
1651
1652 while (1) {
1653 Py_UNICODE *ptr;
1654 Py_ssize_t line_len;
1655 Py_ssize_t consumed = 0;
1656
1657 /* First, get some data if necessary */
1658 res = 1;
1659 while (!self->decoded_chars ||
1660 !PyUnicode_GET_SIZE(self->decoded_chars)) {
1661 res = TextIOWrapper_read_chunk(self);
1662 if (res < 0)
1663 goto error;
1664 if (res == 0)
1665 break;
1666 }
1667 if (res == 0) {
1668 /* end of file */
1669 TextIOWrapper_set_decoded_chars(self, NULL);
1670 Py_CLEAR(self->snapshot);
1671 start = endpos = offset_to_buffer = 0;
1672 break;
1673 }
1674
1675 if (remaining == NULL) {
1676 line = self->decoded_chars;
1677 start = self->decoded_chars_used;
1678 offset_to_buffer = 0;
1679 Py_INCREF(line);
1680 }
1681 else {
1682 assert(self->decoded_chars_used == 0);
1683 line = PyUnicode_Concat(remaining, self->decoded_chars);
1684 start = 0;
1685 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1686 Py_CLEAR(remaining);
1687 if (line == NULL)
1688 goto error;
1689 }
1690
1691 ptr = PyUnicode_AS_UNICODE(line);
1692 line_len = PyUnicode_GET_SIZE(line);
1693
1694 endpos = _PyIO_find_line_ending(
1695 self->readtranslate, self->readuniversal, self->readnl,
1696 ptr + start, ptr + line_len, &consumed);
1697 if (endpos >= 0) {
1698 endpos += start;
1699 if (limit >= 0 && (endpos - start) + chunked >= limit)
1700 endpos = start + limit - chunked;
1701 break;
1702 }
1703
1704 /* We can put aside up to `endpos` */
1705 endpos = consumed + start;
1706 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1707 /* Didn't find line ending, but reached length limit */
1708 endpos = start + limit - chunked;
1709 break;
1710 }
1711
1712 if (endpos > start) {
1713 /* No line ending seen yet - put aside current data */
1714 PyObject *s;
1715 if (chunks == NULL) {
1716 chunks = PyList_New(0);
1717 if (chunks == NULL)
1718 goto error;
1719 }
1720 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1721 if (s == NULL)
1722 goto error;
1723 if (PyList_Append(chunks, s) < 0) {
1724 Py_DECREF(s);
1725 goto error;
1726 }
1727 chunked += PyUnicode_GET_SIZE(s);
1728 Py_DECREF(s);
1729 }
1730 /* There may be some remaining bytes we'll have to prepend to the
1731 next chunk of data */
1732 if (endpos < line_len) {
1733 remaining = PyUnicode_FromUnicode(
1734 ptr + endpos, line_len - endpos);
1735 if (remaining == NULL)
1736 goto error;
1737 }
1738 Py_CLEAR(line);
1739 /* We have consumed the buffer */
1740 TextIOWrapper_set_decoded_chars(self, NULL);
1741 }
1742
1743 if (line != NULL) {
1744 /* Our line ends in the current buffer */
1745 self->decoded_chars_used = endpos - offset_to_buffer;
1746 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1747 if (start == 0 && Py_REFCNT(line) == 1) {
1748 if (PyUnicode_Resize(&line, endpos) < 0)
1749 goto error;
1750 }
1751 else {
1752 PyObject *s = PyUnicode_FromUnicode(
1753 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1754 Py_CLEAR(line);
1755 if (s == NULL)
1756 goto error;
1757 line = s;
1758 }
1759 }
1760 }
1761 if (remaining != NULL) {
1762 if (chunks == NULL) {
1763 chunks = PyList_New(0);
1764 if (chunks == NULL)
1765 goto error;
1766 }
1767 if (PyList_Append(chunks, remaining) < 0)
1768 goto error;
1769 Py_CLEAR(remaining);
1770 }
1771 if (chunks != NULL) {
1772 if (line != NULL && PyList_Append(chunks, line) < 0)
1773 goto error;
1774 Py_CLEAR(line);
1775 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1776 if (line == NULL)
1777 goto error;
1778 Py_DECREF(chunks);
1779 }
1780 if (line == NULL)
1781 line = PyUnicode_FromStringAndSize(NULL, 0);
1782
1783 return line;
1784
1785 error:
1786 Py_XDECREF(chunks);
1787 Py_XDECREF(remaining);
1788 Py_XDECREF(line);
1789 return NULL;
1790}
1791
1792static PyObject *
1793TextIOWrapper_readline(PyTextIOWrapperObject *self, PyObject *args)
1794{
1795 Py_ssize_t limit = -1;
1796
1797 CHECK_INITIALIZED(self);
1798 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1799 return NULL;
1800 }
1801 return _TextIOWrapper_readline(self, limit);
1802}
1803
1804/* Seek and Tell */
1805
1806typedef struct {
1807 Py_off_t start_pos;
1808 int dec_flags;
1809 int bytes_to_feed;
1810 int chars_to_skip;
1811 char need_eof;
1812} CookieStruct;
1813
1814/*
1815 To speed up cookie packing/unpacking, we store the fields in a temporary
1816 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1817 The following macros define at which offsets in the intermediary byte
1818 string the various CookieStruct fields will be stored.
1819 */
1820
1821#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1822
1823#if defined(WORDS_BIGENDIAN)
1824
1825# define IS_LITTLE_ENDIAN 0
1826
1827/* We want the least significant byte of start_pos to also be the least
1828 significant byte of the cookie, which means that in big-endian mode we
1829 must copy the fields in reverse order. */
1830
1831# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1832# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1833# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1834# define OFF_CHARS_TO_SKIP (sizeof(char))
1835# define OFF_NEED_EOF 0
1836
1837#else
1838
1839# define IS_LITTLE_ENDIAN 1
1840
1841/* Little-endian mode: the least significant byte of start_pos will
1842 naturally end up the least significant byte of the cookie. */
1843
1844# define OFF_START_POS 0
1845# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1846# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1847# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1848# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1849
1850#endif
1851
1852static int
1853TextIOWrapper_parseCookie(CookieStruct *cookie, PyObject *cookieObj)
1854{
1855 unsigned char buffer[COOKIE_BUF_LEN];
1856 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1857 if (cookieLong == NULL)
1858 return -1;
1859
1860 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1861 IS_LITTLE_ENDIAN, 0) < 0) {
1862 Py_DECREF(cookieLong);
1863 return -1;
1864 }
1865 Py_DECREF(cookieLong);
1866
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001867 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1868 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1869 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1870 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1871 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872
1873 return 0;
1874}
1875
1876static PyObject *
1877TextIOWrapper_buildCookie(CookieStruct *cookie)
1878{
1879 unsigned char buffer[COOKIE_BUF_LEN];
1880
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001881 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1882 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1883 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1884 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1885 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001886
1887 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1888}
1889#undef IS_LITTLE_ENDIAN
1890
1891static int
1892_TextIOWrapper_decoder_setstate(PyTextIOWrapperObject *self,
1893 CookieStruct *cookie)
1894{
1895 PyObject *res;
1896 /* When seeking to the start of the stream, we call decoder.reset()
1897 rather than decoder.getstate().
1898 This is for a few decoders such as utf-16 for which the state value
1899 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1900 utf-16, that we are expecting a BOM).
1901 */
1902 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1903 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1904 else
1905 res = PyObject_CallMethod(self->decoder, "setstate",
1906 "((yi))", "", cookie->dec_flags);
1907 if (res == NULL)
1908 return -1;
1909 Py_DECREF(res);
1910 return 0;
1911}
1912
Antoine Pitroue4501852009-05-14 18:55:55 +00001913static int
1914_TextIOWrapper_encoder_setstate(PyTextIOWrapperObject *self,
1915 CookieStruct *cookie)
1916{
1917 PyObject *res;
1918 /* Same as _TextIOWrapper_decoder_setstate() above. */
1919 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1920 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1921 self->encoding_start_of_stream = 1;
1922 }
1923 else {
1924 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1925 _PyIO_zero, NULL);
1926 self->encoding_start_of_stream = 0;
1927 }
1928 if (res == NULL)
1929 return -1;
1930 Py_DECREF(res);
1931 return 0;
1932}
1933
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001934static PyObject *
1935TextIOWrapper_seek(PyTextIOWrapperObject *self, PyObject *args)
1936{
1937 PyObject *cookieObj, *posobj;
1938 CookieStruct cookie;
1939 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001940 PyObject *res;
1941 int cmp;
1942
1943 CHECK_INITIALIZED(self);
1944
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001945 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1946 return NULL;
1947 CHECK_CLOSED(self);
1948
1949 Py_INCREF(cookieObj);
1950
1951 if (!self->seekable) {
1952 PyErr_SetString(PyExc_IOError,
1953 "underlying stream is not seekable");
1954 goto fail;
1955 }
1956
1957 if (whence == 1) {
1958 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00001959 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001960 if (cmp < 0)
1961 goto fail;
1962
1963 if (cmp == 0) {
1964 PyErr_SetString(PyExc_IOError,
1965 "can't do nonzero cur-relative seeks");
1966 goto fail;
1967 }
1968
1969 /* Seeking to the current position should attempt to
1970 * sync the underlying buffer with the current position.
1971 */
1972 Py_DECREF(cookieObj);
1973 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1974 if (cookieObj == NULL)
1975 goto fail;
1976 }
1977 else if (whence == 2) {
1978 /* seek relative to end of file */
1979
Antoine Pitroue4501852009-05-14 18:55:55 +00001980 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001981 if (cmp < 0)
1982 goto fail;
1983
1984 if (cmp == 0) {
1985 PyErr_SetString(PyExc_IOError,
1986 "can't do nonzero end-relative seeks");
1987 goto fail;
1988 }
1989
1990 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
1991 if (res == NULL)
1992 goto fail;
1993 Py_DECREF(res);
1994
1995 TextIOWrapper_set_decoded_chars(self, NULL);
1996 Py_CLEAR(self->snapshot);
1997 if (self->decoder) {
1998 res = PyObject_CallMethod(self->decoder, "reset", NULL);
1999 if (res == NULL)
2000 goto fail;
2001 Py_DECREF(res);
2002 }
2003
2004 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2005 Py_XDECREF(cookieObj);
2006 return res;
2007 }
2008 else if (whence != 0) {
2009 PyErr_Format(PyExc_ValueError,
2010 "invalid whence (%d, should be 0, 1 or 2)", whence);
2011 goto fail;
2012 }
2013
Antoine Pitroue4501852009-05-14 18:55:55 +00002014 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002015 if (cmp < 0)
2016 goto fail;
2017
2018 if (cmp == 1) {
2019 PyErr_Format(PyExc_ValueError,
2020 "negative seek position %R", cookieObj);
2021 goto fail;
2022 }
2023
2024 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2025 if (res == NULL)
2026 goto fail;
2027 Py_DECREF(res);
2028
2029 /* The strategy of seek() is to go back to the safe start point
2030 * and replay the effect of read(chars_to_skip) from there.
2031 */
2032 if (TextIOWrapper_parseCookie(&cookie, cookieObj) < 0)
2033 goto fail;
2034
2035 /* Seek back to the safe start point. */
2036 posobj = PyLong_FromOff_t(cookie.start_pos);
2037 if (posobj == NULL)
2038 goto fail;
2039 res = PyObject_CallMethodObjArgs(self->buffer,
2040 _PyIO_str_seek, posobj, NULL);
2041 Py_DECREF(posobj);
2042 if (res == NULL)
2043 goto fail;
2044 Py_DECREF(res);
2045
2046 TextIOWrapper_set_decoded_chars(self, NULL);
2047 Py_CLEAR(self->snapshot);
2048
2049 /* Restore the decoder to its state from the safe start point. */
2050 if (self->decoder) {
2051 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
2052 goto fail;
2053 }
2054
2055 if (cookie.chars_to_skip) {
2056 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2057 PyObject *input_chunk = PyObject_CallMethod(
2058 self->buffer, "read", "i", cookie.bytes_to_feed);
2059 PyObject *decoded;
2060
2061 if (input_chunk == NULL)
2062 goto fail;
2063
2064 assert (PyBytes_Check(input_chunk));
2065
2066 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2067 if (self->snapshot == NULL) {
2068 Py_DECREF(input_chunk);
2069 goto fail;
2070 }
2071
2072 decoded = PyObject_CallMethod(self->decoder, "decode",
2073 "Oi", input_chunk, (int)cookie.need_eof);
2074
2075 if (decoded == NULL)
2076 goto fail;
2077
2078 TextIOWrapper_set_decoded_chars(self, decoded);
2079
2080 /* Skip chars_to_skip of the decoded characters. */
2081 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2082 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2083 goto fail;
2084 }
2085 self->decoded_chars_used = cookie.chars_to_skip;
2086 }
2087 else {
2088 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2089 if (self->snapshot == NULL)
2090 goto fail;
2091 }
2092
Antoine Pitroue4501852009-05-14 18:55:55 +00002093 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2094 if (self->encoder) {
2095 if (_TextIOWrapper_encoder_setstate(self, &cookie) < 0)
2096 goto fail;
2097 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002098 return cookieObj;
2099 fail:
2100 Py_XDECREF(cookieObj);
2101 return NULL;
2102
2103}
2104
2105static PyObject *
2106TextIOWrapper_tell(PyTextIOWrapperObject *self, PyObject *args)
2107{
2108 PyObject *res;
2109 PyObject *posobj = NULL;
2110 CookieStruct cookie = {0,0,0,0,0};
2111 PyObject *next_input;
2112 Py_ssize_t chars_to_skip, chars_decoded;
2113 PyObject *saved_state = NULL;
2114 char *input, *input_end;
2115
2116 CHECK_INITIALIZED(self);
2117 CHECK_CLOSED(self);
2118
2119 if (!self->seekable) {
2120 PyErr_SetString(PyExc_IOError,
2121 "underlying stream is not seekable");
2122 goto fail;
2123 }
2124 if (!self->telling) {
2125 PyErr_SetString(PyExc_IOError,
2126 "telling position disabled by next() call");
2127 goto fail;
2128 }
2129
2130 if (_TextIOWrapper_writeflush(self) < 0)
2131 return NULL;
2132 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2133 if (res == NULL)
2134 goto fail;
2135 Py_DECREF(res);
2136
2137 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2138 if (posobj == NULL)
2139 goto fail;
2140
2141 if (self->decoder == NULL || self->snapshot == NULL) {
2142 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2143 return posobj;
2144 }
2145
2146#if defined(HAVE_LARGEFILE_SUPPORT)
2147 cookie.start_pos = PyLong_AsLongLong(posobj);
2148#else
2149 cookie.start_pos = PyLong_AsLong(posobj);
2150#endif
2151 if (PyErr_Occurred())
2152 goto fail;
2153
2154 /* Skip backward to the snapshot point (see _read_chunk). */
2155 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2156 goto fail;
2157
2158 assert (PyBytes_Check(next_input));
2159
2160 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2161
2162 /* How many decoded characters have been used up since the snapshot? */
2163 if (self->decoded_chars_used == 0) {
2164 /* We haven't moved from the snapshot point. */
2165 Py_DECREF(posobj);
2166 return TextIOWrapper_buildCookie(&cookie);
2167 }
2168
2169 chars_to_skip = self->decoded_chars_used;
2170
2171 /* Starting from the snapshot position, we will walk the decoder
2172 * forward until it gives us enough decoded characters.
2173 */
2174 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2175 _PyIO_str_getstate, NULL);
2176 if (saved_state == NULL)
2177 goto fail;
2178
2179 /* Note our initial start point. */
2180 if (_TextIOWrapper_decoder_setstate(self, &cookie) < 0)
2181 goto fail;
2182
2183 /* Feed the decoder one byte at a time. As we go, note the
2184 * nearest "safe start point" before the current location
2185 * (a point where the decoder has nothing buffered, so seek()
2186 * can safely start from there and advance to this location).
2187 */
2188 chars_decoded = 0;
2189 input = PyBytes_AS_STRING(next_input);
2190 input_end = input + PyBytes_GET_SIZE(next_input);
2191 while (input < input_end) {
2192 PyObject *state;
2193 char *dec_buffer;
2194 Py_ssize_t dec_buffer_len;
2195 int dec_flags;
2196
2197 PyObject *decoded = PyObject_CallMethod(
2198 self->decoder, "decode", "y#", input, 1);
2199 if (decoded == NULL)
2200 goto fail;
2201 assert (PyUnicode_Check(decoded));
2202 chars_decoded += PyUnicode_GET_SIZE(decoded);
2203 Py_DECREF(decoded);
2204
2205 cookie.bytes_to_feed += 1;
2206
2207 state = PyObject_CallMethodObjArgs(self->decoder,
2208 _PyIO_str_getstate, NULL);
2209 if (state == NULL)
2210 goto fail;
2211 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2212 Py_DECREF(state);
2213 goto fail;
2214 }
2215 Py_DECREF(state);
2216
2217 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2218 /* Decoder buffer is empty, so this is a safe start point. */
2219 cookie.start_pos += cookie.bytes_to_feed;
2220 chars_to_skip -= chars_decoded;
2221 cookie.dec_flags = dec_flags;
2222 cookie.bytes_to_feed = 0;
2223 chars_decoded = 0;
2224 }
2225 if (chars_decoded >= chars_to_skip)
2226 break;
2227 input++;
2228 }
2229 if (input == input_end) {
2230 /* We didn't get enough decoded data; signal EOF to get more. */
2231 PyObject *decoded = PyObject_CallMethod(
2232 self->decoder, "decode", "yi", "", /* final = */ 1);
2233 if (decoded == NULL)
2234 goto fail;
2235 assert (PyUnicode_Check(decoded));
2236 chars_decoded += PyUnicode_GET_SIZE(decoded);
2237 Py_DECREF(decoded);
2238 cookie.need_eof = 1;
2239
2240 if (chars_decoded < chars_to_skip) {
2241 PyErr_SetString(PyExc_IOError,
2242 "can't reconstruct logical file position");
2243 goto fail;
2244 }
2245 }
2246
2247 /* finally */
2248 Py_XDECREF(posobj);
2249 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2250 Py_DECREF(saved_state);
2251 if (res == NULL)
2252 return NULL;
2253 Py_DECREF(res);
2254
2255 /* The returned cookie corresponds to the last safe start point. */
2256 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2257 return TextIOWrapper_buildCookie(&cookie);
2258
2259 fail:
2260 Py_XDECREF(posobj);
2261 if (saved_state) {
2262 PyObject *type, *value, *traceback;
2263 PyErr_Fetch(&type, &value, &traceback);
2264
2265 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2266 Py_DECREF(saved_state);
2267 if (res == NULL)
2268 return NULL;
2269 Py_DECREF(res);
2270
2271 PyErr_Restore(type, value, traceback);
2272 }
2273 return NULL;
2274}
2275
2276static PyObject *
2277TextIOWrapper_truncate(PyTextIOWrapperObject *self, PyObject *args)
2278{
2279 PyObject *pos = Py_None;
2280 PyObject *res;
2281
2282 CHECK_INITIALIZED(self)
2283 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2284 return NULL;
2285 }
2286
2287 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2288 if (res == NULL)
2289 return NULL;
2290 Py_DECREF(res);
2291
2292 if (pos != Py_None) {
2293 res = PyObject_CallMethodObjArgs((PyObject *) self,
2294 _PyIO_str_seek, pos, NULL);
2295 if (res == NULL)
2296 return NULL;
2297 Py_DECREF(res);
2298 }
2299
2300 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2301}
2302
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002303static PyObject *
2304TextIOWrapper_repr(PyTextIOWrapperObject *self)
2305{
2306 CHECK_INITIALIZED(self);
2307 return PyUnicode_FromFormat("<TextIOWrapper encoding=%S>", self->encoding);
2308}
2309
2310
/* Inquiries */
2312
2313static PyObject *
2314TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args)
2315{
2316 CHECK_INITIALIZED(self);
2317 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2318}
2319
2320static PyObject *
2321TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args)
2322{
2323 CHECK_INITIALIZED(self);
2324 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2325}
2326
2327static PyObject *
2328TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args)
2329{
2330 CHECK_INITIALIZED(self);
2331 return PyObject_CallMethod(self->buffer, "readable", NULL);
2332}
2333
2334static PyObject *
2335TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args)
2336{
2337 CHECK_INITIALIZED(self);
2338 return PyObject_CallMethod(self->buffer, "writable", NULL);
2339}
2340
2341static PyObject *
2342TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args)
2343{
2344 CHECK_INITIALIZED(self);
2345 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2346}
2347
2348static PyObject *
2349TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args)
2350{
2351 CHECK_INITIALIZED(self);
2352 CHECK_CLOSED(self);
2353 self->telling = self->seekable;
2354 if (_TextIOWrapper_writeflush(self) < 0)
2355 return NULL;
2356 return PyObject_CallMethod(self->buffer, "flush", NULL);
2357}
2358
2359static PyObject *
2360TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args)
2361{
2362 PyObject *res;
2363 CHECK_INITIALIZED(self);
2364 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2365 if (res == NULL) {
2366 /* If flush() fails, just give up */
2367 PyErr_Clear();
2368 }
2369 else
2370 Py_DECREF(res);
2371
2372 return PyObject_CallMethod(self->buffer, "close", NULL);
2373}
2374
2375static PyObject *
2376TextIOWrapper_iternext(PyTextIOWrapperObject *self)
2377{
2378 PyObject *line;
2379
2380 CHECK_INITIALIZED(self);
2381
2382 self->telling = 0;
2383 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2384 /* Skip method call overhead for speed */
2385 line = _TextIOWrapper_readline(self, -1);
2386 }
2387 else {
2388 line = PyObject_CallMethodObjArgs((PyObject *)self,
2389 _PyIO_str_readline, NULL);
2390 if (line && !PyUnicode_Check(line)) {
2391 PyErr_Format(PyExc_IOError,
2392 "readline() should have returned an str object, "
2393 "not '%.200s'", Py_TYPE(line)->tp_name);
2394 Py_DECREF(line);
2395 return NULL;
2396 }
2397 }
2398
2399 if (line == NULL)
2400 return NULL;
2401
2402 if (PyUnicode_GET_SIZE(line) == 0) {
2403 /* Reached EOF or would have blocked */
2404 Py_DECREF(line);
2405 Py_CLEAR(self->snapshot);
2406 self->telling = self->seekable;
2407 return NULL;
2408 }
2409
2410 return line;
2411}
2412
2413static PyObject *
2414TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context)
2415{
2416 CHECK_INITIALIZED(self);
2417 return PyObject_GetAttrString(self->buffer, "name");
2418}
2419
2420static PyObject *
2421TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context)
2422{
2423 CHECK_INITIALIZED(self);
2424 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2425}
2426
2427static PyObject *
2428TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context)
2429{
2430 PyObject *res;
2431 CHECK_INITIALIZED(self);
2432 if (self->decoder == NULL)
2433 Py_RETURN_NONE;
2434 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2435 if (res == NULL) {
2436 PyErr_Clear();
2437 Py_RETURN_NONE;
2438 }
2439 return res;
2440}
2441
2442static PyObject *
2443TextIOWrapper_chunk_size_get(PyTextIOWrapperObject *self, void *context)
2444{
2445 CHECK_INITIALIZED(self);
2446 return PyLong_FromSsize_t(self->chunk_size);
2447}
2448
2449static int
2450TextIOWrapper_chunk_size_set(PyTextIOWrapperObject *self,
2451 PyObject *arg, void *context)
2452{
2453 Py_ssize_t n;
2454 CHECK_INITIALIZED_INT(self);
2455 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2456 if (n == -1 && PyErr_Occurred())
2457 return -1;
2458 if (n <= 0) {
2459 PyErr_SetString(PyExc_ValueError,
2460 "a strictly positive integer is required");
2461 return -1;
2462 }
2463 self->chunk_size = n;
2464 return 0;
2465}
2466
2467static PyMethodDef TextIOWrapper_methods[] = {
Benjamin Petersond2e0c792009-05-01 20:40:59 +00002468 {"detach", (PyCFunction)TextIOWrapper_detach, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002469 {"write", (PyCFunction)TextIOWrapper_write, METH_VARARGS},
2470 {"read", (PyCFunction)TextIOWrapper_read, METH_VARARGS},
2471 {"readline", (PyCFunction)TextIOWrapper_readline, METH_VARARGS},
2472 {"flush", (PyCFunction)TextIOWrapper_flush, METH_NOARGS},
2473 {"close", (PyCFunction)TextIOWrapper_close, METH_NOARGS},
2474
2475 {"fileno", (PyCFunction)TextIOWrapper_fileno, METH_NOARGS},
2476 {"seekable", (PyCFunction)TextIOWrapper_seekable, METH_NOARGS},
2477 {"readable", (PyCFunction)TextIOWrapper_readable, METH_NOARGS},
2478 {"writable", (PyCFunction)TextIOWrapper_writable, METH_NOARGS},
2479 {"isatty", (PyCFunction)TextIOWrapper_isatty, METH_NOARGS},
2480
2481 {"seek", (PyCFunction)TextIOWrapper_seek, METH_VARARGS},
2482 {"tell", (PyCFunction)TextIOWrapper_tell, METH_NOARGS},
2483 {"truncate", (PyCFunction)TextIOWrapper_truncate, METH_VARARGS},
2484 {NULL, NULL}
2485};
2486
2487static PyMemberDef TextIOWrapper_members[] = {
2488 {"encoding", T_OBJECT, offsetof(PyTextIOWrapperObject, encoding), READONLY},
2489 {"buffer", T_OBJECT, offsetof(PyTextIOWrapperObject, buffer), READONLY},
2490 {"line_buffering", T_BOOL, offsetof(PyTextIOWrapperObject, line_buffering), READONLY},
2491 {NULL}
2492};
2493
2494static PyGetSetDef TextIOWrapper_getset[] = {
2495 {"name", (getter)TextIOWrapper_name_get, NULL, NULL},
2496 {"closed", (getter)TextIOWrapper_closed_get, NULL, NULL},
2497/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2498*/
2499 {"newlines", (getter)TextIOWrapper_newlines_get, NULL, NULL},
2500 {"_CHUNK_SIZE", (getter)TextIOWrapper_chunk_size_get,
2501 (setter)TextIOWrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002502 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503};
2504
2505PyTypeObject PyTextIOWrapper_Type = {
2506 PyVarObject_HEAD_INIT(NULL, 0)
2507 "_io.TextIOWrapper", /*tp_name*/
2508 sizeof(PyTextIOWrapperObject), /*tp_basicsize*/
2509 0, /*tp_itemsize*/
2510 (destructor)TextIOWrapper_dealloc, /*tp_dealloc*/
2511 0, /*tp_print*/
2512 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002513 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002514 0, /*tp_compare */
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002515 (reprfunc)TextIOWrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002516 0, /*tp_as_number*/
2517 0, /*tp_as_sequence*/
2518 0, /*tp_as_mapping*/
2519 0, /*tp_hash */
2520 0, /*tp_call*/
2521 0, /*tp_str*/
2522 0, /*tp_getattro*/
2523 0, /*tp_setattro*/
2524 0, /*tp_as_buffer*/
2525 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2526 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
2527 TextIOWrapper_doc, /* tp_doc */
2528 (traverseproc)TextIOWrapper_traverse, /* tp_traverse */
2529 (inquiry)TextIOWrapper_clear, /* tp_clear */
2530 0, /* tp_richcompare */
2531 offsetof(PyTextIOWrapperObject, weakreflist), /*tp_weaklistoffset*/
2532 0, /* tp_iter */
2533 (iternextfunc)TextIOWrapper_iternext, /* tp_iternext */
2534 TextIOWrapper_methods, /* tp_methods */
2535 TextIOWrapper_members, /* tp_members */
2536 TextIOWrapper_getset, /* tp_getset */
2537 0, /* tp_base */
2538 0, /* tp_dict */
2539 0, /* tp_descr_get */
2540 0, /* tp_descr_set */
2541 offsetof(PyTextIOWrapperObject, dict), /*tp_dictoffset*/
2542 (initproc)TextIOWrapper_init, /* tp_init */
2543 0, /* tp_alloc */
2544 PyType_GenericNew, /* tp_new */
2545};