blob: 2d516ee3ccdbb706a9ac60b6552103aec2ba2af2 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000016PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 {NULL, NULL}
126};
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000132 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000163 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags accumulated in nldecoder_object.seennl, one per newline
   style encountered so far. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)

/* Decode one chunk of `input` through the wrapped decoder (or take it as
   is when the decoder is None), then record and optionally translate the
   newlines it contains.

   _self  -- an nldecoder_object (cast without a type check).
   input  -- object handed to decoder.decode(); must already be a str when
             the decoder is None.
   final  -- non-zero when this is the last chunk; a held-back '\r' is
             then emitted instead of being kept pending.

   Returns a new reference to a str, or NULL with an exception set
   (ValueError if __init__ was never called, TypeError if the decoder did
   not return a str, or whatever the decoder / allocation raised). */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    /* decoder is only NULL when __init__ has not been run */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No decoder: the input itself is the text; take our own reference
           so that both branches own `output`. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    /* Re-attach the '\r' held back by the previous call.  Nothing is done
       for an empty non-final chunk: the '\r' simply stays pending. */
    output_len = PyUnicode_GET_SIZE(output);
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place only when we hold the sole reference;
               otherwise build a shortened copy. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        /* Work on a local copy of the flags; merged back at the end. */
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           The search is over raw bytes, so a hit does not prove a '\r'
           character is present; only a miss is conclusive.
        */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters.
                       NOTE(review): there is no bounds check here; the
                       loop presumably stops at the buffer's terminating
                       NUL at in_str[len] -- confirm. */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    /* s has stepped past in_str[len]: end of data */
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters (same unchecked
                   scan as above, with '\r' as the threshold) */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    /* "\r\n" counts as a single CRLF, not as CR + LF */
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                /* Nothing new can be learnt once every kind was seen */
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            /* Read from in_str, write to out_str.  When translating in
               place both point at the same buffer; `out` never gets ahead
               of `in` because each step writes at most what it read. */
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    /* "\r\n" collapses to one '\n'; a lone '\r' becomes
                       '\n' as well */
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* Any other character <= '\r': either the end of the data
                   (in has stepped past in_str[len]) or a control character
                   that is copied through unchanged. */
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* Collapsed "\r\n" pairs make the result shorter */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
461
462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000463incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000564 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565};
566
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000569 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570};
571
572PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000575 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000593 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000600 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611};
612
613
614/* TextIOWrapper */
615
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200625 "newline controls how line endings are handled. It can be None, '',\n"
626 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
627 "\n"
628 "* On input, if newline is None, universal newlines mode is\n"
629 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
630 " these are translated into '\\n' before being returned to the\n"
631 " caller. If it is '', universal newline mode is enabled, but line\n"
632 " endings are returned to the caller untranslated. If it has any of\n"
633 " the other legal values, input lines are only terminated by the given\n"
634 " string, and the line ending is returned to the caller untranslated.\n"
635 "\n"
636 "* On output, if newline is None, any '\\n' characters written are\n"
637 " translated to the system default line separator, os.linesep. If\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200638 " newline is '' or '\n', no translation takes place. If newline is any\n"
639 " of the other legal values, any '\\n' characters written are translated\n"
640 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000641 "\n"
642 "If line_buffering is True, a call to flush is implied when a call to\n"
643 "write contains a newline character."
644 );
645
646typedef PyObject *
647 (*encodefunc_t)(PyObject *, PyObject *);
648
649typedef struct
650{
651 PyObject_HEAD
652 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000653 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 Py_ssize_t chunk_size;
655 PyObject *buffer;
656 PyObject *encoding;
657 PyObject *encoder;
658 PyObject *decoder;
659 PyObject *readnl;
660 PyObject *errors;
661 const char *writenl; /* utf-8 encoded, NULL stands for \n */
662 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200663 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000664 char readuniversal;
665 char readtranslate;
666 char writetranslate;
667 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200668 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000670 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000671 /* Specialized encoding func (see below) */
672 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000673 /* Whether or not it's the start of the stream */
674 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000675
676 /* Reads and writes are internally buffered in order to speed things up.
677 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000678
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000679 Please also note that text to be written is first encoded before being
680 buffered. This is necessary so that encoding errors are immediately
681 reported to the caller, but it unfortunately means that the
682 IncrementalEncoder (whose encode() method is always written in Python)
683 becomes a bottleneck for small writes.
684 */
685 PyObject *decoded_chars; /* buffer for text returned from decoder */
686 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
687 PyObject *pending_bytes; /* list of bytes objects waiting to be
688 written, or NULL */
689 Py_ssize_t pending_bytes_count;
690 PyObject *snapshot;
691 /* snapshot is either None, or a tuple (dec_flags, next_input) where
692 * dec_flags is the second (integer) item of the decoder state and
693 * next_input is the chunk of input bytes that comes next after the
694 * snapshot point. We use this to reconstruct decoder states in tell().
695 */
696
697 /* Cache raw object if it's a FileIO object */
698 PyObject *raw;
699
700 PyObject *weakreflist;
701 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000702} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703
704
705/* A couple of specialized cases in order to bypass the slow incremental
706 encoding methods for the most popular encodings. */
707
708static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000709ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710{
711 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
712 PyUnicode_GET_SIZE(text),
713 PyBytes_AS_STRING(self->errors));
714}
715
716static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000717utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718{
719 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
720 PyUnicode_GET_SIZE(text),
721 PyBytes_AS_STRING(self->errors), 1);
722}
723
724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000725utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726{
727 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
728 PyUnicode_GET_SIZE(text),
729 PyBytes_AS_STRING(self->errors), -1);
730}
731
732static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000733utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000734{
Antoine Pitroue4501852009-05-14 18:55:55 +0000735 if (!self->encoding_start_of_stream) {
736 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000738 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000740 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000741#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000742 }
743 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
744 PyUnicode_GET_SIZE(text),
745 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746}
747
Antoine Pitroue4501852009-05-14 18:55:55 +0000748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000750{
751 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
752 PyUnicode_GET_SIZE(text),
753 PyBytes_AS_STRING(self->errors), 1);
754}
755
756static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000757utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000758{
759 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
760 PyUnicode_GET_SIZE(text),
761 PyBytes_AS_STRING(self->errors), -1);
762}
763
764static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000765utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000766{
767 if (!self->encoding_start_of_stream) {
768 /* Skip the BOM and use native byte ordering */
769#if defined(WORDS_BIGENDIAN)
770 return utf32be_encode(self, text);
771#else
772 return utf32le_encode(self, text);
773#endif
774 }
775 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
776 PyUnicode_GET_SIZE(text),
777 PyBytes_AS_STRING(self->errors), 0);
778}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000779
780static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000781utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782{
783 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
784 PyUnicode_GET_SIZE(text),
785 PyBytes_AS_STRING(self->errors));
786}
787
788static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000789latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000790{
791 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
792 PyUnicode_GET_SIZE(text),
793 PyBytes_AS_STRING(self->errors));
794}
795
796/* Map normalized encoding names onto the specialized encoding funcs */
797
798typedef struct {
799 const char *name;
800 encodefunc_t encodefunc;
801} encodefuncentry;
802
Antoine Pitrou24f36292009-03-28 22:16:42 +0000803static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000804 {"ascii", (encodefunc_t) ascii_encode},
805 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000806 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000807 {"utf-16-be", (encodefunc_t) utf16be_encode},
808 {"utf-16-le", (encodefunc_t) utf16le_encode},
809 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000810 {"utf-32-be", (encodefunc_t) utf32be_encode},
811 {"utf-32-le", (encodefunc_t) utf32le_encode},
812 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000813 {NULL, NULL}
814};
815
816
817static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000818textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000819{
820 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200821 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000822 NULL};
823 PyObject *buffer, *raw;
824 char *encoding = NULL;
825 char *errors = NULL;
826 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200827 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000828 _PyIO_State *state = IO_STATE;
829
830 PyObject *res;
831 int r;
832
833 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000834 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200835 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200837 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000838 return -1;
839
840 if (newline && newline[0] != '\0'
841 && !(newline[0] == '\n' && newline[1] == '\0')
842 && !(newline[0] == '\r' && newline[1] == '\0')
843 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
844 PyErr_Format(PyExc_ValueError,
845 "illegal newline value: %s", newline);
846 return -1;
847 }
848
849 Py_CLEAR(self->buffer);
850 Py_CLEAR(self->encoding);
851 Py_CLEAR(self->encoder);
852 Py_CLEAR(self->decoder);
853 Py_CLEAR(self->readnl);
854 Py_CLEAR(self->decoded_chars);
855 Py_CLEAR(self->pending_bytes);
856 Py_CLEAR(self->snapshot);
857 Py_CLEAR(self->errors);
858 Py_CLEAR(self->raw);
859 self->decoded_chars_used = 0;
860 self->pending_bytes_count = 0;
861 self->encodefunc = NULL;
862
863 if (encoding == NULL) {
864 /* Try os.device_encoding(fileno) */
865 PyObject *fileno;
866 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
867 /* Ignore only AttributeError and UnsupportedOperation */
868 if (fileno == NULL) {
869 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
870 PyErr_ExceptionMatches(state->unsupported_operation)) {
871 PyErr_Clear();
872 }
873 else {
874 goto error;
875 }
876 }
877 else {
878 self->encoding = PyObject_CallMethod(state->os_module,
879 "device_encoding",
880 "N", fileno);
881 if (self->encoding == NULL)
882 goto error;
883 else if (!PyUnicode_Check(self->encoding))
884 Py_CLEAR(self->encoding);
885 }
886 }
887 if (encoding == NULL && self->encoding == NULL) {
888 if (state->locale_module == NULL) {
889 state->locale_module = PyImport_ImportModule("locale");
890 if (state->locale_module == NULL)
891 goto catch_ImportError;
892 else
893 goto use_locale;
894 }
895 else {
896 use_locale:
897 self->encoding = PyObject_CallMethod(
898 state->locale_module, "getpreferredencoding", NULL);
899 if (self->encoding == NULL) {
900 catch_ImportError:
901 /*
902 Importing locale can raise a ImportError because of
903 _functools, and locale.getpreferredencoding can raise a
904 ImportError if _locale is not available. These will happen
905 during module building.
906 */
907 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
908 PyErr_Clear();
909 self->encoding = PyUnicode_FromString("ascii");
910 }
911 else
912 goto error;
913 }
914 else if (!PyUnicode_Check(self->encoding))
915 Py_CLEAR(self->encoding);
916 }
917 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000918 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000919 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000920 if (encoding == NULL)
921 goto error;
922 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000923 else if (encoding != NULL) {
924 self->encoding = PyUnicode_FromString(encoding);
925 if (self->encoding == NULL)
926 goto error;
927 }
928 else {
929 PyErr_SetString(PyExc_IOError,
930 "could not determine default encoding");
931 }
932
933 if (errors == NULL)
934 errors = "strict";
935 self->errors = PyBytes_FromString(errors);
936 if (self->errors == NULL)
937 goto error;
938
939 self->chunk_size = 8192;
940 self->readuniversal = (newline == NULL || newline[0] == '\0');
941 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200942 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000943 self->readtranslate = (newline == NULL);
944 if (newline) {
945 self->readnl = PyUnicode_FromString(newline);
946 if (self->readnl == NULL)
947 return -1;
948 }
949 self->writetranslate = (newline == NULL || newline[0] != '\0');
950 if (!self->readuniversal && self->readnl) {
951 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000952 if (self->writenl == NULL)
953 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000954 if (!strcmp(self->writenl, "\n"))
955 self->writenl = NULL;
956 }
957#ifdef MS_WINDOWS
958 else
959 self->writenl = "\r\n";
960#endif
961
962 /* Build the decoder object */
963 res = PyObject_CallMethod(buffer, "readable", NULL);
964 if (res == NULL)
965 goto error;
966 r = PyObject_IsTrue(res);
967 Py_DECREF(res);
968 if (r == -1)
969 goto error;
970 if (r == 1) {
971 self->decoder = PyCodec_IncrementalDecoder(
972 encoding, errors);
973 if (self->decoder == NULL)
974 goto error;
975
976 if (self->readuniversal) {
977 PyObject *incrementalDecoder = PyObject_CallFunction(
978 (PyObject *)&PyIncrementalNewlineDecoder_Type,
979 "Oi", self->decoder, (int)self->readtranslate);
980 if (incrementalDecoder == NULL)
981 goto error;
982 Py_CLEAR(self->decoder);
983 self->decoder = incrementalDecoder;
984 }
985 }
986
987 /* Build the encoder object */
988 res = PyObject_CallMethod(buffer, "writable", NULL);
989 if (res == NULL)
990 goto error;
991 r = PyObject_IsTrue(res);
992 Py_DECREF(res);
993 if (r == -1)
994 goto error;
995 if (r == 1) {
996 PyObject *ci;
997 self->encoder = PyCodec_IncrementalEncoder(
998 encoding, errors);
999 if (self->encoder == NULL)
1000 goto error;
1001 /* Get the normalized named of the codec */
1002 ci = _PyCodec_Lookup(encoding);
1003 if (ci == NULL)
1004 goto error;
1005 res = PyObject_GetAttrString(ci, "name");
1006 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001007 if (res == NULL) {
1008 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1009 PyErr_Clear();
1010 else
1011 goto error;
1012 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001013 else if (PyUnicode_Check(res)) {
1014 encodefuncentry *e = encodefuncs;
1015 while (e->name != NULL) {
1016 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1017 self->encodefunc = e->encodefunc;
1018 break;
1019 }
1020 e++;
1021 }
1022 }
1023 Py_XDECREF(res);
1024 }
1025
1026 self->buffer = buffer;
1027 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001028
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001029 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1030 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1031 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1032 raw = PyObject_GetAttrString(buffer, "raw");
1033 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001034 if (raw == NULL) {
1035 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1036 PyErr_Clear();
1037 else
1038 goto error;
1039 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 else if (Py_TYPE(raw) == &PyFileIO_Type)
1041 self->raw = raw;
1042 else
1043 Py_DECREF(raw);
1044 }
1045
1046 res = PyObject_CallMethod(buffer, "seekable", NULL);
1047 if (res == NULL)
1048 goto error;
1049 self->seekable = self->telling = PyObject_IsTrue(res);
1050 Py_DECREF(res);
1051
Antoine Pitroue96ec682011-07-23 21:46:35 +02001052 self->has_read1 = PyObject_HasAttrString(buffer, "read1");
1053
Antoine Pitroue4501852009-05-14 18:55:55 +00001054 self->encoding_start_of_stream = 0;
1055 if (self->seekable && self->encoder) {
1056 PyObject *cookieObj;
1057 int cmp;
1058
1059 self->encoding_start_of_stream = 1;
1060
1061 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1062 if (cookieObj == NULL)
1063 goto error;
1064
1065 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1066 Py_DECREF(cookieObj);
1067 if (cmp < 0) {
1068 goto error;
1069 }
1070
1071 if (cmp == 0) {
1072 self->encoding_start_of_stream = 0;
1073 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1074 _PyIO_zero, NULL);
1075 if (res == NULL)
1076 goto error;
1077 Py_DECREF(res);
1078 }
1079 }
1080
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001081 self->ok = 1;
1082 return 0;
1083
1084 error:
1085 return -1;
1086}
1087
1088static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001089_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001090{
1091 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1092 return -1;
1093 self->ok = 0;
1094 Py_CLEAR(self->buffer);
1095 Py_CLEAR(self->encoding);
1096 Py_CLEAR(self->encoder);
1097 Py_CLEAR(self->decoder);
1098 Py_CLEAR(self->readnl);
1099 Py_CLEAR(self->decoded_chars);
1100 Py_CLEAR(self->pending_bytes);
1101 Py_CLEAR(self->snapshot);
1102 Py_CLEAR(self->errors);
1103 Py_CLEAR(self->raw);
1104 return 0;
1105}
1106
1107static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001108textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001109{
Antoine Pitroue033e062010-10-29 10:38:18 +00001110 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001111 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001112 return;
1113 _PyObject_GC_UNTRACK(self);
1114 if (self->weakreflist != NULL)
1115 PyObject_ClearWeakRefs((PyObject *)self);
1116 Py_CLEAR(self->dict);
1117 Py_TYPE(self)->tp_free((PyObject *)self);
1118}
1119
1120static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001121textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122{
1123 Py_VISIT(self->buffer);
1124 Py_VISIT(self->encoding);
1125 Py_VISIT(self->encoder);
1126 Py_VISIT(self->decoder);
1127 Py_VISIT(self->readnl);
1128 Py_VISIT(self->decoded_chars);
1129 Py_VISIT(self->pending_bytes);
1130 Py_VISIT(self->snapshot);
1131 Py_VISIT(self->errors);
1132 Py_VISIT(self->raw);
1133
1134 Py_VISIT(self->dict);
1135 return 0;
1136}
1137
1138static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001139textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001140{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001141 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001142 return -1;
1143 Py_CLEAR(self->dict);
1144 return 0;
1145}
1146
1147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001148textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001149
/* Make the *calling function* return NULL with an exception set if the
   stream is closed.  Some shortcuts are taken to make the common case
   faster: for exact TextIOWrapper instances the cached raw FileIO object is
   queried directly when there is one, otherwise the `closed` getter is
   used; other types go through _PyIOBase_check_closed(). */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1176
/* Make the *calling function* return NULL with a ValueError if the object
   is not usable, i.e. it was never successfully initialised or its buffer
   has been detached.  For functions returning PyObject *. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1188
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1200
1201
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001202static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001203textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001204{
1205 PyObject *buffer, *res;
1206 CHECK_INITIALIZED(self);
1207 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1208 if (res == NULL)
1209 return NULL;
1210 Py_DECREF(res);
1211 buffer = self->buffer;
1212 self->buffer = NULL;
1213 self->detached = 1;
1214 self->ok = 0;
1215 return buffer;
1216}
1217
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001218Py_LOCAL_INLINE(const Py_UNICODE *)
1219findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1220{
1221 /* like wcschr, but doesn't stop at NULL characters */
1222 while (size-- > 0) {
1223 if (*s == ch)
1224 return s;
1225 s++;
1226 }
1227 return NULL;
1228}
1229
Antoine Pitrou24f36292009-03-28 22:16:42 +00001230/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001231 underlying buffered object, though. */
1232static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001233_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001234{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001235 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001236
1237 if (self->pending_bytes == NULL)
1238 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001239
1240 pending = self->pending_bytes;
1241 Py_INCREF(pending);
1242 self->pending_bytes_count = 0;
1243 Py_CLEAR(self->pending_bytes);
1244
1245 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1246 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001247 if (b == NULL)
1248 return -1;
1249 ret = PyObject_CallMethodObjArgs(self->buffer,
1250 _PyIO_str_write, b, NULL);
1251 Py_DECREF(b);
1252 if (ret == NULL)
1253 return -1;
1254 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001255 return 0;
1256}
1257
1258static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001259textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001260{
1261 PyObject *ret;
1262 PyObject *text; /* owned reference */
1263 PyObject *b;
1264 Py_ssize_t textlen;
1265 int haslf = 0;
1266 int needflush = 0;
1267
1268 CHECK_INITIALIZED(self);
1269
1270 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1271 return NULL;
1272 }
1273
1274 CHECK_CLOSED(self);
1275
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001276 if (self->encoder == NULL)
1277 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001278
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279 Py_INCREF(text);
1280
1281 textlen = PyUnicode_GetSize(text);
1282
1283 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1284 if (findchar(PyUnicode_AS_UNICODE(text),
1285 PyUnicode_GET_SIZE(text), '\n'))
1286 haslf = 1;
1287
1288 if (haslf && self->writetranslate && self->writenl != NULL) {
1289 PyObject *newtext = PyObject_CallMethod(
1290 text, "replace", "ss", "\n", self->writenl);
1291 Py_DECREF(text);
1292 if (newtext == NULL)
1293 return NULL;
1294 text = newtext;
1295 }
1296
Antoine Pitroue96ec682011-07-23 21:46:35 +02001297 if (self->write_through)
1298 needflush = 1;
1299 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001300 (haslf ||
1301 findchar(PyUnicode_AS_UNICODE(text),
1302 PyUnicode_GET_SIZE(text), '\r')))
1303 needflush = 1;
1304
1305 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001306 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001307 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001308 self->encoding_start_of_stream = 0;
1309 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001310 else
1311 b = PyObject_CallMethodObjArgs(self->encoder,
1312 _PyIO_str_encode, text, NULL);
1313 Py_DECREF(text);
1314 if (b == NULL)
1315 return NULL;
1316
1317 if (self->pending_bytes == NULL) {
1318 self->pending_bytes = PyList_New(0);
1319 if (self->pending_bytes == NULL) {
1320 Py_DECREF(b);
1321 return NULL;
1322 }
1323 self->pending_bytes_count = 0;
1324 }
1325 if (PyList_Append(self->pending_bytes, b) < 0) {
1326 Py_DECREF(b);
1327 return NULL;
1328 }
1329 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1330 Py_DECREF(b);
1331 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001332 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001333 return NULL;
1334 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001335
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001336 if (needflush) {
1337 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1338 if (ret == NULL)
1339 return NULL;
1340 Py_DECREF(ret);
1341 }
1342
1343 Py_CLEAR(self->snapshot);
1344
1345 if (self->decoder) {
1346 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1347 if (ret == NULL)
1348 return NULL;
1349 Py_DECREF(ret);
1350 }
1351
1352 return PyLong_FromSsize_t(textlen);
1353}
1354
1355/* Steal a reference to chars and store it in the decoded_char buffer;
1356 */
1357static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001358textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001359{
1360 Py_CLEAR(self->decoded_chars);
1361 self->decoded_chars = chars;
1362 self->decoded_chars_used = 0;
1363}
1364
1365static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001366textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001367{
1368 PyObject *chars;
1369 Py_ssize_t avail;
1370
1371 if (self->decoded_chars == NULL)
1372 return PyUnicode_FromStringAndSize(NULL, 0);
1373
1374 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1375 - self->decoded_chars_used);
1376
1377 assert(avail >= 0);
1378
1379 if (n < 0 || n > avail)
1380 n = avail;
1381
1382 if (self->decoded_chars_used > 0 || n < avail) {
1383 chars = PyUnicode_FromUnicode(
1384 PyUnicode_AS_UNICODE(self->decoded_chars)
1385 + self->decoded_chars_used, n);
1386 if (chars == NULL)
1387 return NULL;
1388 }
1389 else {
1390 chars = self->decoded_chars;
1391 Py_INCREF(chars);
1392 }
1393
1394 self->decoded_chars_used += n;
1395 return chars;
1396}
1397
1398/* Read and decode the next chunk of data from the BufferedReader.
1399 */
1400static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001401textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001402{
1403 PyObject *dec_buffer = NULL;
1404 PyObject *dec_flags = NULL;
1405 PyObject *input_chunk = NULL;
1406 PyObject *decoded_chars, *chunk_size;
1407 int eof;
1408
1409 /* The return value is True unless EOF was reached. The decoded string is
1410 * placed in self._decoded_chars (replacing its previous value). The
1411 * entire input chunk is sent to the decoder, though some of it may remain
1412 * buffered in the decoder, yet to be converted.
1413 */
1414
1415 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001416 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001417 return -1;
1418 }
1419
1420 if (self->telling) {
1421 /* To prepare for tell(), we need to snapshot a point in the file
1422 * where the decoder's input buffer is empty.
1423 */
1424
1425 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1426 _PyIO_str_getstate, NULL);
1427 if (state == NULL)
1428 return -1;
1429 /* Given this, we know there was a valid snapshot point
1430 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1431 */
1432 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1433 Py_DECREF(state);
1434 return -1;
1435 }
1436 Py_INCREF(dec_buffer);
1437 Py_INCREF(dec_flags);
1438 Py_DECREF(state);
1439 }
1440
1441 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1442 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1443 if (chunk_size == NULL)
1444 goto fail;
1445 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001446 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1447 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001448 Py_DECREF(chunk_size);
1449 if (input_chunk == NULL)
1450 goto fail;
1451 assert(PyBytes_Check(input_chunk));
1452
1453 eof = (PyBytes_Size(input_chunk) == 0);
1454
1455 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1456 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1457 self->decoder, input_chunk, eof);
1458 }
1459 else {
1460 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1461 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1462 }
1463
1464 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1465 if (decoded_chars == NULL)
1466 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001467 textiowrapper_set_decoded_chars(self, decoded_chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001468 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1469 eof = 0;
1470
1471 if (self->telling) {
1472 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1473 * next input to be decoded is dec_buffer + input_chunk.
1474 */
1475 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1476 if (next_input == NULL)
1477 goto fail;
1478 assert (PyBytes_Check(next_input));
1479 Py_DECREF(dec_buffer);
1480 Py_CLEAR(self->snapshot);
1481 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1482 }
1483 Py_DECREF(input_chunk);
1484
1485 return (eof == 0);
1486
1487 fail:
1488 Py_XDECREF(dec_buffer);
1489 Py_XDECREF(dec_flags);
1490 Py_XDECREF(input_chunk);
1491 return -1;
1492}
1493
1494static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001495textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001496{
1497 Py_ssize_t n = -1;
1498 PyObject *result = NULL, *chunks = NULL;
1499
1500 CHECK_INITIALIZED(self);
1501
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001502 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001503 return NULL;
1504
1505 CHECK_CLOSED(self);
1506
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001507 if (self->decoder == NULL)
1508 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001509
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001510 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511 return NULL;
1512
1513 if (n < 0) {
1514 /* Read everything */
1515 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1516 PyObject *decoded;
1517 if (bytes == NULL)
1518 goto fail;
1519 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1520 bytes, Py_True, NULL);
1521 Py_DECREF(bytes);
1522 if (decoded == NULL)
1523 goto fail;
1524
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001525 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001526
1527 if (result == NULL) {
1528 Py_DECREF(decoded);
1529 return NULL;
1530 }
1531
1532 PyUnicode_AppendAndDel(&result, decoded);
1533 if (result == NULL)
1534 goto fail;
1535
1536 Py_CLEAR(self->snapshot);
1537 return result;
1538 }
1539 else {
1540 int res = 1;
1541 Py_ssize_t remaining = n;
1542
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001543 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001544 if (result == NULL)
1545 goto fail;
1546 remaining -= PyUnicode_GET_SIZE(result);
1547
1548 /* Keep reading chunks until we have n characters to return */
1549 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001550 res = textiowrapper_read_chunk(self);
Gregory P. Smith51359922012-06-23 23:55:39 -07001551 if (res < 0) {
1552 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1553 when EINTR occurs so we needn't do it ourselves. */
1554 if (_PyIO_trap_eintr()) {
1555 continue;
1556 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001557 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001558 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559 if (res == 0) /* EOF */
1560 break;
1561 if (chunks == NULL) {
1562 chunks = PyList_New(0);
1563 if (chunks == NULL)
1564 goto fail;
1565 }
1566 if (PyList_Append(chunks, result) < 0)
1567 goto fail;
1568 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001569 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001570 if (result == NULL)
1571 goto fail;
1572 remaining -= PyUnicode_GET_SIZE(result);
1573 }
1574 if (chunks != NULL) {
1575 if (result != NULL && PyList_Append(chunks, result) < 0)
1576 goto fail;
1577 Py_CLEAR(result);
1578 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1579 if (result == NULL)
1580 goto fail;
1581 Py_CLEAR(chunks);
1582 }
1583 return result;
1584 }
1585 fail:
1586 Py_XDECREF(result);
1587 Py_XDECREF(chunks);
1588 return NULL;
1589}
1590
1591
1592/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1593 that is to the NUL character. Otherwise the function will produce
1594 incorrect results. */
1595static Py_UNICODE *
1596find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1597{
1598 Py_UNICODE *s = start;
1599 for (;;) {
1600 while (*s > ch)
1601 s++;
1602 if (*s == ch)
1603 return s;
1604 if (s == end)
1605 return NULL;
1606 s++;
1607 }
1608}
1609
1610Py_ssize_t
1611_PyIO_find_line_ending(
1612 int translated, int universal, PyObject *readnl,
1613 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1614{
1615 Py_ssize_t len = end - start;
1616
1617 if (translated) {
1618 /* Newlines are already translated, only search for \n */
1619 Py_UNICODE *pos = find_control_char(start, end, '\n');
1620 if (pos != NULL)
1621 return pos - start + 1;
1622 else {
1623 *consumed = len;
1624 return -1;
1625 }
1626 }
1627 else if (universal) {
1628 /* Universal newline search. Find any of \r, \r\n, \n
1629 * The decoder ensures that \r\n are not split in two pieces
1630 */
1631 Py_UNICODE *s = start;
1632 for (;;) {
1633 Py_UNICODE ch;
1634 /* Fast path for non-control chars. The loop always ends
1635 since the Py_UNICODE storage is NUL-terminated. */
1636 while (*s > '\r')
1637 s++;
1638 if (s >= end) {
1639 *consumed = len;
1640 return -1;
1641 }
1642 ch = *s++;
1643 if (ch == '\n')
1644 return s - start;
1645 if (ch == '\r') {
1646 if (*s == '\n')
1647 return s - start + 1;
1648 else
1649 return s - start;
1650 }
1651 }
1652 }
1653 else {
1654 /* Non-universal mode. */
1655 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1656 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1657 if (readnl_len == 1) {
1658 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1659 if (pos != NULL)
1660 return pos - start + 1;
1661 *consumed = len;
1662 return -1;
1663 }
1664 else {
1665 Py_UNICODE *s = start;
1666 Py_UNICODE *e = end - readnl_len + 1;
1667 Py_UNICODE *pos;
1668 if (e < s)
1669 e = s;
1670 while (s < e) {
1671 Py_ssize_t i;
1672 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1673 if (pos == NULL || pos >= e)
1674 break;
1675 for (i = 1; i < readnl_len; i++) {
1676 if (pos[i] != nl[i])
1677 break;
1678 }
1679 if (i == readnl_len)
1680 return pos - start + readnl_len;
1681 s = pos + 1;
1682 }
1683 pos = find_control_char(e, end, nl[0]);
1684 if (pos == NULL)
1685 *consumed = len;
1686 else
1687 *consumed = pos - start;
1688 return -1;
1689 }
1690 }
1691}
1692
1693static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001694_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001695{
1696 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1697 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1698 int res;
1699
1700 CHECK_CLOSED(self);
1701
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001702 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 return NULL;
1704
1705 chunked = 0;
1706
1707 while (1) {
1708 Py_UNICODE *ptr;
1709 Py_ssize_t line_len;
1710 Py_ssize_t consumed = 0;
1711
1712 /* First, get some data if necessary */
1713 res = 1;
1714 while (!self->decoded_chars ||
1715 !PyUnicode_GET_SIZE(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001716 res = textiowrapper_read_chunk(self);
Gregory P. Smith51359922012-06-23 23:55:39 -07001717 if (res < 0) {
1718 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1719 when EINTR occurs so we needn't do it ourselves. */
1720 if (_PyIO_trap_eintr()) {
1721 continue;
1722 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001724 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001725 if (res == 0)
1726 break;
1727 }
1728 if (res == 0) {
1729 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001730 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001731 Py_CLEAR(self->snapshot);
1732 start = endpos = offset_to_buffer = 0;
1733 break;
1734 }
1735
1736 if (remaining == NULL) {
1737 line = self->decoded_chars;
1738 start = self->decoded_chars_used;
1739 offset_to_buffer = 0;
1740 Py_INCREF(line);
1741 }
1742 else {
1743 assert(self->decoded_chars_used == 0);
1744 line = PyUnicode_Concat(remaining, self->decoded_chars);
1745 start = 0;
1746 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1747 Py_CLEAR(remaining);
1748 if (line == NULL)
1749 goto error;
1750 }
1751
1752 ptr = PyUnicode_AS_UNICODE(line);
1753 line_len = PyUnicode_GET_SIZE(line);
1754
1755 endpos = _PyIO_find_line_ending(
1756 self->readtranslate, self->readuniversal, self->readnl,
1757 ptr + start, ptr + line_len, &consumed);
1758 if (endpos >= 0) {
1759 endpos += start;
1760 if (limit >= 0 && (endpos - start) + chunked >= limit)
1761 endpos = start + limit - chunked;
1762 break;
1763 }
1764
1765 /* We can put aside up to `endpos` */
1766 endpos = consumed + start;
1767 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1768 /* Didn't find line ending, but reached length limit */
1769 endpos = start + limit - chunked;
1770 break;
1771 }
1772
1773 if (endpos > start) {
1774 /* No line ending seen yet - put aside current data */
1775 PyObject *s;
1776 if (chunks == NULL) {
1777 chunks = PyList_New(0);
1778 if (chunks == NULL)
1779 goto error;
1780 }
1781 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1782 if (s == NULL)
1783 goto error;
1784 if (PyList_Append(chunks, s) < 0) {
1785 Py_DECREF(s);
1786 goto error;
1787 }
1788 chunked += PyUnicode_GET_SIZE(s);
1789 Py_DECREF(s);
1790 }
1791 /* There may be some remaining bytes we'll have to prepend to the
1792 next chunk of data */
1793 if (endpos < line_len) {
1794 remaining = PyUnicode_FromUnicode(
1795 ptr + endpos, line_len - endpos);
1796 if (remaining == NULL)
1797 goto error;
1798 }
1799 Py_CLEAR(line);
1800 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001801 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001802 }
1803
1804 if (line != NULL) {
1805 /* Our line ends in the current buffer */
1806 self->decoded_chars_used = endpos - offset_to_buffer;
1807 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1808 if (start == 0 && Py_REFCNT(line) == 1) {
1809 if (PyUnicode_Resize(&line, endpos) < 0)
1810 goto error;
1811 }
1812 else {
1813 PyObject *s = PyUnicode_FromUnicode(
1814 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1815 Py_CLEAR(line);
1816 if (s == NULL)
1817 goto error;
1818 line = s;
1819 }
1820 }
1821 }
1822 if (remaining != NULL) {
1823 if (chunks == NULL) {
1824 chunks = PyList_New(0);
1825 if (chunks == NULL)
1826 goto error;
1827 }
1828 if (PyList_Append(chunks, remaining) < 0)
1829 goto error;
1830 Py_CLEAR(remaining);
1831 }
1832 if (chunks != NULL) {
1833 if (line != NULL && PyList_Append(chunks, line) < 0)
1834 goto error;
1835 Py_CLEAR(line);
1836 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1837 if (line == NULL)
1838 goto error;
1839 Py_DECREF(chunks);
1840 }
1841 if (line == NULL)
1842 line = PyUnicode_FromStringAndSize(NULL, 0);
1843
1844 return line;
1845
1846 error:
1847 Py_XDECREF(chunks);
1848 Py_XDECREF(remaining);
1849 Py_XDECREF(line);
1850 return NULL;
1851}
1852
1853static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001854textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001855{
1856 Py_ssize_t limit = -1;
1857
1858 CHECK_INITIALIZED(self);
1859 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1860 return NULL;
1861 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001862 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001863}
1864
1865/* Seek and Tell */
1866
/* Unpacked form of the integer "cookie" that tell() builds and seek()
   parses.  The field comments describe how seek() below consumes them. */
typedef struct {
    Py_off_t start_pos;     /* position the underlying buffer is seeked to */
    int dec_flags;          /* flags handed to decoder.setstate() */
    int bytes_to_feed;      /* size passed to buffer.read() before decoding */
    int chars_to_skip;      /* decoded characters to mark as already used */
    char need_eof;          /* passed as the second ("final") argument of decode() */
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001874
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
 */

/* Size of the intermediary byte string: one Py_off_t, three ints, one char
   (the fields of cookie_type, without any padding). */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1912
1913static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001914textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001915{
1916 unsigned char buffer[COOKIE_BUF_LEN];
1917 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1918 if (cookieLong == NULL)
1919 return -1;
1920
1921 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1922 IS_LITTLE_ENDIAN, 0) < 0) {
1923 Py_DECREF(cookieLong);
1924 return -1;
1925 }
1926 Py_DECREF(cookieLong);
1927
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001928 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1929 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1930 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1931 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1932 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001933
1934 return 0;
1935}
1936
1937static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001938textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001939{
1940 unsigned char buffer[COOKIE_BUF_LEN];
1941
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001942 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1943 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1944 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1945 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1946 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001947
1948 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1949}
1950#undef IS_LITTLE_ENDIAN
1951
1952static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001953_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001954{
1955 PyObject *res;
1956 /* When seeking to the start of the stream, we call decoder.reset()
1957 rather than decoder.getstate().
1958 This is for a few decoders such as utf-16 for which the state value
1959 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1960 utf-16, that we are expecting a BOM).
1961 */
1962 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1963 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1964 else
1965 res = PyObject_CallMethod(self->decoder, "setstate",
1966 "((yi))", "", cookie->dec_flags);
1967 if (res == NULL)
1968 return -1;
1969 Py_DECREF(res);
1970 return 0;
1971}
1972
Antoine Pitroue4501852009-05-14 18:55:55 +00001973static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001974_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001975{
1976 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001977 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001978 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1979 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1980 self->encoding_start_of_stream = 1;
1981 }
1982 else {
1983 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1984 _PyIO_zero, NULL);
1985 self->encoding_start_of_stream = 0;
1986 }
1987 if (res == NULL)
1988 return -1;
1989 Py_DECREF(res);
1990 return 0;
1991}
1992
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001993static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001994textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001995{
1996 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001997 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 PyObject *res;
2000 int cmp;
2001
2002 CHECK_INITIALIZED(self);
2003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2005 return NULL;
2006 CHECK_CLOSED(self);
2007
2008 Py_INCREF(cookieObj);
2009
2010 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002011 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002012 goto fail;
2013 }
2014
2015 if (whence == 1) {
2016 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002017 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002018 if (cmp < 0)
2019 goto fail;
2020
2021 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002022 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002023 goto fail;
2024 }
2025
2026 /* Seeking to the current position should attempt to
2027 * sync the underlying buffer with the current position.
2028 */
2029 Py_DECREF(cookieObj);
2030 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2031 if (cookieObj == NULL)
2032 goto fail;
2033 }
2034 else if (whence == 2) {
2035 /* seek relative to end of file */
2036
Antoine Pitroue4501852009-05-14 18:55:55 +00002037 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002038 if (cmp < 0)
2039 goto fail;
2040
2041 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002042 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002043 goto fail;
2044 }
2045
2046 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2047 if (res == NULL)
2048 goto fail;
2049 Py_DECREF(res);
2050
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002051 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 Py_CLEAR(self->snapshot);
2053 if (self->decoder) {
2054 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2055 if (res == NULL)
2056 goto fail;
2057 Py_DECREF(res);
2058 }
2059
2060 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2061 Py_XDECREF(cookieObj);
2062 return res;
2063 }
2064 else if (whence != 0) {
2065 PyErr_Format(PyExc_ValueError,
2066 "invalid whence (%d, should be 0, 1 or 2)", whence);
2067 goto fail;
2068 }
2069
Antoine Pitroue4501852009-05-14 18:55:55 +00002070 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071 if (cmp < 0)
2072 goto fail;
2073
2074 if (cmp == 1) {
2075 PyErr_Format(PyExc_ValueError,
2076 "negative seek position %R", cookieObj);
2077 goto fail;
2078 }
2079
2080 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2081 if (res == NULL)
2082 goto fail;
2083 Py_DECREF(res);
2084
2085 /* The strategy of seek() is to go back to the safe start point
2086 * and replay the effect of read(chars_to_skip) from there.
2087 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002088 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002089 goto fail;
2090
2091 /* Seek back to the safe start point. */
2092 posobj = PyLong_FromOff_t(cookie.start_pos);
2093 if (posobj == NULL)
2094 goto fail;
2095 res = PyObject_CallMethodObjArgs(self->buffer,
2096 _PyIO_str_seek, posobj, NULL);
2097 Py_DECREF(posobj);
2098 if (res == NULL)
2099 goto fail;
2100 Py_DECREF(res);
2101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002102 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 Py_CLEAR(self->snapshot);
2104
2105 /* Restore the decoder to its state from the safe start point. */
2106 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002107 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002108 goto fail;
2109 }
2110
2111 if (cookie.chars_to_skip) {
2112 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2113 PyObject *input_chunk = PyObject_CallMethod(
2114 self->buffer, "read", "i", cookie.bytes_to_feed);
2115 PyObject *decoded;
2116
2117 if (input_chunk == NULL)
2118 goto fail;
2119
2120 assert (PyBytes_Check(input_chunk));
2121
2122 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2123 if (self->snapshot == NULL) {
2124 Py_DECREF(input_chunk);
2125 goto fail;
2126 }
2127
2128 decoded = PyObject_CallMethod(self->decoder, "decode",
2129 "Oi", input_chunk, (int)cookie.need_eof);
2130
2131 if (decoded == NULL)
2132 goto fail;
2133
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002134 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002135
2136 /* Skip chars_to_skip of the decoded characters. */
2137 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2138 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2139 goto fail;
2140 }
2141 self->decoded_chars_used = cookie.chars_to_skip;
2142 }
2143 else {
2144 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2145 if (self->snapshot == NULL)
2146 goto fail;
2147 }
2148
Antoine Pitroue4501852009-05-14 18:55:55 +00002149 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2150 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002151 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002152 goto fail;
2153 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002154 return cookieObj;
2155 fail:
2156 Py_XDECREF(cookieObj);
2157 return NULL;
2158
2159}
2160
2161static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002162textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002163{
2164 PyObject *res;
2165 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002166 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167 PyObject *next_input;
2168 Py_ssize_t chars_to_skip, chars_decoded;
2169 PyObject *saved_state = NULL;
2170 char *input, *input_end;
2171
2172 CHECK_INITIALIZED(self);
2173 CHECK_CLOSED(self);
2174
2175 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002176 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002177 goto fail;
2178 }
2179 if (!self->telling) {
2180 PyErr_SetString(PyExc_IOError,
2181 "telling position disabled by next() call");
2182 goto fail;
2183 }
2184
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002185 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002186 return NULL;
2187 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2188 if (res == NULL)
2189 goto fail;
2190 Py_DECREF(res);
2191
2192 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2193 if (posobj == NULL)
2194 goto fail;
2195
2196 if (self->decoder == NULL || self->snapshot == NULL) {
2197 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2198 return posobj;
2199 }
2200
2201#if defined(HAVE_LARGEFILE_SUPPORT)
2202 cookie.start_pos = PyLong_AsLongLong(posobj);
2203#else
2204 cookie.start_pos = PyLong_AsLong(posobj);
2205#endif
2206 if (PyErr_Occurred())
2207 goto fail;
2208
2209 /* Skip backward to the snapshot point (see _read_chunk). */
2210 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2211 goto fail;
2212
2213 assert (PyBytes_Check(next_input));
2214
2215 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2216
2217 /* How many decoded characters have been used up since the snapshot? */
2218 if (self->decoded_chars_used == 0) {
2219 /* We haven't moved from the snapshot point. */
2220 Py_DECREF(posobj);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002221 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002222 }
2223
2224 chars_to_skip = self->decoded_chars_used;
2225
2226 /* Starting from the snapshot position, we will walk the decoder
2227 * forward until it gives us enough decoded characters.
2228 */
2229 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2230 _PyIO_str_getstate, NULL);
2231 if (saved_state == NULL)
2232 goto fail;
2233
2234 /* Note our initial start point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002235 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002236 goto fail;
2237
2238 /* Feed the decoder one byte at a time. As we go, note the
2239 * nearest "safe start point" before the current location
2240 * (a point where the decoder has nothing buffered, so seek()
2241 * can safely start from there and advance to this location).
2242 */
2243 chars_decoded = 0;
2244 input = PyBytes_AS_STRING(next_input);
2245 input_end = input + PyBytes_GET_SIZE(next_input);
2246 while (input < input_end) {
2247 PyObject *state;
2248 char *dec_buffer;
2249 Py_ssize_t dec_buffer_len;
2250 int dec_flags;
2251
2252 PyObject *decoded = PyObject_CallMethod(
2253 self->decoder, "decode", "y#", input, 1);
2254 if (decoded == NULL)
2255 goto fail;
2256 assert (PyUnicode_Check(decoded));
2257 chars_decoded += PyUnicode_GET_SIZE(decoded);
2258 Py_DECREF(decoded);
2259
2260 cookie.bytes_to_feed += 1;
2261
2262 state = PyObject_CallMethodObjArgs(self->decoder,
2263 _PyIO_str_getstate, NULL);
2264 if (state == NULL)
2265 goto fail;
2266 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2267 Py_DECREF(state);
2268 goto fail;
2269 }
2270 Py_DECREF(state);
2271
2272 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2273 /* Decoder buffer is empty, so this is a safe start point. */
2274 cookie.start_pos += cookie.bytes_to_feed;
2275 chars_to_skip -= chars_decoded;
2276 cookie.dec_flags = dec_flags;
2277 cookie.bytes_to_feed = 0;
2278 chars_decoded = 0;
2279 }
2280 if (chars_decoded >= chars_to_skip)
2281 break;
2282 input++;
2283 }
2284 if (input == input_end) {
2285 /* We didn't get enough decoded data; signal EOF to get more. */
2286 PyObject *decoded = PyObject_CallMethod(
2287 self->decoder, "decode", "yi", "", /* final = */ 1);
2288 if (decoded == NULL)
2289 goto fail;
2290 assert (PyUnicode_Check(decoded));
2291 chars_decoded += PyUnicode_GET_SIZE(decoded);
2292 Py_DECREF(decoded);
2293 cookie.need_eof = 1;
2294
2295 if (chars_decoded < chars_to_skip) {
2296 PyErr_SetString(PyExc_IOError,
2297 "can't reconstruct logical file position");
2298 goto fail;
2299 }
2300 }
2301
2302 /* finally */
2303 Py_XDECREF(posobj);
2304 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2305 Py_DECREF(saved_state);
2306 if (res == NULL)
2307 return NULL;
2308 Py_DECREF(res);
2309
2310 /* The returned cookie corresponds to the last safe start point. */
2311 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002312 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002313
2314 fail:
2315 Py_XDECREF(posobj);
2316 if (saved_state) {
2317 PyObject *type, *value, *traceback;
2318 PyErr_Fetch(&type, &value, &traceback);
2319
2320 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2321 Py_DECREF(saved_state);
2322 if (res == NULL)
2323 return NULL;
2324 Py_DECREF(res);
2325
2326 PyErr_Restore(type, value, traceback);
2327 }
2328 return NULL;
2329}
2330
2331static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002332textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002333{
2334 PyObject *pos = Py_None;
2335 PyObject *res;
2336
2337 CHECK_INITIALIZED(self)
2338 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2339 return NULL;
2340 }
2341
2342 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2343 if (res == NULL)
2344 return NULL;
2345 Py_DECREF(res);
2346
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002347 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002348}
2349
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002350static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002351textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002352{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002353 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002354
2355 CHECK_INITIALIZED(self);
2356
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002357 res = PyUnicode_FromString("<_io.TextIOWrapper");
2358 if (res == NULL)
2359 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002360 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2361 if (nameobj == NULL) {
2362 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2363 PyErr_Clear();
2364 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002365 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002366 }
2367 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002368 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002369 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002370 if (s == NULL)
2371 goto error;
2372 PyUnicode_AppendAndDel(&res, s);
2373 if (res == NULL)
2374 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002375 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002376 modeobj = PyObject_GetAttrString((PyObject *) self, "mode");
2377 if (modeobj == NULL) {
2378 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2379 PyErr_Clear();
2380 else
2381 goto error;
2382 }
2383 else {
2384 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2385 Py_DECREF(modeobj);
2386 if (s == NULL)
2387 goto error;
2388 PyUnicode_AppendAndDel(&res, s);
2389 if (res == NULL)
2390 return NULL;
2391 }
2392 s = PyUnicode_FromFormat("%U encoding=%R>",
2393 res, self->encoding);
2394 Py_DECREF(res);
2395 return s;
2396error:
2397 Py_XDECREF(res);
2398 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002399}
2400
2401
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002402/* Inquiries */
2403
2404static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002405textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002406{
2407 CHECK_INITIALIZED(self);
2408 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2409}
2410
2411static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002412textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002413{
2414 CHECK_INITIALIZED(self);
2415 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2416}
2417
2418static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002419textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002420{
2421 CHECK_INITIALIZED(self);
2422 return PyObject_CallMethod(self->buffer, "readable", NULL);
2423}
2424
2425static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002426textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002427{
2428 CHECK_INITIALIZED(self);
2429 return PyObject_CallMethod(self->buffer, "writable", NULL);
2430}
2431
2432static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002433textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002434{
2435 CHECK_INITIALIZED(self);
2436 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2437}
2438
2439static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002440textiowrapper_getstate(textio *self, PyObject *args)
2441{
2442 PyErr_Format(PyExc_TypeError,
2443 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2444 return NULL;
2445}
2446
2447static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002448textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002449{
2450 CHECK_INITIALIZED(self);
2451 CHECK_CLOSED(self);
2452 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002453 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002454 return NULL;
2455 return PyObject_CallMethod(self->buffer, "flush", NULL);
2456}
2457
2458static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002459textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002460{
2461 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002462 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464
Antoine Pitrou6be88762010-05-03 16:48:20 +00002465 res = textiowrapper_closed_get(self, NULL);
2466 if (res == NULL)
2467 return NULL;
2468 r = PyObject_IsTrue(res);
2469 Py_DECREF(res);
2470 if (r < 0)
2471 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002472
Antoine Pitrou6be88762010-05-03 16:48:20 +00002473 if (r > 0) {
2474 Py_RETURN_NONE; /* stream already closed */
2475 }
2476 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002477 if (self->deallocating) {
2478 res = PyObject_CallMethod(self->buffer, "_dealloc_warn", "O", self);
2479 if (res)
2480 Py_DECREF(res);
2481 else
2482 PyErr_Clear();
2483 }
Antoine Pitrou6be88762010-05-03 16:48:20 +00002484 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2485 if (res == NULL) {
2486 return NULL;
2487 }
2488 else
2489 Py_DECREF(res);
2490
2491 return PyObject_CallMethod(self->buffer, "close", NULL);
2492 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002493}
2494
2495static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002496textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497{
2498 PyObject *line;
2499
2500 CHECK_INITIALIZED(self);
2501
2502 self->telling = 0;
2503 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2504 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002505 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002506 }
2507 else {
2508 line = PyObject_CallMethodObjArgs((PyObject *)self,
2509 _PyIO_str_readline, NULL);
2510 if (line && !PyUnicode_Check(line)) {
2511 PyErr_Format(PyExc_IOError,
2512 "readline() should have returned an str object, "
2513 "not '%.200s'", Py_TYPE(line)->tp_name);
2514 Py_DECREF(line);
2515 return NULL;
2516 }
2517 }
2518
2519 if (line == NULL)
2520 return NULL;
2521
2522 if (PyUnicode_GET_SIZE(line) == 0) {
2523 /* Reached EOF or would have blocked */
2524 Py_DECREF(line);
2525 Py_CLEAR(self->snapshot);
2526 self->telling = self->seekable;
2527 return NULL;
2528 }
2529
2530 return line;
2531}
2532
2533static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002534textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002535{
2536 CHECK_INITIALIZED(self);
2537 return PyObject_GetAttrString(self->buffer, "name");
2538}
2539
2540static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002541textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542{
2543 CHECK_INITIALIZED(self);
2544 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2545}
2546
2547static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002548textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549{
2550 PyObject *res;
2551 CHECK_INITIALIZED(self);
2552 if (self->decoder == NULL)
2553 Py_RETURN_NONE;
2554 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2555 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002556 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2557 PyErr_Clear();
2558 Py_RETURN_NONE;
2559 }
2560 else {
2561 return NULL;
2562 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563 }
2564 return res;
2565}
2566
2567static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002568textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002569{
2570 CHECK_INITIALIZED(self);
2571 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2572}
2573
2574static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002575textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576{
2577 CHECK_INITIALIZED(self);
2578 return PyLong_FromSsize_t(self->chunk_size);
2579}
2580
2581static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002582textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002583{
2584 Py_ssize_t n;
2585 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002586 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002587 if (n == -1 && PyErr_Occurred())
2588 return -1;
2589 if (n <= 0) {
2590 PyErr_SetString(PyExc_ValueError,
2591 "a strictly positive integer is required");
2592 return -1;
2593 }
2594 self->chunk_size = n;
2595 return 0;
2596}
2597
/* Method table of TextIOWrapper.  Entries flagged METH_NOARGS take no
   Python-level arguments; METH_VARARGS entries parse an argument tuple. */
static PyMethodDef textiowrapper_methods[] = {
    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},

    /* Inquiries, plus __getstate__ */
    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
    {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},

    /* Positioning */
    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
    {NULL, NULL}    /* sentinel */
};
2618
/* Read-only attributes exposed directly from fields of the textio struct. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {NULL}  /* sentinel */
};
2625
/* Computed attributes.  Only "_CHUNK_SIZE" has a setter; the others are
   getter-only. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}  /* sentinel */
};
2637
/* Type object for _io.TextIOWrapper.  It installs the deallocator, repr,
   GC traverse/clear hooks, the iternext slot and the method, member and
   getset tables of this file; instances carry a weakref list and a
   __dict__ at the offsets given below. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};