blob: a8231bebb65de921881c314a66f32973efcb0175 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
/* Docstring installed as tp_doc of PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
/* Method table for _io._TextIOBase.  Each entry pairs a Python-visible
   name with its C implementation, calling convention and docstring. */
static PyMethodDef textiobase_methods[] = {
    {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
    {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
    {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
    {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
    {NULL, NULL}  /* sentinel */
};
127
/* Attribute table for _io._TextIOBase.  The setter slot is NULL for every
   entry, i.e. only a getter is registered for each attribute. */
static PyGetSetDef textiobase_getset[] = {
    {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
    {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
    {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
    {NULL}  /* sentinel */
};
134
/* Type object for _io._TextIOBase.  It derives from PyIOBase_Type (tp_base),
   may itself be subclassed (Py_TPFLAGS_BASETYPE), and only contributes the
   method and getset tables plus a docstring; every other slot is left 0. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /* tp_name */
    0,                          /* tp_basicsize */
    0,                          /* tp_itemsize */
    0,                          /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /* tp_flags */
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
175
176
177/* IncrementalNewlineDecoder */
178
/* Docstring installed as tp_doc of PyIncrementalNewlineDecoder_Type. */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode. It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
    "records the types of newlines encountered. When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200239static int
240check_decoded(PyObject *decoded)
241{
242 if (decoded == NULL)
243 return -1;
244 if (!PyUnicode_Check(decoded)) {
245 PyErr_Format(PyExc_TypeError,
246 "decoder should return a string result, not '%.200s'",
247 Py_TYPE(decoded)->tp_name);
248 Py_DECREF(decoded);
249 return -1;
250 }
251 return 0;
252}
253
/* Bit flags accumulated in nldecoder_object.seennl: one bit per newline
   style encountered while decoding. */
#define SEEN_CR 1
#define SEEN_LF 2
#define SEEN_CRLF 4
/* All three styles have been seen. */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
258
259PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000260_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000261 PyObject *input, int final)
262{
263 PyObject *output;
264 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000265 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000266
267 if (self->decoder == NULL) {
268 PyErr_SetString(PyExc_ValueError,
269 "IncrementalNewlineDecoder.__init__ not called");
270 return NULL;
271 }
272
273 /* decode input (with the eventual \r from a previous pass) */
274 if (self->decoder != Py_None) {
275 output = PyObject_CallMethodObjArgs(self->decoder,
276 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
277 }
278 else {
279 output = input;
280 Py_INCREF(output);
281 }
282
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200283 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000284 return NULL;
285
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000286 output_len = PyUnicode_GET_SIZE(output);
287 if (self->pendingcr && (final || output_len > 0)) {
288 Py_UNICODE *out;
289 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
290 if (modified == NULL)
291 goto error;
292 out = PyUnicode_AS_UNICODE(modified);
293 out[0] = '\r';
294 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
295 output_len * sizeof(Py_UNICODE));
296 Py_DECREF(output);
297 output = modified;
298 self->pendingcr = 0;
299 output_len++;
300 }
301
302 /* retain last \r even when not translating data:
303 * then readline() is sure to get \r\n in one pass
304 */
305 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000306 if (output_len > 0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000307 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
308
309 if (Py_REFCNT(output) == 1) {
310 if (PyUnicode_Resize(&output, output_len - 1) < 0)
311 goto error;
312 }
313 else {
314 PyObject *modified = PyUnicode_FromUnicode(
315 PyUnicode_AS_UNICODE(output),
316 output_len - 1);
317 if (modified == NULL)
318 goto error;
319 Py_DECREF(output);
320 output = modified;
321 }
322 self->pendingcr = 1;
323 }
324 }
325
326 /* Record which newlines are read and do newline translation if desired,
327 all in one pass. */
328 {
329 Py_UNICODE *in_str;
330 Py_ssize_t len;
331 int seennl = self->seennl;
332 int only_lf = 0;
333
334 in_str = PyUnicode_AS_UNICODE(output);
335 len = PyUnicode_GET_SIZE(output);
336
337 if (len == 0)
338 return output;
339
340 /* If, up to now, newlines are consistently \n, do a quick check
341 for the \r *byte* with the libc's optimized memchr.
342 */
343 if (seennl == SEEN_LF || seennl == 0) {
Antoine Pitrou0e941892009-03-06 23:57:20 +0000344 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000345 }
346
Antoine Pitrou66913e22009-03-06 23:40:56 +0000347 if (only_lf) {
348 /* If not already seen, quick scan for a possible "\n" character.
349 (there's nothing else to be done, even when in translation mode)
350 */
351 if (seennl == 0 &&
352 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
353 Py_UNICODE *s, *end;
354 s = in_str;
355 end = in_str + len;
356 for (;;) {
357 Py_UNICODE c;
358 /* Fast loop for non-control characters */
359 while (*s > '\n')
360 s++;
361 c = *s++;
362 if (c == '\n') {
363 seennl |= SEEN_LF;
364 break;
365 }
366 if (s > end)
367 break;
368 }
369 }
370 /* Finished: we have scanned for newlines, and none of them
371 need translating */
372 }
373 else if (!self->translate) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000374 Py_UNICODE *s, *end;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000375 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000376 if (seennl == SEEN_ALL)
377 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000378 s = in_str;
379 end = in_str + len;
380 for (;;) {
381 Py_UNICODE c;
382 /* Fast loop for non-control characters */
383 while (*s > '\r')
384 s++;
385 c = *s++;
386 if (c == '\n')
387 seennl |= SEEN_LF;
388 else if (c == '\r') {
389 if (*s == '\n') {
390 seennl |= SEEN_CRLF;
391 s++;
392 }
393 else
394 seennl |= SEEN_CR;
395 }
396 if (s > end)
397 break;
398 if (seennl == SEEN_ALL)
399 break;
400 }
401 endscan:
402 ;
403 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000404 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000405 PyObject *translated = NULL;
406 Py_UNICODE *out_str;
407 Py_UNICODE *in, *out, *end;
408 if (Py_REFCNT(output) != 1) {
409 /* We could try to optimize this so that we only do a copy
410 when there is something to translate. On the other hand,
411 most decoders should only output non-shared strings, i.e.
412 translation is done in place. */
413 translated = PyUnicode_FromUnicode(NULL, len);
414 if (translated == NULL)
415 goto error;
416 assert(Py_REFCNT(translated) == 1);
417 memcpy(PyUnicode_AS_UNICODE(translated),
418 PyUnicode_AS_UNICODE(output),
419 len * sizeof(Py_UNICODE));
420 }
421 else {
422 translated = output;
423 }
424 out_str = PyUnicode_AS_UNICODE(translated);
425 in = in_str;
426 out = out_str;
427 end = in_str + len;
428 for (;;) {
429 Py_UNICODE c;
430 /* Fast loop for non-control characters */
431 while ((c = *in++) > '\r')
432 *out++ = c;
433 if (c == '\n') {
434 *out++ = c;
435 seennl |= SEEN_LF;
436 continue;
437 }
438 if (c == '\r') {
439 if (*in == '\n') {
440 in++;
441 seennl |= SEEN_CRLF;
442 }
443 else
444 seennl |= SEEN_CR;
445 *out++ = '\n';
446 continue;
447 }
448 if (in > end)
449 break;
450 *out++ = c;
451 }
452 if (translated != output) {
453 Py_DECREF(output);
454 output = translated;
455 }
456 if (out - out_str != len) {
457 if (PyUnicode_Resize(&output, out - out_str) < 0)
458 goto error;
459 }
460 }
461 self->seennl |= seennl;
462 }
463
464 return output;
465
466 error:
467 Py_DECREF(output);
468 return NULL;
469}
470
471static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000472incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000473 PyObject *args, PyObject *kwds)
474{
475 char *kwlist[] = {"input", "final", NULL};
476 PyObject *input;
477 int final = 0;
478
479 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
480 kwlist, &input, &final))
481 return NULL;
482 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
483}
484
485static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000486incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487{
488 PyObject *buffer;
489 unsigned PY_LONG_LONG flag;
490
491 if (self->decoder != Py_None) {
492 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
493 _PyIO_str_getstate, NULL);
494 if (state == NULL)
495 return NULL;
496 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
497 Py_DECREF(state);
498 return NULL;
499 }
500 Py_INCREF(buffer);
501 Py_DECREF(state);
502 }
503 else {
504 buffer = PyBytes_FromString("");
505 flag = 0;
506 }
507 flag <<= 1;
508 if (self->pendingcr)
509 flag |= 1;
510 return Py_BuildValue("NK", buffer, flag);
511}
512
513static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000514incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000515{
516 PyObject *buffer;
517 unsigned PY_LONG_LONG flag;
518
519 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
520 return NULL;
521
522 self->pendingcr = (int) flag & 1;
523 flag >>= 1;
524
525 if (self->decoder != Py_None)
526 return PyObject_CallMethod(self->decoder,
527 "setstate", "((OK))", buffer, flag);
528 else
529 Py_RETURN_NONE;
530}
531
532static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000533incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000534{
535 self->seennl = 0;
536 self->pendingcr = 0;
537 if (self->decoder != Py_None)
538 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
539 else
540 Py_RETURN_NONE;
541}
542
543static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000544incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000545{
546 switch (self->seennl) {
547 case SEEN_CR:
548 return PyUnicode_FromString("\r");
549 case SEEN_LF:
550 return PyUnicode_FromString("\n");
551 case SEEN_CRLF:
552 return PyUnicode_FromString("\r\n");
553 case SEEN_CR | SEEN_LF:
554 return Py_BuildValue("ss", "\r", "\n");
555 case SEEN_CR | SEEN_CRLF:
556 return Py_BuildValue("ss", "\r", "\r\n");
557 case SEEN_LF | SEEN_CRLF:
558 return Py_BuildValue("ss", "\n", "\r\n");
559 case SEEN_CR | SEEN_LF | SEEN_CRLF:
560 return Py_BuildValue("sss", "\r", "\n", "\r\n");
561 default:
562 Py_RETURN_NONE;
563 }
564
565}
566
567
/* Method table for _io.IncrementalNewlineDecoder.  No docstrings are
   supplied (the fourth field of each entry is left unset). */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
    {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
    {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
    {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
    {NULL}  /* sentinel */
};
575
/* Attribute table for _io.IncrementalNewlineDecoder: `newlines` has a
   getter only (setter and docstring slots are NULL). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel */
};
580
/* Type object for _io.IncrementalNewlineDecoder.  Instances are
   nldecoder_object structs created by PyType_GenericNew, set up by
   incrementalnewlinedecoder_init and torn down by
   incrementalnewlinedecoder_dealloc.  The type may be subclassed
   (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /* tp_name */
    sizeof(nldecoder_object),   /* tp_basicsize */
    0,                          /* tp_itemsize */
    (destructor)incrementalnewlinedecoder_dealloc, /* tp_dealloc */
    0,                          /* tp_print */
    0,                          /* tp_getattr */
    0,                          /* tp_setattr */
    0,                          /* tp_compare */
    0,                          /* tp_repr */
    0,                          /* tp_as_number */
    0,                          /* tp_as_sequence */
    0,                          /* tp_as_mapping */
    0,                          /* tp_hash */
    0,                          /* tp_call */
    0,                          /* tp_str */
    0,                          /* tp_getattro */
    0,                          /* tp_setattro */
    0,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /* tp_flags */
    incrementalnewlinedecoder_doc, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
621
622
623/* TextIOWrapper */
624
/* Docstring for the TextIOWrapper type.  It describes the `encoding`,
   `errors`, `newline` and `line_buffering` constructor arguments. */
PyDoc_STRVAR(textiowrapper_doc,
    "Character and line based layer over a BufferedIOBase object, buffer.\n"
    "\n"
    "encoding gives the name of the encoding that the stream will be\n"
    "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
    "\n"
    "errors determines the strictness of encoding and decoding (see the\n"
    "codecs.register) and defaults to \"strict\".\n"
    "\n"
    "newline controls how line endings are handled. It can be None, '',\n"
    "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
    "\n"
    "* On input, if newline is None, universal newlines mode is\n"
    " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
    " these are translated into '\\n' before being returned to the\n"
    " caller. If it is '', universal newline mode is enabled, but line\n"
    " endings are returned to the caller untranslated. If it has any of\n"
    " the other legal values, input lines are only terminated by the given\n"
    " string, and the line ending is returned to the caller untranslated.\n"
    "\n"
    "* On output, if newline is None, any '\\n' characters written are\n"
    " translated to the system default line separator, os.linesep. If\n"
    " newline is '' or '\\n', no translation takes place. If newline is any\n"
    " of the other legal values, any '\\n' characters written are translated\n"
    " to the given string.\n"
    "\n"
    "If line_buffering is True, a call to flush is implied when a call to\n"
    "write contains a newline character."
    );
654
/* Generic signature under which the specialized encoders in this file are
   stored (see the `encodefunc` member of textio and the encodefuncs
   table); the concrete functions take (textio *, PyObject *text) and are
   cast to this type. */
typedef PyObject *
    (*encodefunc_t)(PyObject *, PyObject *);
657
/* Instance layout of the TextIOWrapper object. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char deallocating;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712
713
714/* A couple of specialized cases in order to bypass the slow incremental
715 encoding methods for the most popular encodings. */
716
717static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000718ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719{
720 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
721 PyUnicode_GET_SIZE(text),
722 PyBytes_AS_STRING(self->errors));
723}
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
728 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
729 PyUnicode_GET_SIZE(text),
730 PyBytes_AS_STRING(self->errors), 1);
731}
732
733static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000734utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735{
736 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
737 PyUnicode_GET_SIZE(text),
738 PyBytes_AS_STRING(self->errors), -1);
739}
740
741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743{
Antoine Pitroue4501852009-05-14 18:55:55 +0000744 if (!self->encoding_start_of_stream) {
745 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000747 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000748#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000749 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000751 }
752 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
753 PyUnicode_GET_SIZE(text),
754 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000755}
756
Antoine Pitroue4501852009-05-14 18:55:55 +0000757static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000758utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000759{
760 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
761 PyUnicode_GET_SIZE(text),
762 PyBytes_AS_STRING(self->errors), 1);
763}
764
765static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000766utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000767{
768 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
769 PyUnicode_GET_SIZE(text),
770 PyBytes_AS_STRING(self->errors), -1);
771}
772
773static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000774utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000775{
776 if (!self->encoding_start_of_stream) {
777 /* Skip the BOM and use native byte ordering */
778#if defined(WORDS_BIGENDIAN)
779 return utf32be_encode(self, text);
780#else
781 return utf32le_encode(self, text);
782#endif
783 }
784 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
785 PyUnicode_GET_SIZE(text),
786 PyBytes_AS_STRING(self->errors), 0);
787}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791{
792 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
793 PyUnicode_GET_SIZE(text),
794 PyBytes_AS_STRING(self->errors));
795}
796
797static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000798latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799{
800 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
801 PyUnicode_GET_SIZE(text),
802 PyBytes_AS_STRING(self->errors));
803}
804
805/* Map normalized encoding names onto the specialized encoding funcs */
806
/* One row of the encoding-name -> specialized-encoder lookup table. */
typedef struct {
    const char *name;           /* encoding name to match */
    encodefunc_t encodefunc;    /* encoder to use for that name */
} encodefuncentry;
811
/* Lookup table of the encodings that have a specialized C encoder in this
   file.  The list is terminated by an entry whose fields are both NULL. */
static encodefuncentry encodefuncs[] = {
    {"ascii", (encodefunc_t) ascii_encode},
    {"iso8859-1", (encodefunc_t) latin1_encode},
    {"utf-8", (encodefunc_t) utf8_encode},
    {"utf-16-be", (encodefunc_t) utf16be_encode},
    {"utf-16-le", (encodefunc_t) utf16le_encode},
    {"utf-16", (encodefunc_t) utf16_encode},
    {"utf-32-be", (encodefunc_t) utf32be_encode},
    {"utf-32-le", (encodefunc_t) utf32le_encode},
    {"utf-32", (encodefunc_t) utf32_encode},
    {NULL, NULL}  /* sentinel */
};
824
825
826static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000827textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000828{
829 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200830 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000831 NULL};
832 PyObject *buffer, *raw;
833 char *encoding = NULL;
834 char *errors = NULL;
835 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200836 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 _PyIO_State *state = IO_STATE;
838
839 PyObject *res;
840 int r;
841
842 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000843 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200844 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000845 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200846 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000847 return -1;
848
849 if (newline && newline[0] != '\0'
850 && !(newline[0] == '\n' && newline[1] == '\0')
851 && !(newline[0] == '\r' && newline[1] == '\0')
852 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
853 PyErr_Format(PyExc_ValueError,
854 "illegal newline value: %s", newline);
855 return -1;
856 }
857
858 Py_CLEAR(self->buffer);
859 Py_CLEAR(self->encoding);
860 Py_CLEAR(self->encoder);
861 Py_CLEAR(self->decoder);
862 Py_CLEAR(self->readnl);
863 Py_CLEAR(self->decoded_chars);
864 Py_CLEAR(self->pending_bytes);
865 Py_CLEAR(self->snapshot);
866 Py_CLEAR(self->errors);
867 Py_CLEAR(self->raw);
868 self->decoded_chars_used = 0;
869 self->pending_bytes_count = 0;
870 self->encodefunc = NULL;
871
872 if (encoding == NULL) {
873 /* Try os.device_encoding(fileno) */
874 PyObject *fileno;
875 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
876 /* Ignore only AttributeError and UnsupportedOperation */
877 if (fileno == NULL) {
878 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
879 PyErr_ExceptionMatches(state->unsupported_operation)) {
880 PyErr_Clear();
881 }
882 else {
883 goto error;
884 }
885 }
886 else {
887 self->encoding = PyObject_CallMethod(state->os_module,
888 "device_encoding",
889 "N", fileno);
890 if (self->encoding == NULL)
891 goto error;
892 else if (!PyUnicode_Check(self->encoding))
893 Py_CLEAR(self->encoding);
894 }
895 }
896 if (encoding == NULL && self->encoding == NULL) {
897 if (state->locale_module == NULL) {
898 state->locale_module = PyImport_ImportModule("locale");
899 if (state->locale_module == NULL)
900 goto catch_ImportError;
901 else
902 goto use_locale;
903 }
904 else {
905 use_locale:
906 self->encoding = PyObject_CallMethod(
907 state->locale_module, "getpreferredencoding", NULL);
908 if (self->encoding == NULL) {
909 catch_ImportError:
910 /*
911 Importing locale can raise a ImportError because of
912 _functools, and locale.getpreferredencoding can raise a
913 ImportError if _locale is not available. These will happen
914 during module building.
915 */
916 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
917 PyErr_Clear();
918 self->encoding = PyUnicode_FromString("ascii");
919 }
920 else
921 goto error;
922 }
923 else if (!PyUnicode_Check(self->encoding))
924 Py_CLEAR(self->encoding);
925 }
926 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000927 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000928 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000929 if (encoding == NULL)
930 goto error;
931 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000932 else if (encoding != NULL) {
933 self->encoding = PyUnicode_FromString(encoding);
934 if (self->encoding == NULL)
935 goto error;
936 }
937 else {
938 PyErr_SetString(PyExc_IOError,
939 "could not determine default encoding");
940 }
941
942 if (errors == NULL)
943 errors = "strict";
944 self->errors = PyBytes_FromString(errors);
945 if (self->errors == NULL)
946 goto error;
947
948 self->chunk_size = 8192;
949 self->readuniversal = (newline == NULL || newline[0] == '\0');
950 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200951 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000952 self->readtranslate = (newline == NULL);
953 if (newline) {
954 self->readnl = PyUnicode_FromString(newline);
955 if (self->readnl == NULL)
956 return -1;
957 }
958 self->writetranslate = (newline == NULL || newline[0] != '\0');
959 if (!self->readuniversal && self->readnl) {
960 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000961 if (self->writenl == NULL)
962 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000963 if (!strcmp(self->writenl, "\n"))
964 self->writenl = NULL;
965 }
966#ifdef MS_WINDOWS
967 else
968 self->writenl = "\r\n";
969#endif
970
971 /* Build the decoder object */
972 res = PyObject_CallMethod(buffer, "readable", NULL);
973 if (res == NULL)
974 goto error;
975 r = PyObject_IsTrue(res);
976 Py_DECREF(res);
977 if (r == -1)
978 goto error;
979 if (r == 1) {
980 self->decoder = PyCodec_IncrementalDecoder(
981 encoding, errors);
982 if (self->decoder == NULL)
983 goto error;
984
985 if (self->readuniversal) {
986 PyObject *incrementalDecoder = PyObject_CallFunction(
987 (PyObject *)&PyIncrementalNewlineDecoder_Type,
988 "Oi", self->decoder, (int)self->readtranslate);
989 if (incrementalDecoder == NULL)
990 goto error;
991 Py_CLEAR(self->decoder);
992 self->decoder = incrementalDecoder;
993 }
994 }
995
996 /* Build the encoder object */
997 res = PyObject_CallMethod(buffer, "writable", NULL);
998 if (res == NULL)
999 goto error;
1000 r = PyObject_IsTrue(res);
1001 Py_DECREF(res);
1002 if (r == -1)
1003 goto error;
1004 if (r == 1) {
1005 PyObject *ci;
1006 self->encoder = PyCodec_IncrementalEncoder(
1007 encoding, errors);
1008 if (self->encoder == NULL)
1009 goto error;
1010 /* Get the normalized named of the codec */
1011 ci = _PyCodec_Lookup(encoding);
1012 if (ci == NULL)
1013 goto error;
1014 res = PyObject_GetAttrString(ci, "name");
1015 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001016 if (res == NULL) {
1017 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1018 PyErr_Clear();
1019 else
1020 goto error;
1021 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001022 else if (PyUnicode_Check(res)) {
1023 encodefuncentry *e = encodefuncs;
1024 while (e->name != NULL) {
1025 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1026 self->encodefunc = e->encodefunc;
1027 break;
1028 }
1029 e++;
1030 }
1031 }
1032 Py_XDECREF(res);
1033 }
1034
1035 self->buffer = buffer;
1036 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001037
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001038 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1039 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1040 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1041 raw = PyObject_GetAttrString(buffer, "raw");
1042 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001043 if (raw == NULL) {
1044 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1045 PyErr_Clear();
1046 else
1047 goto error;
1048 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001049 else if (Py_TYPE(raw) == &PyFileIO_Type)
1050 self->raw = raw;
1051 else
1052 Py_DECREF(raw);
1053 }
1054
1055 res = PyObject_CallMethod(buffer, "seekable", NULL);
1056 if (res == NULL)
1057 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001058 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001059 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001060 if (r < 0)
1061 goto error;
1062 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001063
Antoine Pitroue96ec682011-07-23 21:46:35 +02001064 self->has_read1 = PyObject_HasAttrString(buffer, "read1");
1065
Antoine Pitroue4501852009-05-14 18:55:55 +00001066 self->encoding_start_of_stream = 0;
1067 if (self->seekable && self->encoder) {
1068 PyObject *cookieObj;
1069 int cmp;
1070
1071 self->encoding_start_of_stream = 1;
1072
1073 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1074 if (cookieObj == NULL)
1075 goto error;
1076
1077 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1078 Py_DECREF(cookieObj);
1079 if (cmp < 0) {
1080 goto error;
1081 }
1082
1083 if (cmp == 0) {
1084 self->encoding_start_of_stream = 0;
1085 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1086 _PyIO_zero, NULL);
1087 if (res == NULL)
1088 goto error;
1089 Py_DECREF(res);
1090 }
1091 }
1092
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001093 self->ok = 1;
1094 return 0;
1095
1096 error:
1097 return -1;
1098}
1099
1100static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001101_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001102{
1103 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1104 return -1;
1105 self->ok = 0;
1106 Py_CLEAR(self->buffer);
1107 Py_CLEAR(self->encoding);
1108 Py_CLEAR(self->encoder);
1109 Py_CLEAR(self->decoder);
1110 Py_CLEAR(self->readnl);
1111 Py_CLEAR(self->decoded_chars);
1112 Py_CLEAR(self->pending_bytes);
1113 Py_CLEAR(self->snapshot);
1114 Py_CLEAR(self->errors);
1115 Py_CLEAR(self->raw);
1116 return 0;
1117}
1118
1119static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001120textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121{
Antoine Pitroue033e062010-10-29 10:38:18 +00001122 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001123 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124 return;
1125 _PyObject_GC_UNTRACK(self);
1126 if (self->weakreflist != NULL)
1127 PyObject_ClearWeakRefs((PyObject *)self);
1128 Py_CLEAR(self->dict);
1129 Py_TYPE(self)->tp_free((PyObject *)self);
1130}
1131
1132static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001133textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001134{
1135 Py_VISIT(self->buffer);
1136 Py_VISIT(self->encoding);
1137 Py_VISIT(self->encoder);
1138 Py_VISIT(self->decoder);
1139 Py_VISIT(self->readnl);
1140 Py_VISIT(self->decoded_chars);
1141 Py_VISIT(self->pending_bytes);
1142 Py_VISIT(self->snapshot);
1143 Py_VISIT(self->errors);
1144 Py_VISIT(self->raw);
1145
1146 Py_VISIT(self->dict);
1147 return 0;
1148}
1149
1150static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001151textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001152{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001153 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001154 return -1;
1155 Py_CLEAR(self->dict);
1156 return 0;
1157}
1158
1159static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001160textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001161
/* Make the enclosing function return NULL when the stream is closed.
   Exact TextIOWrapper instances take a shortcut: the cached raw FileIO (if
   any) is queried directly, otherwise the `closed` getter is used, and a
   ValueError is raised here.  Any other type (i.e. a subclass) goes through
   the generic _PyIOBase_check_closed() helper. */
#define CHECK_CLOSED(self) \
    do { \
        int _closed; \
        PyObject *_closed_obj; \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            if (self->raw != NULL) { \
                _closed = _PyFileIO_closed(self->raw); \
            } \
            else { \
                _closed_obj = textiowrapper_closed_get(self, NULL); \
                if (_closed_obj == NULL) \
                    return NULL; \
                _closed = PyObject_IsTrue(_closed_obj); \
                Py_DECREF(_closed_obj); \
                if (_closed < 0) \
                    return NULL; \
            } \
            if (_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1188
/* Make the enclosing (pointer-returning) function return NULL with a
   ValueError when the wrapper is not usable: either its buffer has been
   detached, or __init__ never completed successfully. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                        ? "underlying buffer has been detached" \
                        : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1200
/* Variant of the initialization check for functions that return an int:
   raises the same ValueError but makes the enclosing function return -1. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                        ? "underlying buffer has been detached" \
                        : "I/O operation on uninitialized object"); \
        return -1; \
    }
1212
1213
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001214static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001215textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001216{
1217 PyObject *buffer, *res;
1218 CHECK_INITIALIZED(self);
1219 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1220 if (res == NULL)
1221 return NULL;
1222 Py_DECREF(res);
1223 buffer = self->buffer;
1224 self->buffer = NULL;
1225 self->detached = 1;
1226 self->ok = 0;
1227 return buffer;
1228}
1229
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001230Py_LOCAL_INLINE(const Py_UNICODE *)
1231findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1232{
1233 /* like wcschr, but doesn't stop at NULL characters */
1234 while (size-- > 0) {
1235 if (*s == ch)
1236 return s;
1237 s++;
1238 }
1239 return NULL;
1240}
1241
Antoine Pitrou24f36292009-03-28 22:16:42 +00001242/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001243 underlying buffered object, though. */
1244static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001245_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001247 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248
1249 if (self->pending_bytes == NULL)
1250 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001251
1252 pending = self->pending_bytes;
1253 Py_INCREF(pending);
1254 self->pending_bytes_count = 0;
1255 Py_CLEAR(self->pending_bytes);
1256
1257 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1258 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001259 if (b == NULL)
1260 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001261 ret = NULL;
1262 do {
1263 ret = PyObject_CallMethodObjArgs(self->buffer,
1264 _PyIO_str_write, b, NULL);
1265 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266 Py_DECREF(b);
1267 if (ret == NULL)
1268 return -1;
1269 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270 return 0;
1271}
1272
1273static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001274textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275{
1276 PyObject *ret;
1277 PyObject *text; /* owned reference */
1278 PyObject *b;
1279 Py_ssize_t textlen;
1280 int haslf = 0;
1281 int needflush = 0;
1282
1283 CHECK_INITIALIZED(self);
1284
1285 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1286 return NULL;
1287 }
1288
1289 CHECK_CLOSED(self);
1290
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001291 if (self->encoder == NULL)
1292 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001293
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001294 Py_INCREF(text);
1295
1296 textlen = PyUnicode_GetSize(text);
1297
1298 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1299 if (findchar(PyUnicode_AS_UNICODE(text),
1300 PyUnicode_GET_SIZE(text), '\n'))
1301 haslf = 1;
1302
1303 if (haslf && self->writetranslate && self->writenl != NULL) {
1304 PyObject *newtext = PyObject_CallMethod(
1305 text, "replace", "ss", "\n", self->writenl);
1306 Py_DECREF(text);
1307 if (newtext == NULL)
1308 return NULL;
1309 text = newtext;
1310 }
1311
Antoine Pitroue96ec682011-07-23 21:46:35 +02001312 if (self->write_through)
1313 needflush = 1;
1314 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001315 (haslf ||
1316 findchar(PyUnicode_AS_UNICODE(text),
1317 PyUnicode_GET_SIZE(text), '\r')))
1318 needflush = 1;
1319
1320 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001321 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001323 self->encoding_start_of_stream = 0;
1324 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001325 else
1326 b = PyObject_CallMethodObjArgs(self->encoder,
1327 _PyIO_str_encode, text, NULL);
1328 Py_DECREF(text);
1329 if (b == NULL)
1330 return NULL;
1331
1332 if (self->pending_bytes == NULL) {
1333 self->pending_bytes = PyList_New(0);
1334 if (self->pending_bytes == NULL) {
1335 Py_DECREF(b);
1336 return NULL;
1337 }
1338 self->pending_bytes_count = 0;
1339 }
1340 if (PyList_Append(self->pending_bytes, b) < 0) {
1341 Py_DECREF(b);
1342 return NULL;
1343 }
1344 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1345 Py_DECREF(b);
1346 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001347 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001348 return NULL;
1349 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001350
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001351 if (needflush) {
1352 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1353 if (ret == NULL)
1354 return NULL;
1355 Py_DECREF(ret);
1356 }
1357
1358 Py_CLEAR(self->snapshot);
1359
1360 if (self->decoder) {
1361 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1362 if (ret == NULL)
1363 return NULL;
1364 Py_DECREF(ret);
1365 }
1366
1367 return PyLong_FromSsize_t(textlen);
1368}
1369
1370/* Steal a reference to chars and store it in the decoded_char buffer;
1371 */
1372static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001373textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001374{
1375 Py_CLEAR(self->decoded_chars);
1376 self->decoded_chars = chars;
1377 self->decoded_chars_used = 0;
1378}
1379
1380static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001381textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001382{
1383 PyObject *chars;
1384 Py_ssize_t avail;
1385
1386 if (self->decoded_chars == NULL)
1387 return PyUnicode_FromStringAndSize(NULL, 0);
1388
1389 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1390 - self->decoded_chars_used);
1391
1392 assert(avail >= 0);
1393
1394 if (n < 0 || n > avail)
1395 n = avail;
1396
1397 if (self->decoded_chars_used > 0 || n < avail) {
1398 chars = PyUnicode_FromUnicode(
1399 PyUnicode_AS_UNICODE(self->decoded_chars)
1400 + self->decoded_chars_used, n);
1401 if (chars == NULL)
1402 return NULL;
1403 }
1404 else {
1405 chars = self->decoded_chars;
1406 Py_INCREF(chars);
1407 }
1408
1409 self->decoded_chars_used += n;
1410 return chars;
1411}
1412
1413/* Read and decode the next chunk of data from the BufferedReader.
1414 */
1415static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001416textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001417{
1418 PyObject *dec_buffer = NULL;
1419 PyObject *dec_flags = NULL;
1420 PyObject *input_chunk = NULL;
1421 PyObject *decoded_chars, *chunk_size;
1422 int eof;
1423
1424 /* The return value is True unless EOF was reached. The decoded string is
1425 * placed in self._decoded_chars (replacing its previous value). The
1426 * entire input chunk is sent to the decoder, though some of it may remain
1427 * buffered in the decoder, yet to be converted.
1428 */
1429
1430 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001431 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001432 return -1;
1433 }
1434
1435 if (self->telling) {
1436 /* To prepare for tell(), we need to snapshot a point in the file
1437 * where the decoder's input buffer is empty.
1438 */
1439
1440 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1441 _PyIO_str_getstate, NULL);
1442 if (state == NULL)
1443 return -1;
1444 /* Given this, we know there was a valid snapshot point
1445 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1446 */
1447 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1448 Py_DECREF(state);
1449 return -1;
1450 }
1451 Py_INCREF(dec_buffer);
1452 Py_INCREF(dec_flags);
1453 Py_DECREF(state);
1454 }
1455
1456 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1457 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1458 if (chunk_size == NULL)
1459 goto fail;
1460 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001461 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1462 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001463 Py_DECREF(chunk_size);
1464 if (input_chunk == NULL)
1465 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001466 if (!PyBytes_Check(input_chunk)) {
1467 PyErr_Format(PyExc_TypeError,
1468 "underlying %s() should have returned a bytes object, "
1469 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1470 Py_TYPE(input_chunk)->tp_name);
1471 goto fail;
1472 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001473
1474 eof = (PyBytes_Size(input_chunk) == 0);
1475
1476 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1477 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1478 self->decoder, input_chunk, eof);
1479 }
1480 else {
1481 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1482 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1483 }
1484
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001485 if (check_decoded(decoded_chars) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001486 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001487 textiowrapper_set_decoded_chars(self, decoded_chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001488 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1489 eof = 0;
1490
1491 if (self->telling) {
1492 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1493 * next input to be decoded is dec_buffer + input_chunk.
1494 */
1495 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1496 if (next_input == NULL)
1497 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001498 if (!PyBytes_Check(next_input)) {
1499 PyErr_Format(PyExc_TypeError,
1500 "decoder getstate() should have returned a bytes "
1501 "object, not '%.200s'",
1502 Py_TYPE(next_input)->tp_name);
1503 Py_DECREF(next_input);
1504 goto fail;
1505 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001506 Py_DECREF(dec_buffer);
1507 Py_CLEAR(self->snapshot);
1508 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1509 }
1510 Py_DECREF(input_chunk);
1511
1512 return (eof == 0);
1513
1514 fail:
1515 Py_XDECREF(dec_buffer);
1516 Py_XDECREF(dec_flags);
1517 Py_XDECREF(input_chunk);
1518 return -1;
1519}
1520
1521static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001522textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001523{
1524 Py_ssize_t n = -1;
1525 PyObject *result = NULL, *chunks = NULL;
1526
1527 CHECK_INITIALIZED(self);
1528
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001529 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001530 return NULL;
1531
1532 CHECK_CLOSED(self);
1533
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001534 if (self->decoder == NULL)
1535 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001536
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001537 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538 return NULL;
1539
1540 if (n < 0) {
1541 /* Read everything */
1542 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1543 PyObject *decoded;
1544 if (bytes == NULL)
1545 goto fail;
1546 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1547 bytes, Py_True, NULL);
1548 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001549 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550 goto fail;
1551
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001552 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001553
1554 if (result == NULL) {
1555 Py_DECREF(decoded);
1556 return NULL;
1557 }
1558
1559 PyUnicode_AppendAndDel(&result, decoded);
1560 if (result == NULL)
1561 goto fail;
1562
1563 Py_CLEAR(self->snapshot);
1564 return result;
1565 }
1566 else {
1567 int res = 1;
1568 Py_ssize_t remaining = n;
1569
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001570 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001571 if (result == NULL)
1572 goto fail;
1573 remaining -= PyUnicode_GET_SIZE(result);
1574
1575 /* Keep reading chunks until we have n characters to return */
1576 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001577 res = textiowrapper_read_chunk(self);
Gregory P. Smith51359922012-06-23 23:55:39 -07001578 if (res < 0) {
1579 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1580 when EINTR occurs so we needn't do it ourselves. */
1581 if (_PyIO_trap_eintr()) {
1582 continue;
1583 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001584 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001585 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001586 if (res == 0) /* EOF */
1587 break;
1588 if (chunks == NULL) {
1589 chunks = PyList_New(0);
1590 if (chunks == NULL)
1591 goto fail;
1592 }
1593 if (PyList_Append(chunks, result) < 0)
1594 goto fail;
1595 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001596 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001597 if (result == NULL)
1598 goto fail;
1599 remaining -= PyUnicode_GET_SIZE(result);
1600 }
1601 if (chunks != NULL) {
1602 if (result != NULL && PyList_Append(chunks, result) < 0)
1603 goto fail;
1604 Py_CLEAR(result);
1605 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1606 if (result == NULL)
1607 goto fail;
1608 Py_CLEAR(chunks);
1609 }
1610 return result;
1611 }
1612 fail:
1613 Py_XDECREF(result);
1614 Py_XDECREF(chunks);
1615 return NULL;
1616}
1617
1618
1619/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1620 that is to the NUL character. Otherwise the function will produce
1621 incorrect results. */
1622static Py_UNICODE *
1623find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1624{
1625 Py_UNICODE *s = start;
1626 for (;;) {
1627 while (*s > ch)
1628 s++;
1629 if (*s == ch)
1630 return s;
1631 if (s == end)
1632 return NULL;
1633 s++;
1634 }
1635}
1636
1637Py_ssize_t
1638_PyIO_find_line_ending(
1639 int translated, int universal, PyObject *readnl,
1640 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1641{
1642 Py_ssize_t len = end - start;
1643
1644 if (translated) {
1645 /* Newlines are already translated, only search for \n */
1646 Py_UNICODE *pos = find_control_char(start, end, '\n');
1647 if (pos != NULL)
1648 return pos - start + 1;
1649 else {
1650 *consumed = len;
1651 return -1;
1652 }
1653 }
1654 else if (universal) {
1655 /* Universal newline search. Find any of \r, \r\n, \n
1656 * The decoder ensures that \r\n are not split in two pieces
1657 */
1658 Py_UNICODE *s = start;
1659 for (;;) {
1660 Py_UNICODE ch;
1661 /* Fast path for non-control chars. The loop always ends
1662 since the Py_UNICODE storage is NUL-terminated. */
1663 while (*s > '\r')
1664 s++;
1665 if (s >= end) {
1666 *consumed = len;
1667 return -1;
1668 }
1669 ch = *s++;
1670 if (ch == '\n')
1671 return s - start;
1672 if (ch == '\r') {
1673 if (*s == '\n')
1674 return s - start + 1;
1675 else
1676 return s - start;
1677 }
1678 }
1679 }
1680 else {
1681 /* Non-universal mode. */
1682 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1683 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1684 if (readnl_len == 1) {
1685 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1686 if (pos != NULL)
1687 return pos - start + 1;
1688 *consumed = len;
1689 return -1;
1690 }
1691 else {
1692 Py_UNICODE *s = start;
1693 Py_UNICODE *e = end - readnl_len + 1;
1694 Py_UNICODE *pos;
1695 if (e < s)
1696 e = s;
1697 while (s < e) {
1698 Py_ssize_t i;
1699 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1700 if (pos == NULL || pos >= e)
1701 break;
1702 for (i = 1; i < readnl_len; i++) {
1703 if (pos[i] != nl[i])
1704 break;
1705 }
1706 if (i == readnl_len)
1707 return pos - start + readnl_len;
1708 s = pos + 1;
1709 }
1710 pos = find_control_char(e, end, nl[0]);
1711 if (pos == NULL)
1712 *consumed = len;
1713 else
1714 *consumed = pos - start;
1715 return -1;
1716 }
1717 }
1718}
1719
1720static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001721_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722{
1723 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1724 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1725 int res;
1726
1727 CHECK_CLOSED(self);
1728
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001729 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 return NULL;
1731
1732 chunked = 0;
1733
1734 while (1) {
1735 Py_UNICODE *ptr;
1736 Py_ssize_t line_len;
1737 Py_ssize_t consumed = 0;
1738
1739 /* First, get some data if necessary */
1740 res = 1;
1741 while (!self->decoded_chars ||
1742 !PyUnicode_GET_SIZE(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001743 res = textiowrapper_read_chunk(self);
Gregory P. Smith51359922012-06-23 23:55:39 -07001744 if (res < 0) {
1745 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1746 when EINTR occurs so we needn't do it ourselves. */
1747 if (_PyIO_trap_eintr()) {
1748 continue;
1749 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001750 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001751 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752 if (res == 0)
1753 break;
1754 }
1755 if (res == 0) {
1756 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001757 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001758 Py_CLEAR(self->snapshot);
1759 start = endpos = offset_to_buffer = 0;
1760 break;
1761 }
1762
1763 if (remaining == NULL) {
1764 line = self->decoded_chars;
1765 start = self->decoded_chars_used;
1766 offset_to_buffer = 0;
1767 Py_INCREF(line);
1768 }
1769 else {
1770 assert(self->decoded_chars_used == 0);
1771 line = PyUnicode_Concat(remaining, self->decoded_chars);
1772 start = 0;
1773 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1774 Py_CLEAR(remaining);
1775 if (line == NULL)
1776 goto error;
1777 }
1778
1779 ptr = PyUnicode_AS_UNICODE(line);
1780 line_len = PyUnicode_GET_SIZE(line);
1781
1782 endpos = _PyIO_find_line_ending(
1783 self->readtranslate, self->readuniversal, self->readnl,
1784 ptr + start, ptr + line_len, &consumed);
1785 if (endpos >= 0) {
1786 endpos += start;
1787 if (limit >= 0 && (endpos - start) + chunked >= limit)
1788 endpos = start + limit - chunked;
1789 break;
1790 }
1791
1792 /* We can put aside up to `endpos` */
1793 endpos = consumed + start;
1794 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1795 /* Didn't find line ending, but reached length limit */
1796 endpos = start + limit - chunked;
1797 break;
1798 }
1799
1800 if (endpos > start) {
1801 /* No line ending seen yet - put aside current data */
1802 PyObject *s;
1803 if (chunks == NULL) {
1804 chunks = PyList_New(0);
1805 if (chunks == NULL)
1806 goto error;
1807 }
1808 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1809 if (s == NULL)
1810 goto error;
1811 if (PyList_Append(chunks, s) < 0) {
1812 Py_DECREF(s);
1813 goto error;
1814 }
1815 chunked += PyUnicode_GET_SIZE(s);
1816 Py_DECREF(s);
1817 }
1818 /* There may be some remaining bytes we'll have to prepend to the
1819 next chunk of data */
1820 if (endpos < line_len) {
1821 remaining = PyUnicode_FromUnicode(
1822 ptr + endpos, line_len - endpos);
1823 if (remaining == NULL)
1824 goto error;
1825 }
1826 Py_CLEAR(line);
1827 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001828 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001829 }
1830
1831 if (line != NULL) {
1832 /* Our line ends in the current buffer */
1833 self->decoded_chars_used = endpos - offset_to_buffer;
1834 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1835 if (start == 0 && Py_REFCNT(line) == 1) {
1836 if (PyUnicode_Resize(&line, endpos) < 0)
1837 goto error;
1838 }
1839 else {
1840 PyObject *s = PyUnicode_FromUnicode(
1841 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1842 Py_CLEAR(line);
1843 if (s == NULL)
1844 goto error;
1845 line = s;
1846 }
1847 }
1848 }
1849 if (remaining != NULL) {
1850 if (chunks == NULL) {
1851 chunks = PyList_New(0);
1852 if (chunks == NULL)
1853 goto error;
1854 }
1855 if (PyList_Append(chunks, remaining) < 0)
1856 goto error;
1857 Py_CLEAR(remaining);
1858 }
1859 if (chunks != NULL) {
1860 if (line != NULL && PyList_Append(chunks, line) < 0)
1861 goto error;
1862 Py_CLEAR(line);
1863 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1864 if (line == NULL)
1865 goto error;
1866 Py_DECREF(chunks);
1867 }
1868 if (line == NULL)
1869 line = PyUnicode_FromStringAndSize(NULL, 0);
1870
1871 return line;
1872
1873 error:
1874 Py_XDECREF(chunks);
1875 Py_XDECREF(remaining);
1876 Py_XDECREF(line);
1877 return NULL;
1878}
1879
1880static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001881textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001882{
1883 Py_ssize_t limit = -1;
1884
1885 CHECK_INITIALIZED(self);
1886 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1887 return NULL;
1888 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001889 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001890}
1891
1892/* Seek and Tell */
1893
1894typedef struct {
1895 Py_off_t start_pos;
1896 int dec_flags;
1897 int bytes_to_feed;
1898 int chars_to_skip;
1899 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001900} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001901
/*
   To speed up cookie packing/unpacking, the cookie_type fields are stored in
   a temporary byte string which is converted with _PyLong_FromByteArray()
   (packing) or filled by _PyLong_AsByteArray() (unpacking).
   The following macros give the offset of each cookie_type field inside
   that intermediary byte string.  Each offset is expressed relative to the
   field stored just before it.
 */

#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if !defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the fields are stored in declaration order, so the
   least significant byte of start_pos naturally ends up being the least
   significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   0

/* Big-endian mode: we still want the least significant byte of start_pos to
   be the least significant byte of the cookie, which means the fields must
   be stored in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#endif
1939
1940static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001941textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001942{
1943 unsigned char buffer[COOKIE_BUF_LEN];
1944 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1945 if (cookieLong == NULL)
1946 return -1;
1947
1948 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1949 IS_LITTLE_ENDIAN, 0) < 0) {
1950 Py_DECREF(cookieLong);
1951 return -1;
1952 }
1953 Py_DECREF(cookieLong);
1954
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001955 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1956 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1957 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1958 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1959 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001960
1961 return 0;
1962}
1963
1964static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001965textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001966{
1967 unsigned char buffer[COOKIE_BUF_LEN];
1968
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001969 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1970 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1971 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1972 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1973 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974
1975 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1976}
1977#undef IS_LITTLE_ENDIAN
1978
1979static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001980_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001981{
1982 PyObject *res;
1983 /* When seeking to the start of the stream, we call decoder.reset()
1984 rather than decoder.getstate().
1985 This is for a few decoders such as utf-16 for which the state value
1986 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1987 utf-16, that we are expecting a BOM).
1988 */
1989 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1990 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1991 else
1992 res = PyObject_CallMethod(self->decoder, "setstate",
1993 "((yi))", "", cookie->dec_flags);
1994 if (res == NULL)
1995 return -1;
1996 Py_DECREF(res);
1997 return 0;
1998}
1999
Antoine Pitroue4501852009-05-14 18:55:55 +00002000static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002001_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002002{
2003 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002004 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002005 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2006 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2007 self->encoding_start_of_stream = 1;
2008 }
2009 else {
2010 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2011 _PyIO_zero, NULL);
2012 self->encoding_start_of_stream = 0;
2013 }
2014 if (res == NULL)
2015 return -1;
2016 Py_DECREF(res);
2017 return 0;
2018}
2019
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002020static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002021textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002022{
2023 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002024 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002026 PyObject *res;
2027 int cmp;
2028
2029 CHECK_INITIALIZED(self);
2030
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2032 return NULL;
2033 CHECK_CLOSED(self);
2034
2035 Py_INCREF(cookieObj);
2036
2037 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002038 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002039 goto fail;
2040 }
2041
2042 if (whence == 1) {
2043 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002044 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002045 if (cmp < 0)
2046 goto fail;
2047
2048 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002049 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002050 goto fail;
2051 }
2052
2053 /* Seeking to the current position should attempt to
2054 * sync the underlying buffer with the current position.
2055 */
2056 Py_DECREF(cookieObj);
2057 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2058 if (cookieObj == NULL)
2059 goto fail;
2060 }
2061 else if (whence == 2) {
2062 /* seek relative to end of file */
2063
Antoine Pitroue4501852009-05-14 18:55:55 +00002064 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002065 if (cmp < 0)
2066 goto fail;
2067
2068 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002069 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002070 goto fail;
2071 }
2072
2073 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2074 if (res == NULL)
2075 goto fail;
2076 Py_DECREF(res);
2077
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002078 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 Py_CLEAR(self->snapshot);
2080 if (self->decoder) {
2081 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2082 if (res == NULL)
2083 goto fail;
2084 Py_DECREF(res);
2085 }
2086
2087 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2088 Py_XDECREF(cookieObj);
2089 return res;
2090 }
2091 else if (whence != 0) {
2092 PyErr_Format(PyExc_ValueError,
2093 "invalid whence (%d, should be 0, 1 or 2)", whence);
2094 goto fail;
2095 }
2096
Antoine Pitroue4501852009-05-14 18:55:55 +00002097 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002098 if (cmp < 0)
2099 goto fail;
2100
2101 if (cmp == 1) {
2102 PyErr_Format(PyExc_ValueError,
2103 "negative seek position %R", cookieObj);
2104 goto fail;
2105 }
2106
2107 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2108 if (res == NULL)
2109 goto fail;
2110 Py_DECREF(res);
2111
2112 /* The strategy of seek() is to go back to the safe start point
2113 * and replay the effect of read(chars_to_skip) from there.
2114 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002115 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002116 goto fail;
2117
2118 /* Seek back to the safe start point. */
2119 posobj = PyLong_FromOff_t(cookie.start_pos);
2120 if (posobj == NULL)
2121 goto fail;
2122 res = PyObject_CallMethodObjArgs(self->buffer,
2123 _PyIO_str_seek, posobj, NULL);
2124 Py_DECREF(posobj);
2125 if (res == NULL)
2126 goto fail;
2127 Py_DECREF(res);
2128
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002129 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002130 Py_CLEAR(self->snapshot);
2131
2132 /* Restore the decoder to its state from the safe start point. */
2133 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002134 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002135 goto fail;
2136 }
2137
2138 if (cookie.chars_to_skip) {
2139 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2140 PyObject *input_chunk = PyObject_CallMethod(
2141 self->buffer, "read", "i", cookie.bytes_to_feed);
2142 PyObject *decoded;
2143
2144 if (input_chunk == NULL)
2145 goto fail;
2146
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002147 if (!PyBytes_Check(input_chunk)) {
2148 PyErr_Format(PyExc_TypeError,
2149 "underlying read() should have returned a bytes "
2150 "object, not '%.200s'",
2151 Py_TYPE(input_chunk)->tp_name);
2152 Py_DECREF(input_chunk);
2153 goto fail;
2154 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002155
2156 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2157 if (self->snapshot == NULL) {
2158 Py_DECREF(input_chunk);
2159 goto fail;
2160 }
2161
2162 decoded = PyObject_CallMethod(self->decoder, "decode",
2163 "Oi", input_chunk, (int)cookie.need_eof);
2164
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002165 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002166 goto fail;
2167
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002168 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002169
2170 /* Skip chars_to_skip of the decoded characters. */
2171 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2172 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2173 goto fail;
2174 }
2175 self->decoded_chars_used = cookie.chars_to_skip;
2176 }
2177 else {
2178 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2179 if (self->snapshot == NULL)
2180 goto fail;
2181 }
2182
Antoine Pitroue4501852009-05-14 18:55:55 +00002183 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2184 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002185 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002186 goto fail;
2187 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002188 return cookieObj;
2189 fail:
2190 Py_XDECREF(cookieObj);
2191 return NULL;
2192
2193}
2194
2195static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002196textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002197{
2198 PyObject *res;
2199 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002200 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002201 PyObject *next_input;
2202 Py_ssize_t chars_to_skip, chars_decoded;
2203 PyObject *saved_state = NULL;
2204 char *input, *input_end;
2205
2206 CHECK_INITIALIZED(self);
2207 CHECK_CLOSED(self);
2208
2209 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002210 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002211 goto fail;
2212 }
2213 if (!self->telling) {
2214 PyErr_SetString(PyExc_IOError,
2215 "telling position disabled by next() call");
2216 goto fail;
2217 }
2218
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002219 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002220 return NULL;
2221 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2222 if (res == NULL)
2223 goto fail;
2224 Py_DECREF(res);
2225
2226 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2227 if (posobj == NULL)
2228 goto fail;
2229
2230 if (self->decoder == NULL || self->snapshot == NULL) {
2231 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2232 return posobj;
2233 }
2234
2235#if defined(HAVE_LARGEFILE_SUPPORT)
2236 cookie.start_pos = PyLong_AsLongLong(posobj);
2237#else
2238 cookie.start_pos = PyLong_AsLong(posobj);
2239#endif
2240 if (PyErr_Occurred())
2241 goto fail;
2242
2243 /* Skip backward to the snapshot point (see _read_chunk). */
2244 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2245 goto fail;
2246
2247 assert (PyBytes_Check(next_input));
2248
2249 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2250
2251 /* How many decoded characters have been used up since the snapshot? */
2252 if (self->decoded_chars_used == 0) {
2253 /* We haven't moved from the snapshot point. */
2254 Py_DECREF(posobj);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002255 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002256 }
2257
2258 chars_to_skip = self->decoded_chars_used;
2259
2260 /* Starting from the snapshot position, we will walk the decoder
2261 * forward until it gives us enough decoded characters.
2262 */
2263 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2264 _PyIO_str_getstate, NULL);
2265 if (saved_state == NULL)
2266 goto fail;
2267
2268 /* Note our initial start point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002269 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002270 goto fail;
2271
2272 /* Feed the decoder one byte at a time. As we go, note the
2273 * nearest "safe start point" before the current location
2274 * (a point where the decoder has nothing buffered, so seek()
2275 * can safely start from there and advance to this location).
2276 */
2277 chars_decoded = 0;
2278 input = PyBytes_AS_STRING(next_input);
2279 input_end = input + PyBytes_GET_SIZE(next_input);
2280 while (input < input_end) {
2281 PyObject *state;
2282 char *dec_buffer;
2283 Py_ssize_t dec_buffer_len;
2284 int dec_flags;
2285
2286 PyObject *decoded = PyObject_CallMethod(
2287 self->decoder, "decode", "y#", input, 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002288 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002289 goto fail;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002290 chars_decoded += PyUnicode_GET_SIZE(decoded);
2291 Py_DECREF(decoded);
2292
2293 cookie.bytes_to_feed += 1;
2294
2295 state = PyObject_CallMethodObjArgs(self->decoder,
2296 _PyIO_str_getstate, NULL);
2297 if (state == NULL)
2298 goto fail;
2299 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2300 Py_DECREF(state);
2301 goto fail;
2302 }
2303 Py_DECREF(state);
2304
2305 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2306 /* Decoder buffer is empty, so this is a safe start point. */
2307 cookie.start_pos += cookie.bytes_to_feed;
2308 chars_to_skip -= chars_decoded;
2309 cookie.dec_flags = dec_flags;
2310 cookie.bytes_to_feed = 0;
2311 chars_decoded = 0;
2312 }
2313 if (chars_decoded >= chars_to_skip)
2314 break;
2315 input++;
2316 }
2317 if (input == input_end) {
2318 /* We didn't get enough decoded data; signal EOF to get more. */
2319 PyObject *decoded = PyObject_CallMethod(
2320 self->decoder, "decode", "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002321 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002322 goto fail;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002323 chars_decoded += PyUnicode_GET_SIZE(decoded);
2324 Py_DECREF(decoded);
2325 cookie.need_eof = 1;
2326
2327 if (chars_decoded < chars_to_skip) {
2328 PyErr_SetString(PyExc_IOError,
2329 "can't reconstruct logical file position");
2330 goto fail;
2331 }
2332 }
2333
2334 /* finally */
2335 Py_XDECREF(posobj);
2336 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2337 Py_DECREF(saved_state);
2338 if (res == NULL)
2339 return NULL;
2340 Py_DECREF(res);
2341
2342 /* The returned cookie corresponds to the last safe start point. */
2343 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002344 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345
2346 fail:
2347 Py_XDECREF(posobj);
2348 if (saved_state) {
2349 PyObject *type, *value, *traceback;
2350 PyErr_Fetch(&type, &value, &traceback);
2351
2352 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2353 Py_DECREF(saved_state);
2354 if (res == NULL)
2355 return NULL;
2356 Py_DECREF(res);
2357
2358 PyErr_Restore(type, value, traceback);
2359 }
2360 return NULL;
2361}
2362
2363static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002364textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002365{
2366 PyObject *pos = Py_None;
2367 PyObject *res;
2368
2369 CHECK_INITIALIZED(self)
2370 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2371 return NULL;
2372 }
2373
2374 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2375 if (res == NULL)
2376 return NULL;
2377 Py_DECREF(res);
2378
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002379 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002380}
2381
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002382static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002383textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002384{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002385 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002386
2387 CHECK_INITIALIZED(self);
2388
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002389 res = PyUnicode_FromString("<_io.TextIOWrapper");
2390 if (res == NULL)
2391 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002392 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2393 if (nameobj == NULL) {
2394 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2395 PyErr_Clear();
2396 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002397 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002398 }
2399 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002400 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002401 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002402 if (s == NULL)
2403 goto error;
2404 PyUnicode_AppendAndDel(&res, s);
2405 if (res == NULL)
2406 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002407 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002408 modeobj = PyObject_GetAttrString((PyObject *) self, "mode");
2409 if (modeobj == NULL) {
2410 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2411 PyErr_Clear();
2412 else
2413 goto error;
2414 }
2415 else {
2416 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2417 Py_DECREF(modeobj);
2418 if (s == NULL)
2419 goto error;
2420 PyUnicode_AppendAndDel(&res, s);
2421 if (res == NULL)
2422 return NULL;
2423 }
2424 s = PyUnicode_FromFormat("%U encoding=%R>",
2425 res, self->encoding);
2426 Py_DECREF(res);
2427 return s;
2428error:
2429 Py_XDECREF(res);
2430 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002431}
2432
2433
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002434/* Inquiries */
2435
2436static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002437textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002438{
2439 CHECK_INITIALIZED(self);
2440 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2441}
2442
2443static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002444textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002445{
2446 CHECK_INITIALIZED(self);
2447 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2448}
2449
2450static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002451textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002452{
2453 CHECK_INITIALIZED(self);
2454 return PyObject_CallMethod(self->buffer, "readable", NULL);
2455}
2456
2457static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002458textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002459{
2460 CHECK_INITIALIZED(self);
2461 return PyObject_CallMethod(self->buffer, "writable", NULL);
2462}
2463
2464static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002465textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002466{
2467 CHECK_INITIALIZED(self);
2468 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2469}
2470
2471static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002472textiowrapper_getstate(textio *self, PyObject *args)
2473{
2474 PyErr_Format(PyExc_TypeError,
2475 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2476 return NULL;
2477}
2478
2479static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002480textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002481{
2482 CHECK_INITIALIZED(self);
2483 CHECK_CLOSED(self);
2484 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002485 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486 return NULL;
2487 return PyObject_CallMethod(self->buffer, "flush", NULL);
2488}
2489
2490static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002491textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002492{
2493 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002494 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002495 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002496
Antoine Pitrou6be88762010-05-03 16:48:20 +00002497 res = textiowrapper_closed_get(self, NULL);
2498 if (res == NULL)
2499 return NULL;
2500 r = PyObject_IsTrue(res);
2501 Py_DECREF(res);
2502 if (r < 0)
2503 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002504
Antoine Pitrou6be88762010-05-03 16:48:20 +00002505 if (r > 0) {
2506 Py_RETURN_NONE; /* stream already closed */
2507 }
2508 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002509 if (self->deallocating) {
2510 res = PyObject_CallMethod(self->buffer, "_dealloc_warn", "O", self);
2511 if (res)
2512 Py_DECREF(res);
2513 else
2514 PyErr_Clear();
2515 }
Antoine Pitrou6be88762010-05-03 16:48:20 +00002516 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2517 if (res == NULL) {
2518 return NULL;
2519 }
2520 else
2521 Py_DECREF(res);
2522
2523 return PyObject_CallMethod(self->buffer, "close", NULL);
2524 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002525}
2526
2527static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002528textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002529{
2530 PyObject *line;
2531
2532 CHECK_INITIALIZED(self);
2533
2534 self->telling = 0;
2535 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2536 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002537 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002538 }
2539 else {
2540 line = PyObject_CallMethodObjArgs((PyObject *)self,
2541 _PyIO_str_readline, NULL);
2542 if (line && !PyUnicode_Check(line)) {
2543 PyErr_Format(PyExc_IOError,
2544 "readline() should have returned an str object, "
2545 "not '%.200s'", Py_TYPE(line)->tp_name);
2546 Py_DECREF(line);
2547 return NULL;
2548 }
2549 }
2550
2551 if (line == NULL)
2552 return NULL;
2553
2554 if (PyUnicode_GET_SIZE(line) == 0) {
2555 /* Reached EOF or would have blocked */
2556 Py_DECREF(line);
2557 Py_CLEAR(self->snapshot);
2558 self->telling = self->seekable;
2559 return NULL;
2560 }
2561
2562 return line;
2563}
2564
2565static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002566textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567{
2568 CHECK_INITIALIZED(self);
2569 return PyObject_GetAttrString(self->buffer, "name");
2570}
2571
2572static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002573textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002574{
2575 CHECK_INITIALIZED(self);
2576 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2577}
2578
2579static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002580textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002581{
2582 PyObject *res;
2583 CHECK_INITIALIZED(self);
2584 if (self->decoder == NULL)
2585 Py_RETURN_NONE;
2586 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2587 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002588 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2589 PyErr_Clear();
2590 Py_RETURN_NONE;
2591 }
2592 else {
2593 return NULL;
2594 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002595 }
2596 return res;
2597}
2598
2599static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002600textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002601{
2602 CHECK_INITIALIZED(self);
2603 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2604}
2605
2606static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002607textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002608{
2609 CHECK_INITIALIZED(self);
2610 return PyLong_FromSsize_t(self->chunk_size);
2611}
2612
2613static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002614textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002615{
2616 Py_ssize_t n;
2617 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002618 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619 if (n == -1 && PyErr_Occurred())
2620 return -1;
2621 if (n <= 0) {
2622 PyErr_SetString(PyExc_ValueError,
2623 "a strictly positive integer is required");
2624 return -1;
2625 }
2626 self->chunk_size = n;
2627 return 0;
2628}
2629
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002630static PyMethodDef textiowrapper_methods[] = {
2631 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2632 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2633 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2634 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2635 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2636 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002637
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002638 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2639 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2640 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2641 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2642 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002643 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002644
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002645 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2646 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2647 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002648 {NULL, NULL}
2649};
2650
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002651static PyMemberDef textiowrapper_members[] = {
2652 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2653 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2654 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002655 {NULL}
2656};
2657
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002658static PyGetSetDef textiowrapper_getset[] = {
2659 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2660 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2662*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002663 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2664 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2665 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2666 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002667 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002668};
2669
2670PyTypeObject PyTextIOWrapper_Type = {
2671 PyVarObject_HEAD_INIT(NULL, 0)
2672 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002673 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002674 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002675 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002676 0, /*tp_print*/
2677 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002678 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002679 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002680 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002681 0, /*tp_as_number*/
2682 0, /*tp_as_sequence*/
2683 0, /*tp_as_mapping*/
2684 0, /*tp_hash */
2685 0, /*tp_call*/
2686 0, /*tp_str*/
2687 0, /*tp_getattro*/
2688 0, /*tp_setattro*/
2689 0, /*tp_as_buffer*/
2690 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2691 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002692 textiowrapper_doc, /* tp_doc */
2693 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2694 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002695 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002696 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002697 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002698 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2699 textiowrapper_methods, /* tp_methods */
2700 textiowrapper_members, /* tp_members */
2701 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002702 0, /* tp_base */
2703 0, /* tp_dict */
2704 0, /* tp_descr_get */
2705 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002706 offsetof(textio, dict), /*tp_dictoffset*/
2707 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002708 0, /* tp_alloc */
2709 PyType_GenericNew, /* tp_new */
2710};