/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* TextIOBase */
15
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000016PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 {NULL, NULL}
126};
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000132 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000163 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
/* IncrementalNewlineDecoder */
178
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags stored in nldecoder_object.seennl: one bit per kind of line
   ending encountered so far. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
244PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000245_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246 PyObject *input, int final)
247{
248 PyObject *output;
249 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000250 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
256 }
257
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
262 }
263 else {
264 output = input;
265 Py_INCREF(output);
266 }
267
268 if (output == NULL)
269 return NULL;
270
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
275 }
276
277 output_len = PyUnicode_GET_SIZE(output);
278 if (self->pendingcr && (final || output_len > 0)) {
279 Py_UNICODE *out;
280 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
281 if (modified == NULL)
282 goto error;
283 out = PyUnicode_AS_UNICODE(modified);
284 out[0] = '\r';
285 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
286 output_len * sizeof(Py_UNICODE));
287 Py_DECREF(output);
288 output = modified;
289 self->pendingcr = 0;
290 output_len++;
291 }
292
293 /* retain last \r even when not translating data:
294 * then readline() is sure to get \r\n in one pass
295 */
296 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000297 if (output_len > 0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000298 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
299
300 if (Py_REFCNT(output) == 1) {
301 if (PyUnicode_Resize(&output, output_len - 1) < 0)
302 goto error;
303 }
304 else {
305 PyObject *modified = PyUnicode_FromUnicode(
306 PyUnicode_AS_UNICODE(output),
307 output_len - 1);
308 if (modified == NULL)
309 goto error;
310 Py_DECREF(output);
311 output = modified;
312 }
313 self->pendingcr = 1;
314 }
315 }
316
317 /* Record which newlines are read and do newline translation if desired,
318 all in one pass. */
319 {
320 Py_UNICODE *in_str;
321 Py_ssize_t len;
322 int seennl = self->seennl;
323 int only_lf = 0;
324
325 in_str = PyUnicode_AS_UNICODE(output);
326 len = PyUnicode_GET_SIZE(output);
327
328 if (len == 0)
329 return output;
330
331 /* If, up to now, newlines are consistently \n, do a quick check
332 for the \r *byte* with the libc's optimized memchr.
333 */
334 if (seennl == SEEN_LF || seennl == 0) {
Antoine Pitrou0e941892009-03-06 23:57:20 +0000335 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000336 }
337
Antoine Pitrou66913e22009-03-06 23:40:56 +0000338 if (only_lf) {
339 /* If not already seen, quick scan for a possible "\n" character.
340 (there's nothing else to be done, even when in translation mode)
341 */
342 if (seennl == 0 &&
343 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
344 Py_UNICODE *s, *end;
345 s = in_str;
346 end = in_str + len;
347 for (;;) {
348 Py_UNICODE c;
349 /* Fast loop for non-control characters */
350 while (*s > '\n')
351 s++;
352 c = *s++;
353 if (c == '\n') {
354 seennl |= SEEN_LF;
355 break;
356 }
357 if (s > end)
358 break;
359 }
360 }
361 /* Finished: we have scanned for newlines, and none of them
362 need translating */
363 }
364 else if (!self->translate) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 Py_UNICODE *s, *end;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000366 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000367 if (seennl == SEEN_ALL)
368 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 s = in_str;
370 end = in_str + len;
371 for (;;) {
372 Py_UNICODE c;
373 /* Fast loop for non-control characters */
374 while (*s > '\r')
375 s++;
376 c = *s++;
377 if (c == '\n')
378 seennl |= SEEN_LF;
379 else if (c == '\r') {
380 if (*s == '\n') {
381 seennl |= SEEN_CRLF;
382 s++;
383 }
384 else
385 seennl |= SEEN_CR;
386 }
387 if (s > end)
388 break;
389 if (seennl == SEEN_ALL)
390 break;
391 }
392 endscan:
393 ;
394 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000395 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 PyObject *translated = NULL;
397 Py_UNICODE *out_str;
398 Py_UNICODE *in, *out, *end;
399 if (Py_REFCNT(output) != 1) {
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 most decoders should only output non-shared strings, i.e.
403 translation is done in place. */
404 translated = PyUnicode_FromUnicode(NULL, len);
405 if (translated == NULL)
406 goto error;
407 assert(Py_REFCNT(translated) == 1);
408 memcpy(PyUnicode_AS_UNICODE(translated),
409 PyUnicode_AS_UNICODE(output),
410 len * sizeof(Py_UNICODE));
411 }
412 else {
413 translated = output;
414 }
415 out_str = PyUnicode_AS_UNICODE(translated);
416 in = in_str;
417 out = out_str;
418 end = in_str + len;
419 for (;;) {
420 Py_UNICODE c;
421 /* Fast loop for non-control characters */
422 while ((c = *in++) > '\r')
423 *out++ = c;
424 if (c == '\n') {
425 *out++ = c;
426 seennl |= SEEN_LF;
427 continue;
428 }
429 if (c == '\r') {
430 if (*in == '\n') {
431 in++;
432 seennl |= SEEN_CRLF;
433 }
434 else
435 seennl |= SEEN_CR;
436 *out++ = '\n';
437 continue;
438 }
439 if (in > end)
440 break;
441 *out++ = c;
442 }
443 if (translated != output) {
444 Py_DECREF(output);
445 output = translated;
446 }
447 if (out - out_str != len) {
448 if (PyUnicode_Resize(&output, out - out_str) < 0)
449 goto error;
450 }
451 }
452 self->seennl |= seennl;
453 }
454
455 return output;
456
457 error:
458 Py_DECREF(output);
459 return NULL;
460}
461
462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000463incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000564 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565};
566
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000569 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570};
571
572PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000575 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000593 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000600 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611};
612
613
/* TextIOWrapper */
615
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
633 "newline.\n"
634 "\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
637 );
638
639typedef PyObject *
640 (*encodefunc_t)(PyObject *, PyObject *);
641
642typedef struct
643{
644 PyObject_HEAD
645 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000646 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000647 Py_ssize_t chunk_size;
648 PyObject *buffer;
649 PyObject *encoding;
650 PyObject *encoder;
651 PyObject *decoder;
652 PyObject *readnl;
653 PyObject *errors;
654 const char *writenl; /* utf-8 encoded, NULL stands for \n */
655 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200656 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000657 char readuniversal;
658 char readtranslate;
659 char writetranslate;
660 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200661 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000662 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000663 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000664 /* Specialized encoding func (see below) */
665 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000666 /* Whether or not it's the start of the stream */
667 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000668
669 /* Reads and writes are internally buffered in order to speed things up.
670 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000671
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000672 Please also note that text to be written is first encoded before being
673 buffered. This is necessary so that encoding errors are immediately
674 reported to the caller, but it unfortunately means that the
675 IncrementalEncoder (whose encode() method is always written in Python)
676 becomes a bottleneck for small writes.
677 */
678 PyObject *decoded_chars; /* buffer for text returned from decoder */
679 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
680 PyObject *pending_bytes; /* list of bytes objects waiting to be
681 written, or NULL */
682 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000683
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000684 /* snapshot is either None, or a tuple (dec_flags, next_input) where
685 * dec_flags is the second (integer) item of the decoder state and
686 * next_input is the chunk of input bytes that comes next after the
687 * snapshot point. We use this to reconstruct decoder states in tell().
688 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000689 PyObject *snapshot;
690 /* Bytes-to-characters ratio for the current chunk. Serves as input for
691 the heuristic in tell(). */
692 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000693
694 /* Cache raw object if it's a FileIO object */
695 PyObject *raw;
696
697 PyObject *weakreflist;
698 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700
701
/* A couple of specialized cases in order to bypass the slow incremental
   encoding methods for the most popular encodings. */
704
705static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000706ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707{
708 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
709 PyUnicode_GET_SIZE(text),
710 PyBytes_AS_STRING(self->errors));
711}
712
713static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000714utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715{
716 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
717 PyUnicode_GET_SIZE(text),
718 PyBytes_AS_STRING(self->errors), 1);
719}
720
721static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000722utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723{
724 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
725 PyUnicode_GET_SIZE(text),
726 PyBytes_AS_STRING(self->errors), -1);
727}
728
729static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000730utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731{
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 if (!self->encoding_start_of_stream) {
733 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000734#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000735 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000737 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000738#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000739 }
740 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
741 PyUnicode_GET_SIZE(text),
742 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743}
744
Antoine Pitroue4501852009-05-14 18:55:55 +0000745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000746utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000747{
748 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
749 PyUnicode_GET_SIZE(text),
750 PyBytes_AS_STRING(self->errors), 1);
751}
752
753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000755{
756 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
757 PyUnicode_GET_SIZE(text),
758 PyBytes_AS_STRING(self->errors), -1);
759}
760
761static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000762utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000763{
764 if (!self->encoding_start_of_stream) {
765 /* Skip the BOM and use native byte ordering */
766#if defined(WORDS_BIGENDIAN)
767 return utf32be_encode(self, text);
768#else
769 return utf32le_encode(self, text);
770#endif
771 }
772 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
773 PyUnicode_GET_SIZE(text),
774 PyBytes_AS_STRING(self->errors), 0);
775}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776
777static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000778utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000779{
780 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
781 PyUnicode_GET_SIZE(text),
782 PyBytes_AS_STRING(self->errors));
783}
784
785static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000786latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000787{
788 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
789 PyUnicode_GET_SIZE(text),
790 PyBytes_AS_STRING(self->errors));
791}
792
/* Map normalized encoding names onto the specialized encoding funcs */
794
795typedef struct {
796 const char *name;
797 encodefunc_t encodefunc;
798} encodefuncentry;
799
Antoine Pitrou24f36292009-03-28 22:16:42 +0000800static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000801 {"ascii", (encodefunc_t) ascii_encode},
802 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000803 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000804 {"utf-16-be", (encodefunc_t) utf16be_encode},
805 {"utf-16-le", (encodefunc_t) utf16le_encode},
806 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000807 {"utf-32-be", (encodefunc_t) utf32be_encode},
808 {"utf-32-le", (encodefunc_t) utf32le_encode},
809 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000810 {NULL, NULL}
811};
812
813
814static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000815textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000816{
817 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200818 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000819 NULL};
820 PyObject *buffer, *raw;
821 char *encoding = NULL;
822 char *errors = NULL;
823 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200824 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000825 _PyIO_State *state = IO_STATE;
826
827 PyObject *res;
828 int r;
829
830 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000831 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200832 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200834 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000835 return -1;
836
837 if (newline && newline[0] != '\0'
838 && !(newline[0] == '\n' && newline[1] == '\0')
839 && !(newline[0] == '\r' && newline[1] == '\0')
840 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
841 PyErr_Format(PyExc_ValueError,
842 "illegal newline value: %s", newline);
843 return -1;
844 }
845
846 Py_CLEAR(self->buffer);
847 Py_CLEAR(self->encoding);
848 Py_CLEAR(self->encoder);
849 Py_CLEAR(self->decoder);
850 Py_CLEAR(self->readnl);
851 Py_CLEAR(self->decoded_chars);
852 Py_CLEAR(self->pending_bytes);
853 Py_CLEAR(self->snapshot);
854 Py_CLEAR(self->errors);
855 Py_CLEAR(self->raw);
856 self->decoded_chars_used = 0;
857 self->pending_bytes_count = 0;
858 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000859 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000860
861 if (encoding == NULL) {
862 /* Try os.device_encoding(fileno) */
863 PyObject *fileno;
864 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
865 /* Ignore only AttributeError and UnsupportedOperation */
866 if (fileno == NULL) {
867 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
868 PyErr_ExceptionMatches(state->unsupported_operation)) {
869 PyErr_Clear();
870 }
871 else {
872 goto error;
873 }
874 }
875 else {
876 self->encoding = PyObject_CallMethod(state->os_module,
877 "device_encoding",
878 "N", fileno);
879 if (self->encoding == NULL)
880 goto error;
881 else if (!PyUnicode_Check(self->encoding))
882 Py_CLEAR(self->encoding);
883 }
884 }
885 if (encoding == NULL && self->encoding == NULL) {
886 if (state->locale_module == NULL) {
887 state->locale_module = PyImport_ImportModule("locale");
888 if (state->locale_module == NULL)
889 goto catch_ImportError;
890 else
891 goto use_locale;
892 }
893 else {
894 use_locale:
895 self->encoding = PyObject_CallMethod(
896 state->locale_module, "getpreferredencoding", NULL);
897 if (self->encoding == NULL) {
898 catch_ImportError:
899 /*
900 Importing locale can raise a ImportError because of
901 _functools, and locale.getpreferredencoding can raise a
902 ImportError if _locale is not available. These will happen
903 during module building.
904 */
905 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
906 PyErr_Clear();
907 self->encoding = PyUnicode_FromString("ascii");
908 }
909 else
910 goto error;
911 }
912 else if (!PyUnicode_Check(self->encoding))
913 Py_CLEAR(self->encoding);
914 }
915 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000916 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000917 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000918 if (encoding == NULL)
919 goto error;
920 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000921 else if (encoding != NULL) {
922 self->encoding = PyUnicode_FromString(encoding);
923 if (self->encoding == NULL)
924 goto error;
925 }
926 else {
927 PyErr_SetString(PyExc_IOError,
928 "could not determine default encoding");
929 }
930
931 if (errors == NULL)
932 errors = "strict";
933 self->errors = PyBytes_FromString(errors);
934 if (self->errors == NULL)
935 goto error;
936
937 self->chunk_size = 8192;
938 self->readuniversal = (newline == NULL || newline[0] == '\0');
939 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200940 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000941 self->readtranslate = (newline == NULL);
942 if (newline) {
943 self->readnl = PyUnicode_FromString(newline);
944 if (self->readnl == NULL)
945 return -1;
946 }
947 self->writetranslate = (newline == NULL || newline[0] != '\0');
948 if (!self->readuniversal && self->readnl) {
949 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000950 if (self->writenl == NULL)
951 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000952 if (!strcmp(self->writenl, "\n"))
953 self->writenl = NULL;
954 }
955#ifdef MS_WINDOWS
956 else
957 self->writenl = "\r\n";
958#endif
959
960 /* Build the decoder object */
961 res = PyObject_CallMethod(buffer, "readable", NULL);
962 if (res == NULL)
963 goto error;
964 r = PyObject_IsTrue(res);
965 Py_DECREF(res);
966 if (r == -1)
967 goto error;
968 if (r == 1) {
969 self->decoder = PyCodec_IncrementalDecoder(
970 encoding, errors);
971 if (self->decoder == NULL)
972 goto error;
973
974 if (self->readuniversal) {
975 PyObject *incrementalDecoder = PyObject_CallFunction(
976 (PyObject *)&PyIncrementalNewlineDecoder_Type,
977 "Oi", self->decoder, (int)self->readtranslate);
978 if (incrementalDecoder == NULL)
979 goto error;
980 Py_CLEAR(self->decoder);
981 self->decoder = incrementalDecoder;
982 }
983 }
984
985 /* Build the encoder object */
986 res = PyObject_CallMethod(buffer, "writable", NULL);
987 if (res == NULL)
988 goto error;
989 r = PyObject_IsTrue(res);
990 Py_DECREF(res);
991 if (r == -1)
992 goto error;
993 if (r == 1) {
994 PyObject *ci;
995 self->encoder = PyCodec_IncrementalEncoder(
996 encoding, errors);
997 if (self->encoder == NULL)
998 goto error;
999 /* Get the normalized named of the codec */
1000 ci = _PyCodec_Lookup(encoding);
1001 if (ci == NULL)
1002 goto error;
1003 res = PyObject_GetAttrString(ci, "name");
1004 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001005 if (res == NULL) {
1006 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1007 PyErr_Clear();
1008 else
1009 goto error;
1010 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001011 else if (PyUnicode_Check(res)) {
1012 encodefuncentry *e = encodefuncs;
1013 while (e->name != NULL) {
1014 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1015 self->encodefunc = e->encodefunc;
1016 break;
1017 }
1018 e++;
1019 }
1020 }
1021 Py_XDECREF(res);
1022 }
1023
1024 self->buffer = buffer;
1025 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001026
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001027 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1028 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1029 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1030 raw = PyObject_GetAttrString(buffer, "raw");
1031 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001032 if (raw == NULL) {
1033 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1034 PyErr_Clear();
1035 else
1036 goto error;
1037 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001038 else if (Py_TYPE(raw) == &PyFileIO_Type)
1039 self->raw = raw;
1040 else
1041 Py_DECREF(raw);
1042 }
1043
1044 res = PyObject_CallMethod(buffer, "seekable", NULL);
1045 if (res == NULL)
1046 goto error;
1047 self->seekable = self->telling = PyObject_IsTrue(res);
1048 Py_DECREF(res);
1049
Antoine Pitroue96ec682011-07-23 21:46:35 +02001050 self->has_read1 = PyObject_HasAttrString(buffer, "read1");
1051
Antoine Pitroue4501852009-05-14 18:55:55 +00001052 self->encoding_start_of_stream = 0;
1053 if (self->seekable && self->encoder) {
1054 PyObject *cookieObj;
1055 int cmp;
1056
1057 self->encoding_start_of_stream = 1;
1058
1059 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1060 if (cookieObj == NULL)
1061 goto error;
1062
1063 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1064 Py_DECREF(cookieObj);
1065 if (cmp < 0) {
1066 goto error;
1067 }
1068
1069 if (cmp == 0) {
1070 self->encoding_start_of_stream = 0;
1071 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1072 _PyIO_zero, NULL);
1073 if (res == NULL)
1074 goto error;
1075 Py_DECREF(res);
1076 }
1077 }
1078
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001079 self->ok = 1;
1080 return 0;
1081
1082 error:
1083 return -1;
1084}
1085
1086static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001087_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001088{
1089 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1090 return -1;
1091 self->ok = 0;
1092 Py_CLEAR(self->buffer);
1093 Py_CLEAR(self->encoding);
1094 Py_CLEAR(self->encoder);
1095 Py_CLEAR(self->decoder);
1096 Py_CLEAR(self->readnl);
1097 Py_CLEAR(self->decoded_chars);
1098 Py_CLEAR(self->pending_bytes);
1099 Py_CLEAR(self->snapshot);
1100 Py_CLEAR(self->errors);
1101 Py_CLEAR(self->raw);
1102 return 0;
1103}
1104
1105static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001106textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001107{
Antoine Pitroue033e062010-10-29 10:38:18 +00001108 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001109 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001110 return;
1111 _PyObject_GC_UNTRACK(self);
1112 if (self->weakreflist != NULL)
1113 PyObject_ClearWeakRefs((PyObject *)self);
1114 Py_CLEAR(self->dict);
1115 Py_TYPE(self)->tp_free((PyObject *)self);
1116}
1117
1118static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001119textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001120{
1121 Py_VISIT(self->buffer);
1122 Py_VISIT(self->encoding);
1123 Py_VISIT(self->encoder);
1124 Py_VISIT(self->decoder);
1125 Py_VISIT(self->readnl);
1126 Py_VISIT(self->decoded_chars);
1127 Py_VISIT(self->pending_bytes);
1128 Py_VISIT(self->snapshot);
1129 Py_VISIT(self->errors);
1130 Py_VISIT(self->raw);
1131
1132 Py_VISIT(self->dict);
1133 return 0;
1134}
1135
1136static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001137textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001138{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001139 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001140 return -1;
1141 Py_CLEAR(self->dict);
1142 return 0;
1143}
1144
1145static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001146textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001147
/* Make the enclosing function return NULL, with an exception set, when the
   stream is closed.  Exact TextIOWrapper instances take a shortcut to make
   the common case faster: the cached raw FileIO object is asked directly
   when available, otherwise the `closed` getter is used.  Any other type
   goes through _PyIOBase_check_closed().  Only usable in functions that
   return a pointer. */
#define CHECK_CLOSED(self) \
    do { \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _closed; \
            if ((self)->raw != NULL) { \
                _closed = _PyFileIO_closed((self)->raw); \
            } \
            else { \
                PyObject *_res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                _closed = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (_closed < 0) \
                    return NULL; \
            } \
            if (_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1174
/* Make the enclosing function raise ValueError and return NULL when the
   object is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1186
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1198
1199
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001200static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001201textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001202{
1203 PyObject *buffer, *res;
1204 CHECK_INITIALIZED(self);
1205 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1206 if (res == NULL)
1207 return NULL;
1208 Py_DECREF(res);
1209 buffer = self->buffer;
1210 self->buffer = NULL;
1211 self->detached = 1;
1212 self->ok = 0;
1213 return buffer;
1214}
1215
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001216Py_LOCAL_INLINE(const Py_UNICODE *)
1217findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1218{
1219 /* like wcschr, but doesn't stop at NULL characters */
1220 while (size-- > 0) {
1221 if (*s == ch)
1222 return s;
1223 s++;
1224 }
1225 return NULL;
1226}
1227
Antoine Pitrou24f36292009-03-28 22:16:42 +00001228/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001229 underlying buffered object, though. */
1230static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001231_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001232{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001233 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001234
1235 if (self->pending_bytes == NULL)
1236 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001237
1238 pending = self->pending_bytes;
1239 Py_INCREF(pending);
1240 self->pending_bytes_count = 0;
1241 Py_CLEAR(self->pending_bytes);
1242
1243 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1244 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245 if (b == NULL)
1246 return -1;
1247 ret = PyObject_CallMethodObjArgs(self->buffer,
1248 _PyIO_str_write, b, NULL);
1249 Py_DECREF(b);
1250 if (ret == NULL)
1251 return -1;
1252 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001253 return 0;
1254}
1255
1256static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001257textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001258{
1259 PyObject *ret;
1260 PyObject *text; /* owned reference */
1261 PyObject *b;
1262 Py_ssize_t textlen;
1263 int haslf = 0;
1264 int needflush = 0;
1265
1266 CHECK_INITIALIZED(self);
1267
1268 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1269 return NULL;
1270 }
1271
1272 CHECK_CLOSED(self);
1273
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001274 if (self->encoder == NULL)
1275 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001276
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001277 Py_INCREF(text);
1278
1279 textlen = PyUnicode_GetSize(text);
1280
1281 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1282 if (findchar(PyUnicode_AS_UNICODE(text),
1283 PyUnicode_GET_SIZE(text), '\n'))
1284 haslf = 1;
1285
1286 if (haslf && self->writetranslate && self->writenl != NULL) {
1287 PyObject *newtext = PyObject_CallMethod(
1288 text, "replace", "ss", "\n", self->writenl);
1289 Py_DECREF(text);
1290 if (newtext == NULL)
1291 return NULL;
1292 text = newtext;
1293 }
1294
Antoine Pitroue96ec682011-07-23 21:46:35 +02001295 if (self->write_through)
1296 needflush = 1;
1297 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001298 (haslf ||
1299 findchar(PyUnicode_AS_UNICODE(text),
1300 PyUnicode_GET_SIZE(text), '\r')))
1301 needflush = 1;
1302
1303 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001304 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001305 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001306 self->encoding_start_of_stream = 0;
1307 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001308 else
1309 b = PyObject_CallMethodObjArgs(self->encoder,
1310 _PyIO_str_encode, text, NULL);
1311 Py_DECREF(text);
1312 if (b == NULL)
1313 return NULL;
1314
1315 if (self->pending_bytes == NULL) {
1316 self->pending_bytes = PyList_New(0);
1317 if (self->pending_bytes == NULL) {
1318 Py_DECREF(b);
1319 return NULL;
1320 }
1321 self->pending_bytes_count = 0;
1322 }
1323 if (PyList_Append(self->pending_bytes, b) < 0) {
1324 Py_DECREF(b);
1325 return NULL;
1326 }
1327 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1328 Py_DECREF(b);
1329 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001330 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001331 return NULL;
1332 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001333
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001334 if (needflush) {
1335 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1336 if (ret == NULL)
1337 return NULL;
1338 Py_DECREF(ret);
1339 }
1340
1341 Py_CLEAR(self->snapshot);
1342
1343 if (self->decoder) {
1344 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1345 if (ret == NULL)
1346 return NULL;
1347 Py_DECREF(ret);
1348 }
1349
1350 return PyLong_FromSsize_t(textlen);
1351}
1352
1353/* Steal a reference to chars and store it in the decoded_char buffer;
1354 */
1355static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001356textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001357{
1358 Py_CLEAR(self->decoded_chars);
1359 self->decoded_chars = chars;
1360 self->decoded_chars_used = 0;
1361}
1362
1363static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001364textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001365{
1366 PyObject *chars;
1367 Py_ssize_t avail;
1368
1369 if (self->decoded_chars == NULL)
1370 return PyUnicode_FromStringAndSize(NULL, 0);
1371
1372 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1373 - self->decoded_chars_used);
1374
1375 assert(avail >= 0);
1376
1377 if (n < 0 || n > avail)
1378 n = avail;
1379
1380 if (self->decoded_chars_used > 0 || n < avail) {
1381 chars = PyUnicode_FromUnicode(
1382 PyUnicode_AS_UNICODE(self->decoded_chars)
1383 + self->decoded_chars_used, n);
1384 if (chars == NULL)
1385 return NULL;
1386 }
1387 else {
1388 chars = self->decoded_chars;
1389 Py_INCREF(chars);
1390 }
1391
1392 self->decoded_chars_used += n;
1393 return chars;
1394}
1395
1396/* Read and decode the next chunk of data from the BufferedReader.
1397 */
1398static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001399textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001400{
1401 PyObject *dec_buffer = NULL;
1402 PyObject *dec_flags = NULL;
1403 PyObject *input_chunk = NULL;
1404 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001405 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001406 int eof;
1407
1408 /* The return value is True unless EOF was reached. The decoded string is
1409 * placed in self._decoded_chars (replacing its previous value). The
1410 * entire input chunk is sent to the decoder, though some of it may remain
1411 * buffered in the decoder, yet to be converted.
1412 */
1413
1414 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001415 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001416 return -1;
1417 }
1418
1419 if (self->telling) {
1420 /* To prepare for tell(), we need to snapshot a point in the file
1421 * where the decoder's input buffer is empty.
1422 */
1423
1424 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1425 _PyIO_str_getstate, NULL);
1426 if (state == NULL)
1427 return -1;
1428 /* Given this, we know there was a valid snapshot point
1429 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1430 */
1431 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1432 Py_DECREF(state);
1433 return -1;
1434 }
1435 Py_INCREF(dec_buffer);
1436 Py_INCREF(dec_flags);
1437 Py_DECREF(state);
1438 }
1439
1440 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1441 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1442 if (chunk_size == NULL)
1443 goto fail;
1444 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001445 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1446 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001447 Py_DECREF(chunk_size);
1448 if (input_chunk == NULL)
1449 goto fail;
1450 assert(PyBytes_Check(input_chunk));
1451
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001452 nbytes = PyBytes_Size(input_chunk);
1453 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001454
1455 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1456 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1457 self->decoder, input_chunk, eof);
1458 }
1459 else {
1460 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1461 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1462 }
1463
1464 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1465 if (decoded_chars == NULL)
1466 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001467 textiowrapper_set_decoded_chars(self, decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001468 nchars = PyUnicode_GET_SIZE(decoded_chars);
1469 if (nchars > 0)
1470 self->b2cratio = (double) nbytes / nchars;
1471 else
1472 self->b2cratio = 0.0;
1473 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001474 eof = 0;
1475
1476 if (self->telling) {
1477 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1478 * next input to be decoded is dec_buffer + input_chunk.
1479 */
1480 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1481 if (next_input == NULL)
1482 goto fail;
1483 assert (PyBytes_Check(next_input));
1484 Py_DECREF(dec_buffer);
1485 Py_CLEAR(self->snapshot);
1486 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1487 }
1488 Py_DECREF(input_chunk);
1489
1490 return (eof == 0);
1491
1492 fail:
1493 Py_XDECREF(dec_buffer);
1494 Py_XDECREF(dec_flags);
1495 Py_XDECREF(input_chunk);
1496 return -1;
1497}
1498
1499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001500textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501{
1502 Py_ssize_t n = -1;
1503 PyObject *result = NULL, *chunks = NULL;
1504
1505 CHECK_INITIALIZED(self);
1506
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001507 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001508 return NULL;
1509
1510 CHECK_CLOSED(self);
1511
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001512 if (self->decoder == NULL)
1513 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001514
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001515 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001516 return NULL;
1517
1518 if (n < 0) {
1519 /* Read everything */
1520 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1521 PyObject *decoded;
1522 if (bytes == NULL)
1523 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001524
1525 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1526 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1527 bytes, 1);
1528 else
1529 decoded = PyObject_CallMethodObjArgs(
1530 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531 Py_DECREF(bytes);
1532 if (decoded == NULL)
1533 goto fail;
1534
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001535 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536
1537 if (result == NULL) {
1538 Py_DECREF(decoded);
1539 return NULL;
1540 }
1541
1542 PyUnicode_AppendAndDel(&result, decoded);
1543 if (result == NULL)
1544 goto fail;
1545
1546 Py_CLEAR(self->snapshot);
1547 return result;
1548 }
1549 else {
1550 int res = 1;
1551 Py_ssize_t remaining = n;
1552
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001553 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 if (result == NULL)
1555 goto fail;
1556 remaining -= PyUnicode_GET_SIZE(result);
1557
1558 /* Keep reading chunks until we have n characters to return */
1559 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001560 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561 if (res < 0)
1562 goto fail;
1563 if (res == 0) /* EOF */
1564 break;
1565 if (chunks == NULL) {
1566 chunks = PyList_New(0);
1567 if (chunks == NULL)
1568 goto fail;
1569 }
1570 if (PyList_Append(chunks, result) < 0)
1571 goto fail;
1572 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001573 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574 if (result == NULL)
1575 goto fail;
1576 remaining -= PyUnicode_GET_SIZE(result);
1577 }
1578 if (chunks != NULL) {
1579 if (result != NULL && PyList_Append(chunks, result) < 0)
1580 goto fail;
1581 Py_CLEAR(result);
1582 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1583 if (result == NULL)
1584 goto fail;
1585 Py_CLEAR(chunks);
1586 }
1587 return result;
1588 }
1589 fail:
1590 Py_XDECREF(result);
1591 Py_XDECREF(chunks);
1592 return NULL;
1593}
1594
1595
1596/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1597 that is to the NUL character. Otherwise the function will produce
1598 incorrect results. */
1599static Py_UNICODE *
1600find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1601{
1602 Py_UNICODE *s = start;
1603 for (;;) {
1604 while (*s > ch)
1605 s++;
1606 if (*s == ch)
1607 return s;
1608 if (s == end)
1609 return NULL;
1610 s++;
1611 }
1612}
1613
1614Py_ssize_t
1615_PyIO_find_line_ending(
1616 int translated, int universal, PyObject *readnl,
1617 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1618{
1619 Py_ssize_t len = end - start;
1620
1621 if (translated) {
1622 /* Newlines are already translated, only search for \n */
1623 Py_UNICODE *pos = find_control_char(start, end, '\n');
1624 if (pos != NULL)
1625 return pos - start + 1;
1626 else {
1627 *consumed = len;
1628 return -1;
1629 }
1630 }
1631 else if (universal) {
1632 /* Universal newline search. Find any of \r, \r\n, \n
1633 * The decoder ensures that \r\n are not split in two pieces
1634 */
1635 Py_UNICODE *s = start;
1636 for (;;) {
1637 Py_UNICODE ch;
1638 /* Fast path for non-control chars. The loop always ends
1639 since the Py_UNICODE storage is NUL-terminated. */
1640 while (*s > '\r')
1641 s++;
1642 if (s >= end) {
1643 *consumed = len;
1644 return -1;
1645 }
1646 ch = *s++;
1647 if (ch == '\n')
1648 return s - start;
1649 if (ch == '\r') {
1650 if (*s == '\n')
1651 return s - start + 1;
1652 else
1653 return s - start;
1654 }
1655 }
1656 }
1657 else {
1658 /* Non-universal mode. */
1659 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1660 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1661 if (readnl_len == 1) {
1662 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1663 if (pos != NULL)
1664 return pos - start + 1;
1665 *consumed = len;
1666 return -1;
1667 }
1668 else {
1669 Py_UNICODE *s = start;
1670 Py_UNICODE *e = end - readnl_len + 1;
1671 Py_UNICODE *pos;
1672 if (e < s)
1673 e = s;
1674 while (s < e) {
1675 Py_ssize_t i;
1676 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1677 if (pos == NULL || pos >= e)
1678 break;
1679 for (i = 1; i < readnl_len; i++) {
1680 if (pos[i] != nl[i])
1681 break;
1682 }
1683 if (i == readnl_len)
1684 return pos - start + readnl_len;
1685 s = pos + 1;
1686 }
1687 pos = find_control_char(e, end, nl[0]);
1688 if (pos == NULL)
1689 *consumed = len;
1690 else
1691 *consumed = pos - start;
1692 return -1;
1693 }
1694 }
1695}
1696
1697static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001698_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699{
1700 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1701 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1702 int res;
1703
1704 CHECK_CLOSED(self);
1705
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001706 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001707 return NULL;
1708
1709 chunked = 0;
1710
1711 while (1) {
1712 Py_UNICODE *ptr;
1713 Py_ssize_t line_len;
1714 Py_ssize_t consumed = 0;
1715
1716 /* First, get some data if necessary */
1717 res = 1;
1718 while (!self->decoded_chars ||
1719 !PyUnicode_GET_SIZE(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001720 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (res < 0)
1722 goto error;
1723 if (res == 0)
1724 break;
1725 }
1726 if (res == 0) {
1727 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001728 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001729 Py_CLEAR(self->snapshot);
1730 start = endpos = offset_to_buffer = 0;
1731 break;
1732 }
1733
1734 if (remaining == NULL) {
1735 line = self->decoded_chars;
1736 start = self->decoded_chars_used;
1737 offset_to_buffer = 0;
1738 Py_INCREF(line);
1739 }
1740 else {
1741 assert(self->decoded_chars_used == 0);
1742 line = PyUnicode_Concat(remaining, self->decoded_chars);
1743 start = 0;
1744 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1745 Py_CLEAR(remaining);
1746 if (line == NULL)
1747 goto error;
1748 }
1749
1750 ptr = PyUnicode_AS_UNICODE(line);
1751 line_len = PyUnicode_GET_SIZE(line);
1752
1753 endpos = _PyIO_find_line_ending(
1754 self->readtranslate, self->readuniversal, self->readnl,
1755 ptr + start, ptr + line_len, &consumed);
1756 if (endpos >= 0) {
1757 endpos += start;
1758 if (limit >= 0 && (endpos - start) + chunked >= limit)
1759 endpos = start + limit - chunked;
1760 break;
1761 }
1762
1763 /* We can put aside up to `endpos` */
1764 endpos = consumed + start;
1765 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1766 /* Didn't find line ending, but reached length limit */
1767 endpos = start + limit - chunked;
1768 break;
1769 }
1770
1771 if (endpos > start) {
1772 /* No line ending seen yet - put aside current data */
1773 PyObject *s;
1774 if (chunks == NULL) {
1775 chunks = PyList_New(0);
1776 if (chunks == NULL)
1777 goto error;
1778 }
1779 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1780 if (s == NULL)
1781 goto error;
1782 if (PyList_Append(chunks, s) < 0) {
1783 Py_DECREF(s);
1784 goto error;
1785 }
1786 chunked += PyUnicode_GET_SIZE(s);
1787 Py_DECREF(s);
1788 }
1789 /* There may be some remaining bytes we'll have to prepend to the
1790 next chunk of data */
1791 if (endpos < line_len) {
1792 remaining = PyUnicode_FromUnicode(
1793 ptr + endpos, line_len - endpos);
1794 if (remaining == NULL)
1795 goto error;
1796 }
1797 Py_CLEAR(line);
1798 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001799 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001800 }
1801
1802 if (line != NULL) {
1803 /* Our line ends in the current buffer */
1804 self->decoded_chars_used = endpos - offset_to_buffer;
1805 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1806 if (start == 0 && Py_REFCNT(line) == 1) {
1807 if (PyUnicode_Resize(&line, endpos) < 0)
1808 goto error;
1809 }
1810 else {
1811 PyObject *s = PyUnicode_FromUnicode(
1812 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1813 Py_CLEAR(line);
1814 if (s == NULL)
1815 goto error;
1816 line = s;
1817 }
1818 }
1819 }
1820 if (remaining != NULL) {
1821 if (chunks == NULL) {
1822 chunks = PyList_New(0);
1823 if (chunks == NULL)
1824 goto error;
1825 }
1826 if (PyList_Append(chunks, remaining) < 0)
1827 goto error;
1828 Py_CLEAR(remaining);
1829 }
1830 if (chunks != NULL) {
1831 if (line != NULL && PyList_Append(chunks, line) < 0)
1832 goto error;
1833 Py_CLEAR(line);
1834 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1835 if (line == NULL)
1836 goto error;
1837 Py_DECREF(chunks);
1838 }
1839 if (line == NULL)
1840 line = PyUnicode_FromStringAndSize(NULL, 0);
1841
1842 return line;
1843
1844 error:
1845 Py_XDECREF(chunks);
1846 Py_XDECREF(remaining);
1847 Py_XDECREF(line);
1848 return NULL;
1849}
1850
1851static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001852textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001853{
1854 Py_ssize_t limit = -1;
1855
1856 CHECK_INITIALIZED(self);
1857 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1858 return NULL;
1859 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001860 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001861}
1862
1863/* Seek and Tell */
1864
/* Decomposed form of the integer cookie exchanged between tell() and
   seek().  The per-field notes describe how seek() consumes each value. */
typedef struct {
    Py_off_t start_pos;     /* position given to buffer.seek() */
    int dec_flags;          /* flags half of the state passed to decoder.setstate() */
    int bytes_to_feed;      /* number of bytes read from the buffer and decoded */
    int chars_to_skip;      /* decoded characters marked as already consumed */
    char need_eof;          /* forwarded as the `final` argument of decoder.decode() */
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (respectively).  The following macros define at which offsets in the
   intermediary byte buffer the various cookie_type fields will be stored.
 */
1879
/* Size of the packed cookie: the plain sum of the cookie_type field sizes
   (no struct padding is included). */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

/* Byte order flag handed to _PyLong_AsByteArray()/_PyLong_FromByteArray(). */
# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

/* Byte order flag handed to _PyLong_AsByteArray()/_PyLong_FromByteArray(). */
# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1910
1911static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001912textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001913{
1914 unsigned char buffer[COOKIE_BUF_LEN];
1915 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1916 if (cookieLong == NULL)
1917 return -1;
1918
1919 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1920 IS_LITTLE_ENDIAN, 0) < 0) {
1921 Py_DECREF(cookieLong);
1922 return -1;
1923 }
1924 Py_DECREF(cookieLong);
1925
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001926 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1927 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1928 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1929 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1930 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001931
1932 return 0;
1933}
1934
1935static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001936textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001937{
1938 unsigned char buffer[COOKIE_BUF_LEN];
1939
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001940 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1941 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1942 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1943 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1944 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001945
1946 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1947}
1948#undef IS_LITTLE_ENDIAN
1949
1950static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001951_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001952{
1953 PyObject *res;
1954 /* When seeking to the start of the stream, we call decoder.reset()
1955 rather than decoder.getstate().
1956 This is for a few decoders such as utf-16 for which the state value
1957 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1958 utf-16, that we are expecting a BOM).
1959 */
1960 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1961 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1962 else
1963 res = PyObject_CallMethod(self->decoder, "setstate",
1964 "((yi))", "", cookie->dec_flags);
1965 if (res == NULL)
1966 return -1;
1967 Py_DECREF(res);
1968 return 0;
1969}
1970
Antoine Pitroue4501852009-05-14 18:55:55 +00001971static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001972_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001973{
1974 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001975 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001976 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1977 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1978 self->encoding_start_of_stream = 1;
1979 }
1980 else {
1981 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1982 _PyIO_zero, NULL);
1983 self->encoding_start_of_stream = 0;
1984 }
1985 if (res == NULL)
1986 return -1;
1987 Py_DECREF(res);
1988 return 0;
1989}
1990
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001991static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001992textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001993{
1994 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001995 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001997 PyObject *res;
1998 int cmp;
1999
2000 CHECK_INITIALIZED(self);
2001
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2003 return NULL;
2004 CHECK_CLOSED(self);
2005
2006 Py_INCREF(cookieObj);
2007
2008 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002009 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010 goto fail;
2011 }
2012
2013 if (whence == 1) {
2014 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002015 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016 if (cmp < 0)
2017 goto fail;
2018
2019 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002020 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021 goto fail;
2022 }
2023
2024 /* Seeking to the current position should attempt to
2025 * sync the underlying buffer with the current position.
2026 */
2027 Py_DECREF(cookieObj);
2028 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2029 if (cookieObj == NULL)
2030 goto fail;
2031 }
2032 else if (whence == 2) {
2033 /* seek relative to end of file */
2034
Antoine Pitroue4501852009-05-14 18:55:55 +00002035 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002036 if (cmp < 0)
2037 goto fail;
2038
2039 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002040 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002041 goto fail;
2042 }
2043
2044 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2045 if (res == NULL)
2046 goto fail;
2047 Py_DECREF(res);
2048
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002049 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002050 Py_CLEAR(self->snapshot);
2051 if (self->decoder) {
2052 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2053 if (res == NULL)
2054 goto fail;
2055 Py_DECREF(res);
2056 }
2057
2058 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2059 Py_XDECREF(cookieObj);
2060 return res;
2061 }
2062 else if (whence != 0) {
2063 PyErr_Format(PyExc_ValueError,
2064 "invalid whence (%d, should be 0, 1 or 2)", whence);
2065 goto fail;
2066 }
2067
Antoine Pitroue4501852009-05-14 18:55:55 +00002068 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002069 if (cmp < 0)
2070 goto fail;
2071
2072 if (cmp == 1) {
2073 PyErr_Format(PyExc_ValueError,
2074 "negative seek position %R", cookieObj);
2075 goto fail;
2076 }
2077
2078 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2079 if (res == NULL)
2080 goto fail;
2081 Py_DECREF(res);
2082
2083 /* The strategy of seek() is to go back to the safe start point
2084 * and replay the effect of read(chars_to_skip) from there.
2085 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002086 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002087 goto fail;
2088
2089 /* Seek back to the safe start point. */
2090 posobj = PyLong_FromOff_t(cookie.start_pos);
2091 if (posobj == NULL)
2092 goto fail;
2093 res = PyObject_CallMethodObjArgs(self->buffer,
2094 _PyIO_str_seek, posobj, NULL);
2095 Py_DECREF(posobj);
2096 if (res == NULL)
2097 goto fail;
2098 Py_DECREF(res);
2099
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002100 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002101 Py_CLEAR(self->snapshot);
2102
2103 /* Restore the decoder to its state from the safe start point. */
2104 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002105 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002106 goto fail;
2107 }
2108
2109 if (cookie.chars_to_skip) {
2110 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2111 PyObject *input_chunk = PyObject_CallMethod(
2112 self->buffer, "read", "i", cookie.bytes_to_feed);
2113 PyObject *decoded;
2114
2115 if (input_chunk == NULL)
2116 goto fail;
2117
2118 assert (PyBytes_Check(input_chunk));
2119
2120 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2121 if (self->snapshot == NULL) {
2122 Py_DECREF(input_chunk);
2123 goto fail;
2124 }
2125
2126 decoded = PyObject_CallMethod(self->decoder, "decode",
2127 "Oi", input_chunk, (int)cookie.need_eof);
2128
2129 if (decoded == NULL)
2130 goto fail;
2131
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002132 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002133
2134 /* Skip chars_to_skip of the decoded characters. */
2135 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2136 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2137 goto fail;
2138 }
2139 self->decoded_chars_used = cookie.chars_to_skip;
2140 }
2141 else {
2142 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2143 if (self->snapshot == NULL)
2144 goto fail;
2145 }
2146
Antoine Pitroue4501852009-05-14 18:55:55 +00002147 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2148 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002149 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002150 goto fail;
2151 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002152 return cookieObj;
2153 fail:
2154 Py_XDECREF(cookieObj);
2155 return NULL;
2156
2157}
2158
2159static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002160textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002161{
2162 PyObject *res;
2163 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002164 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002165 PyObject *next_input;
2166 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002167 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002168 PyObject *saved_state = NULL;
2169 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002170 char *dec_buffer;
2171 Py_ssize_t dec_buffer_len;
2172 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173
2174 CHECK_INITIALIZED(self);
2175 CHECK_CLOSED(self);
2176
2177 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002178 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002179 goto fail;
2180 }
2181 if (!self->telling) {
2182 PyErr_SetString(PyExc_IOError,
2183 "telling position disabled by next() call");
2184 goto fail;
2185 }
2186
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002187 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002188 return NULL;
2189 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2190 if (res == NULL)
2191 goto fail;
2192 Py_DECREF(res);
2193
2194 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2195 if (posobj == NULL)
2196 goto fail;
2197
2198 if (self->decoder == NULL || self->snapshot == NULL) {
2199 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2200 return posobj;
2201 }
2202
2203#if defined(HAVE_LARGEFILE_SUPPORT)
2204 cookie.start_pos = PyLong_AsLongLong(posobj);
2205#else
2206 cookie.start_pos = PyLong_AsLong(posobj);
2207#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002208 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002209 if (PyErr_Occurred())
2210 goto fail;
2211
2212 /* Skip backward to the snapshot point (see _read_chunk). */
2213 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2214 goto fail;
2215
2216 assert (PyBytes_Check(next_input));
2217
2218 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2219
2220 /* How many decoded characters have been used up since the snapshot? */
2221 if (self->decoded_chars_used == 0) {
2222 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002223 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002224 }
2225
2226 chars_to_skip = self->decoded_chars_used;
2227
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002228 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002229 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2230 _PyIO_str_getstate, NULL);
2231 if (saved_state == NULL)
2232 goto fail;
2233
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002234#define DECODER_GETSTATE() do { \
2235 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2236 _PyIO_str_getstate, NULL); \
2237 if (_state == NULL) \
2238 goto fail; \
2239 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2240 Py_DECREF(_state); \
2241 goto fail; \
2242 } \
2243 Py_DECREF(_state); \
2244 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002245
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002246 /* TODO: replace assert with exception */
2247#define DECODER_DECODE(start, len, res) do { \
2248 PyObject *_decoded = PyObject_CallMethod( \
2249 self->decoder, "decode", "y#", start, len); \
2250 if (_decoded == NULL) \
2251 goto fail; \
2252 assert (PyUnicode_Check(_decoded)); \
2253 res = PyUnicode_GET_SIZE(_decoded); \
2254 Py_DECREF(_decoded); \
2255 } while (0)
2256
2257 /* Fast search for an acceptable start point, close to our
2258 current pos */
2259 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2260 skip_back = 1;
2261 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2262 input = PyBytes_AS_STRING(next_input);
2263 while (skip_bytes > 0) {
2264 /* Decode up to temptative start point */
2265 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2266 goto fail;
2267 DECODER_DECODE(input, skip_bytes, chars_decoded);
2268 if (chars_decoded <= chars_to_skip) {
2269 DECODER_GETSTATE();
2270 if (dec_buffer_len == 0) {
2271 /* Before pos and no bytes buffered in decoder => OK */
2272 cookie.dec_flags = dec_flags;
2273 chars_to_skip -= chars_decoded;
2274 break;
2275 }
2276 /* Skip back by buffered amount and reset heuristic */
2277 skip_bytes -= dec_buffer_len;
2278 skip_back = 1;
2279 }
2280 else {
2281 /* We're too far ahead, skip back a bit */
2282 skip_bytes -= skip_back;
2283 skip_back *= 2;
2284 }
2285 }
2286 if (skip_bytes <= 0) {
2287 skip_bytes = 0;
2288 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2289 goto fail;
2290 }
2291
2292 /* Note our initial start point. */
2293 cookie.start_pos += skip_bytes;
2294 cookie.chars_to_skip = chars_to_skip;
2295 if (chars_to_skip == 0)
2296 goto finally;
2297
2298 /* We should be close to the desired position. Now feed the decoder one
2299 * byte at a time until we reach the `chars_to_skip` target.
2300 * As we go, note the nearest "safe start point" before the current
2301 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002302 * can safely start from there and advance to this location).
2303 */
2304 chars_decoded = 0;
2305 input = PyBytes_AS_STRING(next_input);
2306 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002307 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002308 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002309 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002310
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002311 DECODER_DECODE(input, 1, n);
2312 /* We got n chars for 1 byte */
2313 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002314 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002315 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002316
2317 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2318 /* Decoder buffer is empty, so this is a safe start point. */
2319 cookie.start_pos += cookie.bytes_to_feed;
2320 chars_to_skip -= chars_decoded;
2321 cookie.dec_flags = dec_flags;
2322 cookie.bytes_to_feed = 0;
2323 chars_decoded = 0;
2324 }
2325 if (chars_decoded >= chars_to_skip)
2326 break;
2327 input++;
2328 }
2329 if (input == input_end) {
2330 /* We didn't get enough decoded data; signal EOF to get more. */
2331 PyObject *decoded = PyObject_CallMethod(
2332 self->decoder, "decode", "yi", "", /* final = */ 1);
2333 if (decoded == NULL)
2334 goto fail;
2335 assert (PyUnicode_Check(decoded));
2336 chars_decoded += PyUnicode_GET_SIZE(decoded);
2337 Py_DECREF(decoded);
2338 cookie.need_eof = 1;
2339
2340 if (chars_decoded < chars_to_skip) {
2341 PyErr_SetString(PyExc_IOError,
2342 "can't reconstruct logical file position");
2343 goto fail;
2344 }
2345 }
2346
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002347finally:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002348 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2349 Py_DECREF(saved_state);
2350 if (res == NULL)
2351 return NULL;
2352 Py_DECREF(res);
2353
2354 /* The returned cookie corresponds to the last safe start point. */
2355 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002356 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002357
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002358fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002359 if (saved_state) {
2360 PyObject *type, *value, *traceback;
2361 PyErr_Fetch(&type, &value, &traceback);
2362
2363 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2364 Py_DECREF(saved_state);
2365 if (res == NULL)
2366 return NULL;
2367 Py_DECREF(res);
2368
2369 PyErr_Restore(type, value, traceback);
2370 }
2371 return NULL;
2372}
2373
2374static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002375textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002376{
2377 PyObject *pos = Py_None;
2378 PyObject *res;
2379
2380 CHECK_INITIALIZED(self)
2381 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2382 return NULL;
2383 }
2384
2385 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2386 if (res == NULL)
2387 return NULL;
2388 Py_DECREF(res);
2389
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002390 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391}
2392
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002393static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002394textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002395{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002396 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002397
2398 CHECK_INITIALIZED(self);
2399
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002400 res = PyUnicode_FromString("<_io.TextIOWrapper");
2401 if (res == NULL)
2402 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002403 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2404 if (nameobj == NULL) {
2405 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2406 PyErr_Clear();
2407 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002408 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002409 }
2410 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002411 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002412 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002413 if (s == NULL)
2414 goto error;
2415 PyUnicode_AppendAndDel(&res, s);
2416 if (res == NULL)
2417 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002418 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002419 modeobj = PyObject_GetAttrString((PyObject *) self, "mode");
2420 if (modeobj == NULL) {
2421 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2422 PyErr_Clear();
2423 else
2424 goto error;
2425 }
2426 else {
2427 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2428 Py_DECREF(modeobj);
2429 if (s == NULL)
2430 goto error;
2431 PyUnicode_AppendAndDel(&res, s);
2432 if (res == NULL)
2433 return NULL;
2434 }
2435 s = PyUnicode_FromFormat("%U encoding=%R>",
2436 res, self->encoding);
2437 Py_DECREF(res);
2438 return s;
2439error:
2440 Py_XDECREF(res);
2441 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002442}
2443
2444
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002445/* Inquiries */
2446
2447static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002448textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002449{
2450 CHECK_INITIALIZED(self);
2451 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2452}
2453
2454static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002455textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002456{
2457 CHECK_INITIALIZED(self);
2458 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2459}
2460
2461static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002462textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463{
2464 CHECK_INITIALIZED(self);
2465 return PyObject_CallMethod(self->buffer, "readable", NULL);
2466}
2467
2468static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002469textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002470{
2471 CHECK_INITIALIZED(self);
2472 return PyObject_CallMethod(self->buffer, "writable", NULL);
2473}
2474
2475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002476textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002477{
2478 CHECK_INITIALIZED(self);
2479 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2480}
2481
2482static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002483textiowrapper_getstate(textio *self, PyObject *args)
2484{
2485 PyErr_Format(PyExc_TypeError,
2486 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2487 return NULL;
2488}
2489
2490static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002491textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002492{
2493 CHECK_INITIALIZED(self);
2494 CHECK_CLOSED(self);
2495 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002496 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497 return NULL;
2498 return PyObject_CallMethod(self->buffer, "flush", NULL);
2499}
2500
2501static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002502textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503{
2504 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002505 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002506 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507
Antoine Pitrou6be88762010-05-03 16:48:20 +00002508 res = textiowrapper_closed_get(self, NULL);
2509 if (res == NULL)
2510 return NULL;
2511 r = PyObject_IsTrue(res);
2512 Py_DECREF(res);
2513 if (r < 0)
2514 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002515
Antoine Pitrou6be88762010-05-03 16:48:20 +00002516 if (r > 0) {
2517 Py_RETURN_NONE; /* stream already closed */
2518 }
2519 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002520 if (self->deallocating) {
2521 res = PyObject_CallMethod(self->buffer, "_dealloc_warn", "O", self);
2522 if (res)
2523 Py_DECREF(res);
2524 else
2525 PyErr_Clear();
2526 }
Antoine Pitrou6be88762010-05-03 16:48:20 +00002527 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2528 if (res == NULL) {
2529 return NULL;
2530 }
2531 else
2532 Py_DECREF(res);
2533
2534 return PyObject_CallMethod(self->buffer, "close", NULL);
2535 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536}
2537
2538static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002539textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002540{
2541 PyObject *line;
2542
2543 CHECK_INITIALIZED(self);
2544
2545 self->telling = 0;
2546 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2547 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002548 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549 }
2550 else {
2551 line = PyObject_CallMethodObjArgs((PyObject *)self,
2552 _PyIO_str_readline, NULL);
2553 if (line && !PyUnicode_Check(line)) {
2554 PyErr_Format(PyExc_IOError,
2555 "readline() should have returned an str object, "
2556 "not '%.200s'", Py_TYPE(line)->tp_name);
2557 Py_DECREF(line);
2558 return NULL;
2559 }
2560 }
2561
2562 if (line == NULL)
2563 return NULL;
2564
2565 if (PyUnicode_GET_SIZE(line) == 0) {
2566 /* Reached EOF or would have blocked */
2567 Py_DECREF(line);
2568 Py_CLEAR(self->snapshot);
2569 self->telling = self->seekable;
2570 return NULL;
2571 }
2572
2573 return line;
2574}
2575
2576static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002577textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002578{
2579 CHECK_INITIALIZED(self);
2580 return PyObject_GetAttrString(self->buffer, "name");
2581}
2582
2583static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002584textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002585{
2586 CHECK_INITIALIZED(self);
2587 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2588}
2589
2590static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002591textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002592{
2593 PyObject *res;
2594 CHECK_INITIALIZED(self);
2595 if (self->decoder == NULL)
2596 Py_RETURN_NONE;
2597 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2598 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002599 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2600 PyErr_Clear();
2601 Py_RETURN_NONE;
2602 }
2603 else {
2604 return NULL;
2605 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606 }
2607 return res;
2608}
2609
2610static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002611textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002612{
2613 CHECK_INITIALIZED(self);
2614 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2615}
2616
2617static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002618textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619{
2620 CHECK_INITIALIZED(self);
2621 return PyLong_FromSsize_t(self->chunk_size);
2622}
2623
2624static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002625textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002626{
2627 Py_ssize_t n;
2628 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002629 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002630 if (n == -1 && PyErr_Occurred())
2631 return -1;
2632 if (n <= 0) {
2633 PyErr_SetString(PyExc_ValueError,
2634 "a strictly positive integer is required");
2635 return -1;
2636 }
2637 self->chunk_size = n;
2638 return 0;
2639}
2640
/* Method table of TextIOWrapper.  Entries flagged METH_VARARGS receive an
   argument tuple; METH_NOARGS entries are called without arguments. */
static PyMethodDef textiowrapper_methods[] = {
    /* Basic stream operations */
    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},

    /* Inquiries forwarded to the underlying buffer, plus the pickling
       refusal */
    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
    {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},

    /* Positioning */
    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
    {NULL, NULL}    /* sentinel */
};
2661
/* Read-only attributes exposed directly from fields of the textio
   struct. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {NULL}          /* sentinel */
};
2668
/* Computed attributes.  Only _CHUNK_SIZE has a setter; the other entries
   are read-only. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}          /* sentinel */
};
2680
2681PyTypeObject PyTextIOWrapper_Type = {
2682 PyVarObject_HEAD_INIT(NULL, 0)
2683 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002684 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002685 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002686 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002687 0, /*tp_print*/
2688 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002689 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002691 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002692 0, /*tp_as_number*/
2693 0, /*tp_as_sequence*/
2694 0, /*tp_as_mapping*/
2695 0, /*tp_hash */
2696 0, /*tp_call*/
2697 0, /*tp_str*/
2698 0, /*tp_getattro*/
2699 0, /*tp_setattro*/
2700 0, /*tp_as_buffer*/
2701 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2702 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002703 textiowrapper_doc, /* tp_doc */
2704 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2705 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002707 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002708 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002709 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2710 textiowrapper_methods, /* tp_methods */
2711 textiowrapper_members, /* tp_members */
2712 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002713 0, /* tp_base */
2714 0, /* tp_dict */
2715 0, /* tp_descr_get */
2716 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002717 offsetof(textio, dict), /*tp_dictoffset*/
2718 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719 0, /* tp_alloc */
2720 PyType_GenericNew, /* tp_new */
2721};