blob: abdbeb765d930196f7b1b7090425f6e0a5c90123 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000016PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 {NULL, NULL}
126};
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000132 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000163 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags recording which newline kinds have been encountered */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
244PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000245_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246 PyObject *input, int final)
247{
248 PyObject *output;
249 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000250 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
256 }
257
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
262 }
263 else {
264 output = input;
265 Py_INCREF(output);
266 }
267
268 if (output == NULL)
269 return NULL;
270
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
275 }
276
277 output_len = PyUnicode_GET_SIZE(output);
278 if (self->pendingcr && (final || output_len > 0)) {
279 Py_UNICODE *out;
280 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
281 if (modified == NULL)
282 goto error;
283 out = PyUnicode_AS_UNICODE(modified);
284 out[0] = '\r';
285 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
286 output_len * sizeof(Py_UNICODE));
287 Py_DECREF(output);
288 output = modified;
289 self->pendingcr = 0;
290 output_len++;
291 }
292
293 /* retain last \r even when not translating data:
294 * then readline() is sure to get \r\n in one pass
295 */
296 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000297 if (output_len > 0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000298 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
299
300 if (Py_REFCNT(output) == 1) {
301 if (PyUnicode_Resize(&output, output_len - 1) < 0)
302 goto error;
303 }
304 else {
305 PyObject *modified = PyUnicode_FromUnicode(
306 PyUnicode_AS_UNICODE(output),
307 output_len - 1);
308 if (modified == NULL)
309 goto error;
310 Py_DECREF(output);
311 output = modified;
312 }
313 self->pendingcr = 1;
314 }
315 }
316
317 /* Record which newlines are read and do newline translation if desired,
318 all in one pass. */
319 {
320 Py_UNICODE *in_str;
321 Py_ssize_t len;
322 int seennl = self->seennl;
323 int only_lf = 0;
324
325 in_str = PyUnicode_AS_UNICODE(output);
326 len = PyUnicode_GET_SIZE(output);
327
328 if (len == 0)
329 return output;
330
331 /* If, up to now, newlines are consistently \n, do a quick check
332 for the \r *byte* with the libc's optimized memchr.
333 */
334 if (seennl == SEEN_LF || seennl == 0) {
Antoine Pitrou0e941892009-03-06 23:57:20 +0000335 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000336 }
337
Antoine Pitrou66913e22009-03-06 23:40:56 +0000338 if (only_lf) {
339 /* If not already seen, quick scan for a possible "\n" character.
340 (there's nothing else to be done, even when in translation mode)
341 */
342 if (seennl == 0 &&
343 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
344 Py_UNICODE *s, *end;
345 s = in_str;
346 end = in_str + len;
347 for (;;) {
348 Py_UNICODE c;
349 /* Fast loop for non-control characters */
350 while (*s > '\n')
351 s++;
352 c = *s++;
353 if (c == '\n') {
354 seennl |= SEEN_LF;
355 break;
356 }
357 if (s > end)
358 break;
359 }
360 }
361 /* Finished: we have scanned for newlines, and none of them
362 need translating */
363 }
364 else if (!self->translate) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 Py_UNICODE *s, *end;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000366 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000367 if (seennl == SEEN_ALL)
368 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 s = in_str;
370 end = in_str + len;
371 for (;;) {
372 Py_UNICODE c;
373 /* Fast loop for non-control characters */
374 while (*s > '\r')
375 s++;
376 c = *s++;
377 if (c == '\n')
378 seennl |= SEEN_LF;
379 else if (c == '\r') {
380 if (*s == '\n') {
381 seennl |= SEEN_CRLF;
382 s++;
383 }
384 else
385 seennl |= SEEN_CR;
386 }
387 if (s > end)
388 break;
389 if (seennl == SEEN_ALL)
390 break;
391 }
392 endscan:
393 ;
394 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000395 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 PyObject *translated = NULL;
397 Py_UNICODE *out_str;
398 Py_UNICODE *in, *out, *end;
399 if (Py_REFCNT(output) != 1) {
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 most decoders should only output non-shared strings, i.e.
403 translation is done in place. */
404 translated = PyUnicode_FromUnicode(NULL, len);
405 if (translated == NULL)
406 goto error;
407 assert(Py_REFCNT(translated) == 1);
408 memcpy(PyUnicode_AS_UNICODE(translated),
409 PyUnicode_AS_UNICODE(output),
410 len * sizeof(Py_UNICODE));
411 }
412 else {
413 translated = output;
414 }
415 out_str = PyUnicode_AS_UNICODE(translated);
416 in = in_str;
417 out = out_str;
418 end = in_str + len;
419 for (;;) {
420 Py_UNICODE c;
421 /* Fast loop for non-control characters */
422 while ((c = *in++) > '\r')
423 *out++ = c;
424 if (c == '\n') {
425 *out++ = c;
426 seennl |= SEEN_LF;
427 continue;
428 }
429 if (c == '\r') {
430 if (*in == '\n') {
431 in++;
432 seennl |= SEEN_CRLF;
433 }
434 else
435 seennl |= SEEN_CR;
436 *out++ = '\n';
437 continue;
438 }
439 if (in > end)
440 break;
441 *out++ = c;
442 }
443 if (translated != output) {
444 Py_DECREF(output);
445 output = translated;
446 }
447 if (out - out_str != len) {
448 if (PyUnicode_Resize(&output, out - out_str) < 0)
449 goto error;
450 }
451 }
452 self->seennl |= seennl;
453 }
454
455 return output;
456
457 error:
458 Py_DECREF(output);
459 return NULL;
460}
461
462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000463incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000564 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565};
566
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000569 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570};
571
572PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000575 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000593 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000600 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611};
612
613
614/* TextIOWrapper */
615
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
633 "newline.\n"
634 "\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
637 );
638
639typedef PyObject *
640 (*encodefunc_t)(PyObject *, PyObject *);
641
642typedef struct
643{
644 PyObject_HEAD
645 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000646 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000647 Py_ssize_t chunk_size;
648 PyObject *buffer;
649 PyObject *encoding;
650 PyObject *encoder;
651 PyObject *decoder;
652 PyObject *readnl;
653 PyObject *errors;
654 const char *writenl; /* utf-8 encoded, NULL stands for \n */
655 char line_buffering;
656 char readuniversal;
657 char readtranslate;
658 char writetranslate;
659 char seekable;
660 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000661 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000662 /* Specialized encoding func (see below) */
663 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000664 /* Whether or not it's the start of the stream */
665 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000666
667 /* Reads and writes are internally buffered in order to speed things up.
668 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000669
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000670 Please also note that text to be written is first encoded before being
671 buffered. This is necessary so that encoding errors are immediately
672 reported to the caller, but it unfortunately means that the
673 IncrementalEncoder (whose encode() method is always written in Python)
674 becomes a bottleneck for small writes.
675 */
676 PyObject *decoded_chars; /* buffer for text returned from decoder */
677 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
678 PyObject *pending_bytes; /* list of bytes objects waiting to be
679 written, or NULL */
680 Py_ssize_t pending_bytes_count;
681 PyObject *snapshot;
682 /* snapshot is either None, or a tuple (dec_flags, next_input) where
683 * dec_flags is the second (integer) item of the decoder state and
684 * next_input is the chunk of input bytes that comes next after the
685 * snapshot point. We use this to reconstruct decoder states in tell().
686 */
687
688 /* Cache raw object if it's a FileIO object */
689 PyObject *raw;
690
691 PyObject *weakreflist;
692 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000693} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000694
695
696/* A couple of specialized cases in order to bypass the slow incremental
697 encoding methods for the most popular encodings. */
698
699static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000700ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000701{
702 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
703 PyUnicode_GET_SIZE(text),
704 PyBytes_AS_STRING(self->errors));
705}
706
707static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000708utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000709{
710 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
711 PyUnicode_GET_SIZE(text),
712 PyBytes_AS_STRING(self->errors), 1);
713}
714
715static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000716utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717{
718 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
719 PyUnicode_GET_SIZE(text),
720 PyBytes_AS_STRING(self->errors), -1);
721}
722
723static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000724utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000725{
Antoine Pitroue4501852009-05-14 18:55:55 +0000726 if (!self->encoding_start_of_stream) {
727 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000728#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000729 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000731 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000732#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000733 }
734 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
735 PyUnicode_GET_SIZE(text),
736 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737}
738
Antoine Pitroue4501852009-05-14 18:55:55 +0000739static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000740utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000741{
742 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
743 PyUnicode_GET_SIZE(text),
744 PyBytes_AS_STRING(self->errors), 1);
745}
746
747static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000748utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000749{
750 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
751 PyUnicode_GET_SIZE(text),
752 PyBytes_AS_STRING(self->errors), -1);
753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000757{
758 if (!self->encoding_start_of_stream) {
759 /* Skip the BOM and use native byte ordering */
760#if defined(WORDS_BIGENDIAN)
761 return utf32be_encode(self, text);
762#else
763 return utf32le_encode(self, text);
764#endif
765 }
766 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
767 PyUnicode_GET_SIZE(text),
768 PyBytes_AS_STRING(self->errors), 0);
769}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000770
771static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000772utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000773{
774 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
775 PyUnicode_GET_SIZE(text),
776 PyBytes_AS_STRING(self->errors));
777}
778
779static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000780latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000781{
782 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
783 PyUnicode_GET_SIZE(text),
784 PyBytes_AS_STRING(self->errors));
785}
786
787/* Map normalized encoding names onto the specialized encoding funcs */
788
789typedef struct {
790 const char *name;
791 encodefunc_t encodefunc;
792} encodefuncentry;
793
Antoine Pitrou24f36292009-03-28 22:16:42 +0000794static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000795 {"ascii", (encodefunc_t) ascii_encode},
796 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000797 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000798 {"utf-16-be", (encodefunc_t) utf16be_encode},
799 {"utf-16-le", (encodefunc_t) utf16le_encode},
800 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000801 {"utf-32-be", (encodefunc_t) utf32be_encode},
802 {"utf-32-le", (encodefunc_t) utf32le_encode},
803 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000804 {NULL, NULL}
805};
806
807
808static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000809textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000810{
811 char *kwlist[] = {"buffer", "encoding", "errors",
812 "newline", "line_buffering",
813 NULL};
814 PyObject *buffer, *raw;
815 char *encoding = NULL;
816 char *errors = NULL;
817 char *newline = NULL;
818 int line_buffering = 0;
819 _PyIO_State *state = IO_STATE;
820
821 PyObject *res;
822 int r;
823
824 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000825 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000826 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
827 kwlist, &buffer, &encoding, &errors,
828 &newline, &line_buffering))
829 return -1;
830
831 if (newline && newline[0] != '\0'
832 && !(newline[0] == '\n' && newline[1] == '\0')
833 && !(newline[0] == '\r' && newline[1] == '\0')
834 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
835 PyErr_Format(PyExc_ValueError,
836 "illegal newline value: %s", newline);
837 return -1;
838 }
839
840 Py_CLEAR(self->buffer);
841 Py_CLEAR(self->encoding);
842 Py_CLEAR(self->encoder);
843 Py_CLEAR(self->decoder);
844 Py_CLEAR(self->readnl);
845 Py_CLEAR(self->decoded_chars);
846 Py_CLEAR(self->pending_bytes);
847 Py_CLEAR(self->snapshot);
848 Py_CLEAR(self->errors);
849 Py_CLEAR(self->raw);
850 self->decoded_chars_used = 0;
851 self->pending_bytes_count = 0;
852 self->encodefunc = NULL;
853
854 if (encoding == NULL) {
855 /* Try os.device_encoding(fileno) */
856 PyObject *fileno;
857 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
858 /* Ignore only AttributeError and UnsupportedOperation */
859 if (fileno == NULL) {
860 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
861 PyErr_ExceptionMatches(state->unsupported_operation)) {
862 PyErr_Clear();
863 }
864 else {
865 goto error;
866 }
867 }
868 else {
869 self->encoding = PyObject_CallMethod(state->os_module,
870 "device_encoding",
871 "N", fileno);
872 if (self->encoding == NULL)
873 goto error;
874 else if (!PyUnicode_Check(self->encoding))
875 Py_CLEAR(self->encoding);
876 }
877 }
878 if (encoding == NULL && self->encoding == NULL) {
879 if (state->locale_module == NULL) {
880 state->locale_module = PyImport_ImportModule("locale");
881 if (state->locale_module == NULL)
882 goto catch_ImportError;
883 else
884 goto use_locale;
885 }
886 else {
887 use_locale:
888 self->encoding = PyObject_CallMethod(
889 state->locale_module, "getpreferredencoding", NULL);
890 if (self->encoding == NULL) {
891 catch_ImportError:
892 /*
893 Importing locale can raise a ImportError because of
894 _functools, and locale.getpreferredencoding can raise a
895 ImportError if _locale is not available. These will happen
896 during module building.
897 */
898 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
899 PyErr_Clear();
900 self->encoding = PyUnicode_FromString("ascii");
901 }
902 else
903 goto error;
904 }
905 else if (!PyUnicode_Check(self->encoding))
906 Py_CLEAR(self->encoding);
907 }
908 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000909 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000910 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000911 if (encoding == NULL)
912 goto error;
913 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000914 else if (encoding != NULL) {
915 self->encoding = PyUnicode_FromString(encoding);
916 if (self->encoding == NULL)
917 goto error;
918 }
919 else {
920 PyErr_SetString(PyExc_IOError,
921 "could not determine default encoding");
922 }
923
924 if (errors == NULL)
925 errors = "strict";
926 self->errors = PyBytes_FromString(errors);
927 if (self->errors == NULL)
928 goto error;
929
930 self->chunk_size = 8192;
931 self->readuniversal = (newline == NULL || newline[0] == '\0');
932 self->line_buffering = line_buffering;
933 self->readtranslate = (newline == NULL);
934 if (newline) {
935 self->readnl = PyUnicode_FromString(newline);
936 if (self->readnl == NULL)
937 return -1;
938 }
939 self->writetranslate = (newline == NULL || newline[0] != '\0');
940 if (!self->readuniversal && self->readnl) {
941 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000942 if (self->writenl == NULL)
943 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000944 if (!strcmp(self->writenl, "\n"))
945 self->writenl = NULL;
946 }
947#ifdef MS_WINDOWS
948 else
949 self->writenl = "\r\n";
950#endif
951
952 /* Build the decoder object */
953 res = PyObject_CallMethod(buffer, "readable", NULL);
954 if (res == NULL)
955 goto error;
956 r = PyObject_IsTrue(res);
957 Py_DECREF(res);
958 if (r == -1)
959 goto error;
960 if (r == 1) {
961 self->decoder = PyCodec_IncrementalDecoder(
962 encoding, errors);
963 if (self->decoder == NULL)
964 goto error;
965
966 if (self->readuniversal) {
967 PyObject *incrementalDecoder = PyObject_CallFunction(
968 (PyObject *)&PyIncrementalNewlineDecoder_Type,
969 "Oi", self->decoder, (int)self->readtranslate);
970 if (incrementalDecoder == NULL)
971 goto error;
972 Py_CLEAR(self->decoder);
973 self->decoder = incrementalDecoder;
974 }
975 }
976
977 /* Build the encoder object */
978 res = PyObject_CallMethod(buffer, "writable", NULL);
979 if (res == NULL)
980 goto error;
981 r = PyObject_IsTrue(res);
982 Py_DECREF(res);
983 if (r == -1)
984 goto error;
985 if (r == 1) {
986 PyObject *ci;
987 self->encoder = PyCodec_IncrementalEncoder(
988 encoding, errors);
989 if (self->encoder == NULL)
990 goto error;
991 /* Get the normalized named of the codec */
992 ci = _PyCodec_Lookup(encoding);
993 if (ci == NULL)
994 goto error;
995 res = PyObject_GetAttrString(ci, "name");
996 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +0000997 if (res == NULL) {
998 if (PyErr_ExceptionMatches(PyExc_AttributeError))
999 PyErr_Clear();
1000 else
1001 goto error;
1002 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001003 else if (PyUnicode_Check(res)) {
1004 encodefuncentry *e = encodefuncs;
1005 while (e->name != NULL) {
1006 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1007 self->encodefunc = e->encodefunc;
1008 break;
1009 }
1010 e++;
1011 }
1012 }
1013 Py_XDECREF(res);
1014 }
1015
1016 self->buffer = buffer;
1017 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001018
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001019 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1020 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1021 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1022 raw = PyObject_GetAttrString(buffer, "raw");
1023 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001024 if (raw == NULL) {
1025 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1026 PyErr_Clear();
1027 else
1028 goto error;
1029 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001030 else if (Py_TYPE(raw) == &PyFileIO_Type)
1031 self->raw = raw;
1032 else
1033 Py_DECREF(raw);
1034 }
1035
1036 res = PyObject_CallMethod(buffer, "seekable", NULL);
1037 if (res == NULL)
1038 goto error;
1039 self->seekable = self->telling = PyObject_IsTrue(res);
1040 Py_DECREF(res);
1041
Antoine Pitroue4501852009-05-14 18:55:55 +00001042 self->encoding_start_of_stream = 0;
1043 if (self->seekable && self->encoder) {
1044 PyObject *cookieObj;
1045 int cmp;
1046
1047 self->encoding_start_of_stream = 1;
1048
1049 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1050 if (cookieObj == NULL)
1051 goto error;
1052
1053 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1054 Py_DECREF(cookieObj);
1055 if (cmp < 0) {
1056 goto error;
1057 }
1058
1059 if (cmp == 0) {
1060 self->encoding_start_of_stream = 0;
1061 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1062 _PyIO_zero, NULL);
1063 if (res == NULL)
1064 goto error;
1065 Py_DECREF(res);
1066 }
1067 }
1068
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001069 self->ok = 1;
1070 return 0;
1071
1072 error:
1073 return -1;
1074}
1075
1076static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001077_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001078{
1079 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1080 return -1;
1081 self->ok = 0;
1082 Py_CLEAR(self->buffer);
1083 Py_CLEAR(self->encoding);
1084 Py_CLEAR(self->encoder);
1085 Py_CLEAR(self->decoder);
1086 Py_CLEAR(self->readnl);
1087 Py_CLEAR(self->decoded_chars);
1088 Py_CLEAR(self->pending_bytes);
1089 Py_CLEAR(self->snapshot);
1090 Py_CLEAR(self->errors);
1091 Py_CLEAR(self->raw);
1092 return 0;
1093}
1094
1095static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001096textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001097{
Antoine Pitroue033e062010-10-29 10:38:18 +00001098 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001099 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001100 return;
1101 _PyObject_GC_UNTRACK(self);
1102 if (self->weakreflist != NULL)
1103 PyObject_ClearWeakRefs((PyObject *)self);
1104 Py_CLEAR(self->dict);
1105 Py_TYPE(self)->tp_free((PyObject *)self);
1106}
1107
1108static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001109textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001110{
1111 Py_VISIT(self->buffer);
1112 Py_VISIT(self->encoding);
1113 Py_VISIT(self->encoder);
1114 Py_VISIT(self->decoder);
1115 Py_VISIT(self->readnl);
1116 Py_VISIT(self->decoded_chars);
1117 Py_VISIT(self->pending_bytes);
1118 Py_VISIT(self->snapshot);
1119 Py_VISIT(self->errors);
1120 Py_VISIT(self->raw);
1121
1122 Py_VISIT(self->dict);
1123 return 0;
1124}
1125
1126static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001127textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001128{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001129 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001130 return -1;
1131 Py_CLEAR(self->dict);
1132 return 0;
1133}
1134
1135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001136textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001137
1138/* This macro takes some shortcuts to make the common case faster. */
1139#define CHECK_CLOSED(self) \
1140 do { \
1141 int r; \
1142 PyObject *_res; \
1143 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1144 if (self->raw != NULL) \
1145 r = _PyFileIO_closed(self->raw); \
1146 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001147 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001148 if (_res == NULL) \
1149 return NULL; \
1150 r = PyObject_IsTrue(_res); \
1151 Py_DECREF(_res); \
1152 if (r < 0) \
1153 return NULL; \
1154 } \
1155 if (r > 0) { \
1156 PyErr_SetString(PyExc_ValueError, \
1157 "I/O operation on closed file."); \
1158 return NULL; \
1159 } \
1160 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001161 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001162 return NULL; \
1163 } while (0)
1164
/* CHECK_INITIALIZED(self): make the enclosing function return NULL with a
   ValueError when the object is not usable (self->ok <= 0).  The message
   distinguishes a detached buffer from an object that was never
   initialized.

   The macro expands to a bare `if` statement containing `return NULL`, so
   it must be used as a standalone statement inside a function returning a
   pointer.  The argument is parenthesized at every use. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1176
/* CHECK_INITIALIZED_INT(self): same check as CHECK_INITIALIZED, for use in
   functions that report errors by returning -1 instead of NULL.

   The macro expands to a bare `if` statement containing `return -1`, so it
   must be used as a standalone statement inside a function returning an
   integer.  The argument is parenthesized at every use. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1188
1189
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001190static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001191textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001192{
1193 PyObject *buffer, *res;
1194 CHECK_INITIALIZED(self);
1195 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1196 if (res == NULL)
1197 return NULL;
1198 Py_DECREF(res);
1199 buffer = self->buffer;
1200 self->buffer = NULL;
1201 self->detached = 1;
1202 self->ok = 0;
1203 return buffer;
1204}
1205
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001206Py_LOCAL_INLINE(const Py_UNICODE *)
1207findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1208{
1209 /* like wcschr, but doesn't stop at NULL characters */
1210 while (size-- > 0) {
1211 if (*s == ch)
1212 return s;
1213 s++;
1214 }
1215 return NULL;
1216}
1217
Antoine Pitrou24f36292009-03-28 22:16:42 +00001218/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001219 underlying buffered object, though. */
1220static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001221_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001223 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001224
1225 if (self->pending_bytes == NULL)
1226 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001227
1228 pending = self->pending_bytes;
1229 Py_INCREF(pending);
1230 self->pending_bytes_count = 0;
1231 Py_CLEAR(self->pending_bytes);
1232
1233 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1234 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001235 if (b == NULL)
1236 return -1;
1237 ret = PyObject_CallMethodObjArgs(self->buffer,
1238 _PyIO_str_write, b, NULL);
1239 Py_DECREF(b);
1240 if (ret == NULL)
1241 return -1;
1242 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001243 return 0;
1244}
1245
1246static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001247textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248{
1249 PyObject *ret;
1250 PyObject *text; /* owned reference */
1251 PyObject *b;
1252 Py_ssize_t textlen;
1253 int haslf = 0;
1254 int needflush = 0;
1255
1256 CHECK_INITIALIZED(self);
1257
1258 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1259 return NULL;
1260 }
1261
1262 CHECK_CLOSED(self);
1263
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001264 if (self->encoder == NULL)
1265 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001266
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001267 Py_INCREF(text);
1268
1269 textlen = PyUnicode_GetSize(text);
1270
1271 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1272 if (findchar(PyUnicode_AS_UNICODE(text),
1273 PyUnicode_GET_SIZE(text), '\n'))
1274 haslf = 1;
1275
1276 if (haslf && self->writetranslate && self->writenl != NULL) {
1277 PyObject *newtext = PyObject_CallMethod(
1278 text, "replace", "ss", "\n", self->writenl);
1279 Py_DECREF(text);
1280 if (newtext == NULL)
1281 return NULL;
1282 text = newtext;
1283 }
1284
1285 if (self->line_buffering &&
1286 (haslf ||
1287 findchar(PyUnicode_AS_UNICODE(text),
1288 PyUnicode_GET_SIZE(text), '\r')))
1289 needflush = 1;
1290
1291 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001292 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001293 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001294 self->encoding_start_of_stream = 0;
1295 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 else
1297 b = PyObject_CallMethodObjArgs(self->encoder,
1298 _PyIO_str_encode, text, NULL);
1299 Py_DECREF(text);
1300 if (b == NULL)
1301 return NULL;
1302
1303 if (self->pending_bytes == NULL) {
1304 self->pending_bytes = PyList_New(0);
1305 if (self->pending_bytes == NULL) {
1306 Py_DECREF(b);
1307 return NULL;
1308 }
1309 self->pending_bytes_count = 0;
1310 }
1311 if (PyList_Append(self->pending_bytes, b) < 0) {
1312 Py_DECREF(b);
1313 return NULL;
1314 }
1315 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1316 Py_DECREF(b);
1317 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001318 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001319 return NULL;
1320 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001321
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 if (needflush) {
1323 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1324 if (ret == NULL)
1325 return NULL;
1326 Py_DECREF(ret);
1327 }
1328
1329 Py_CLEAR(self->snapshot);
1330
1331 if (self->decoder) {
1332 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1333 if (ret == NULL)
1334 return NULL;
1335 Py_DECREF(ret);
1336 }
1337
1338 return PyLong_FromSsize_t(textlen);
1339}
1340
1341/* Steal a reference to chars and store it in the decoded_char buffer;
1342 */
1343static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001344textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001345{
1346 Py_CLEAR(self->decoded_chars);
1347 self->decoded_chars = chars;
1348 self->decoded_chars_used = 0;
1349}
1350
1351static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001352textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001353{
1354 PyObject *chars;
1355 Py_ssize_t avail;
1356
1357 if (self->decoded_chars == NULL)
1358 return PyUnicode_FromStringAndSize(NULL, 0);
1359
1360 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1361 - self->decoded_chars_used);
1362
1363 assert(avail >= 0);
1364
1365 if (n < 0 || n > avail)
1366 n = avail;
1367
1368 if (self->decoded_chars_used > 0 || n < avail) {
1369 chars = PyUnicode_FromUnicode(
1370 PyUnicode_AS_UNICODE(self->decoded_chars)
1371 + self->decoded_chars_used, n);
1372 if (chars == NULL)
1373 return NULL;
1374 }
1375 else {
1376 chars = self->decoded_chars;
1377 Py_INCREF(chars);
1378 }
1379
1380 self->decoded_chars_used += n;
1381 return chars;
1382}
1383
1384/* Read and decode the next chunk of data from the BufferedReader.
1385 */
1386static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001387textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001388{
1389 PyObject *dec_buffer = NULL;
1390 PyObject *dec_flags = NULL;
1391 PyObject *input_chunk = NULL;
1392 PyObject *decoded_chars, *chunk_size;
1393 int eof;
1394
1395 /* The return value is True unless EOF was reached. The decoded string is
1396 * placed in self._decoded_chars (replacing its previous value). The
1397 * entire input chunk is sent to the decoder, though some of it may remain
1398 * buffered in the decoder, yet to be converted.
1399 */
1400
1401 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001402 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001403 return -1;
1404 }
1405
1406 if (self->telling) {
1407 /* To prepare for tell(), we need to snapshot a point in the file
1408 * where the decoder's input buffer is empty.
1409 */
1410
1411 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1412 _PyIO_str_getstate, NULL);
1413 if (state == NULL)
1414 return -1;
1415 /* Given this, we know there was a valid snapshot point
1416 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1417 */
1418 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1419 Py_DECREF(state);
1420 return -1;
1421 }
1422 Py_INCREF(dec_buffer);
1423 Py_INCREF(dec_flags);
1424 Py_DECREF(state);
1425 }
1426
1427 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1428 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1429 if (chunk_size == NULL)
1430 goto fail;
1431 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1432 _PyIO_str_read1, chunk_size, NULL);
1433 Py_DECREF(chunk_size);
1434 if (input_chunk == NULL)
1435 goto fail;
1436 assert(PyBytes_Check(input_chunk));
1437
1438 eof = (PyBytes_Size(input_chunk) == 0);
1439
1440 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1441 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1442 self->decoder, input_chunk, eof);
1443 }
1444 else {
1445 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1446 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1447 }
1448
1449 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1450 if (decoded_chars == NULL)
1451 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001452 textiowrapper_set_decoded_chars(self, decoded_chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001453 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1454 eof = 0;
1455
1456 if (self->telling) {
1457 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1458 * next input to be decoded is dec_buffer + input_chunk.
1459 */
1460 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1461 if (next_input == NULL)
1462 goto fail;
1463 assert (PyBytes_Check(next_input));
1464 Py_DECREF(dec_buffer);
1465 Py_CLEAR(self->snapshot);
1466 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1467 }
1468 Py_DECREF(input_chunk);
1469
1470 return (eof == 0);
1471
1472 fail:
1473 Py_XDECREF(dec_buffer);
1474 Py_XDECREF(dec_flags);
1475 Py_XDECREF(input_chunk);
1476 return -1;
1477}
1478
1479static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001480textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001481{
1482 Py_ssize_t n = -1;
1483 PyObject *result = NULL, *chunks = NULL;
1484
1485 CHECK_INITIALIZED(self);
1486
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001487 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001488 return NULL;
1489
1490 CHECK_CLOSED(self);
1491
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001492 if (self->decoder == NULL)
1493 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001494
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001495 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001496 return NULL;
1497
1498 if (n < 0) {
1499 /* Read everything */
1500 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1501 PyObject *decoded;
1502 if (bytes == NULL)
1503 goto fail;
1504 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1505 bytes, Py_True, NULL);
1506 Py_DECREF(bytes);
1507 if (decoded == NULL)
1508 goto fail;
1509
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001510 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511
1512 if (result == NULL) {
1513 Py_DECREF(decoded);
1514 return NULL;
1515 }
1516
1517 PyUnicode_AppendAndDel(&result, decoded);
1518 if (result == NULL)
1519 goto fail;
1520
1521 Py_CLEAR(self->snapshot);
1522 return result;
1523 }
1524 else {
1525 int res = 1;
1526 Py_ssize_t remaining = n;
1527
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001528 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001529 if (result == NULL)
1530 goto fail;
1531 remaining -= PyUnicode_GET_SIZE(result);
1532
1533 /* Keep reading chunks until we have n characters to return */
1534 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001535 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536 if (res < 0)
1537 goto fail;
1538 if (res == 0) /* EOF */
1539 break;
1540 if (chunks == NULL) {
1541 chunks = PyList_New(0);
1542 if (chunks == NULL)
1543 goto fail;
1544 }
1545 if (PyList_Append(chunks, result) < 0)
1546 goto fail;
1547 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001548 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549 if (result == NULL)
1550 goto fail;
1551 remaining -= PyUnicode_GET_SIZE(result);
1552 }
1553 if (chunks != NULL) {
1554 if (result != NULL && PyList_Append(chunks, result) < 0)
1555 goto fail;
1556 Py_CLEAR(result);
1557 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1558 if (result == NULL)
1559 goto fail;
1560 Py_CLEAR(chunks);
1561 }
1562 return result;
1563 }
1564 fail:
1565 Py_XDECREF(result);
1566 Py_XDECREF(chunks);
1567 return NULL;
1568}
1569
1570
1571/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1572 that is to the NUL character. Otherwise the function will produce
1573 incorrect results. */
1574static Py_UNICODE *
1575find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1576{
1577 Py_UNICODE *s = start;
1578 for (;;) {
1579 while (*s > ch)
1580 s++;
1581 if (*s == ch)
1582 return s;
1583 if (s == end)
1584 return NULL;
1585 s++;
1586 }
1587}
1588
1589Py_ssize_t
1590_PyIO_find_line_ending(
1591 int translated, int universal, PyObject *readnl,
1592 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1593{
1594 Py_ssize_t len = end - start;
1595
1596 if (translated) {
1597 /* Newlines are already translated, only search for \n */
1598 Py_UNICODE *pos = find_control_char(start, end, '\n');
1599 if (pos != NULL)
1600 return pos - start + 1;
1601 else {
1602 *consumed = len;
1603 return -1;
1604 }
1605 }
1606 else if (universal) {
1607 /* Universal newline search. Find any of \r, \r\n, \n
1608 * The decoder ensures that \r\n are not split in two pieces
1609 */
1610 Py_UNICODE *s = start;
1611 for (;;) {
1612 Py_UNICODE ch;
1613 /* Fast path for non-control chars. The loop always ends
1614 since the Py_UNICODE storage is NUL-terminated. */
1615 while (*s > '\r')
1616 s++;
1617 if (s >= end) {
1618 *consumed = len;
1619 return -1;
1620 }
1621 ch = *s++;
1622 if (ch == '\n')
1623 return s - start;
1624 if (ch == '\r') {
1625 if (*s == '\n')
1626 return s - start + 1;
1627 else
1628 return s - start;
1629 }
1630 }
1631 }
1632 else {
1633 /* Non-universal mode. */
1634 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1635 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1636 if (readnl_len == 1) {
1637 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1638 if (pos != NULL)
1639 return pos - start + 1;
1640 *consumed = len;
1641 return -1;
1642 }
1643 else {
1644 Py_UNICODE *s = start;
1645 Py_UNICODE *e = end - readnl_len + 1;
1646 Py_UNICODE *pos;
1647 if (e < s)
1648 e = s;
1649 while (s < e) {
1650 Py_ssize_t i;
1651 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1652 if (pos == NULL || pos >= e)
1653 break;
1654 for (i = 1; i < readnl_len; i++) {
1655 if (pos[i] != nl[i])
1656 break;
1657 }
1658 if (i == readnl_len)
1659 return pos - start + readnl_len;
1660 s = pos + 1;
1661 }
1662 pos = find_control_char(e, end, nl[0]);
1663 if (pos == NULL)
1664 *consumed = len;
1665 else
1666 *consumed = pos - start;
1667 return -1;
1668 }
1669 }
1670}
1671
1672static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001673_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674{
1675 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1676 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1677 int res;
1678
1679 CHECK_CLOSED(self);
1680
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001681 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001682 return NULL;
1683
1684 chunked = 0;
1685
1686 while (1) {
1687 Py_UNICODE *ptr;
1688 Py_ssize_t line_len;
1689 Py_ssize_t consumed = 0;
1690
1691 /* First, get some data if necessary */
1692 res = 1;
1693 while (!self->decoded_chars ||
1694 !PyUnicode_GET_SIZE(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001695 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001696 if (res < 0)
1697 goto error;
1698 if (res == 0)
1699 break;
1700 }
1701 if (res == 0) {
1702 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001703 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001704 Py_CLEAR(self->snapshot);
1705 start = endpos = offset_to_buffer = 0;
1706 break;
1707 }
1708
1709 if (remaining == NULL) {
1710 line = self->decoded_chars;
1711 start = self->decoded_chars_used;
1712 offset_to_buffer = 0;
1713 Py_INCREF(line);
1714 }
1715 else {
1716 assert(self->decoded_chars_used == 0);
1717 line = PyUnicode_Concat(remaining, self->decoded_chars);
1718 start = 0;
1719 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1720 Py_CLEAR(remaining);
1721 if (line == NULL)
1722 goto error;
1723 }
1724
1725 ptr = PyUnicode_AS_UNICODE(line);
1726 line_len = PyUnicode_GET_SIZE(line);
1727
1728 endpos = _PyIO_find_line_ending(
1729 self->readtranslate, self->readuniversal, self->readnl,
1730 ptr + start, ptr + line_len, &consumed);
1731 if (endpos >= 0) {
1732 endpos += start;
1733 if (limit >= 0 && (endpos - start) + chunked >= limit)
1734 endpos = start + limit - chunked;
1735 break;
1736 }
1737
1738 /* We can put aside up to `endpos` */
1739 endpos = consumed + start;
1740 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1741 /* Didn't find line ending, but reached length limit */
1742 endpos = start + limit - chunked;
1743 break;
1744 }
1745
1746 if (endpos > start) {
1747 /* No line ending seen yet - put aside current data */
1748 PyObject *s;
1749 if (chunks == NULL) {
1750 chunks = PyList_New(0);
1751 if (chunks == NULL)
1752 goto error;
1753 }
1754 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1755 if (s == NULL)
1756 goto error;
1757 if (PyList_Append(chunks, s) < 0) {
1758 Py_DECREF(s);
1759 goto error;
1760 }
1761 chunked += PyUnicode_GET_SIZE(s);
1762 Py_DECREF(s);
1763 }
1764 /* There may be some remaining bytes we'll have to prepend to the
1765 next chunk of data */
1766 if (endpos < line_len) {
1767 remaining = PyUnicode_FromUnicode(
1768 ptr + endpos, line_len - endpos);
1769 if (remaining == NULL)
1770 goto error;
1771 }
1772 Py_CLEAR(line);
1773 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001774 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001775 }
1776
1777 if (line != NULL) {
1778 /* Our line ends in the current buffer */
1779 self->decoded_chars_used = endpos - offset_to_buffer;
1780 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1781 if (start == 0 && Py_REFCNT(line) == 1) {
1782 if (PyUnicode_Resize(&line, endpos) < 0)
1783 goto error;
1784 }
1785 else {
1786 PyObject *s = PyUnicode_FromUnicode(
1787 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1788 Py_CLEAR(line);
1789 if (s == NULL)
1790 goto error;
1791 line = s;
1792 }
1793 }
1794 }
1795 if (remaining != NULL) {
1796 if (chunks == NULL) {
1797 chunks = PyList_New(0);
1798 if (chunks == NULL)
1799 goto error;
1800 }
1801 if (PyList_Append(chunks, remaining) < 0)
1802 goto error;
1803 Py_CLEAR(remaining);
1804 }
1805 if (chunks != NULL) {
1806 if (line != NULL && PyList_Append(chunks, line) < 0)
1807 goto error;
1808 Py_CLEAR(line);
1809 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1810 if (line == NULL)
1811 goto error;
1812 Py_DECREF(chunks);
1813 }
1814 if (line == NULL)
1815 line = PyUnicode_FromStringAndSize(NULL, 0);
1816
1817 return line;
1818
1819 error:
1820 Py_XDECREF(chunks);
1821 Py_XDECREF(remaining);
1822 Py_XDECREF(line);
1823 return NULL;
1824}
1825
1826static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001827textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001828{
1829 Py_ssize_t limit = -1;
1830
1831 CHECK_INITIALIZED(self);
1832 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1833 return NULL;
1834 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001835 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001836}
1837
1838/* Seek and Tell */
1839
1840typedef struct {
1841 Py_off_t start_pos;
1842 int dec_flags;
1843 int bytes_to_feed;
1844 int chars_to_skip;
1845 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001846} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001847
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The macros below give the offset of each cookie_type field
   inside that intermediary byte string; each offset is expressed relative
   to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#ifdef WORDS_BIGENDIAN

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1885
1886static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001887textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001888{
1889 unsigned char buffer[COOKIE_BUF_LEN];
1890 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1891 if (cookieLong == NULL)
1892 return -1;
1893
1894 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1895 IS_LITTLE_ENDIAN, 0) < 0) {
1896 Py_DECREF(cookieLong);
1897 return -1;
1898 }
1899 Py_DECREF(cookieLong);
1900
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001901 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1902 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1903 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1904 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1905 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001906
1907 return 0;
1908}
1909
1910static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001911textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001912{
1913 unsigned char buffer[COOKIE_BUF_LEN];
1914
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001915 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1916 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1917 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1918 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1919 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001920
1921 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1922}
1923#undef IS_LITTLE_ENDIAN
1924
1925static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001926_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001927{
1928 PyObject *res;
1929 /* When seeking to the start of the stream, we call decoder.reset()
1930 rather than decoder.getstate().
1931 This is for a few decoders such as utf-16 for which the state value
1932 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1933 utf-16, that we are expecting a BOM).
1934 */
1935 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1936 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1937 else
1938 res = PyObject_CallMethod(self->decoder, "setstate",
1939 "((yi))", "", cookie->dec_flags);
1940 if (res == NULL)
1941 return -1;
1942 Py_DECREF(res);
1943 return 0;
1944}
1945
Antoine Pitroue4501852009-05-14 18:55:55 +00001946static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001947_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001948{
1949 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001950 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001951 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1952 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1953 self->encoding_start_of_stream = 1;
1954 }
1955 else {
1956 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1957 _PyIO_zero, NULL);
1958 self->encoding_start_of_stream = 0;
1959 }
1960 if (res == NULL)
1961 return -1;
1962 Py_DECREF(res);
1963 return 0;
1964}
1965
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001966static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001967textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968{
1969 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001970 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001971 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001972 PyObject *res;
1973 int cmp;
1974
1975 CHECK_INITIALIZED(self);
1976
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1978 return NULL;
1979 CHECK_CLOSED(self);
1980
1981 Py_INCREF(cookieObj);
1982
1983 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001984 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001985 goto fail;
1986 }
1987
1988 if (whence == 1) {
1989 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00001990 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001991 if (cmp < 0)
1992 goto fail;
1993
1994 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001995 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996 goto fail;
1997 }
1998
1999 /* Seeking to the current position should attempt to
2000 * sync the underlying buffer with the current position.
2001 */
2002 Py_DECREF(cookieObj);
2003 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2004 if (cookieObj == NULL)
2005 goto fail;
2006 }
2007 else if (whence == 2) {
2008 /* seek relative to end of file */
2009
Antoine Pitroue4501852009-05-14 18:55:55 +00002010 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002011 if (cmp < 0)
2012 goto fail;
2013
2014 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002015 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016 goto fail;
2017 }
2018
2019 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2020 if (res == NULL)
2021 goto fail;
2022 Py_DECREF(res);
2023
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002024 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025 Py_CLEAR(self->snapshot);
2026 if (self->decoder) {
2027 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2028 if (res == NULL)
2029 goto fail;
2030 Py_DECREF(res);
2031 }
2032
2033 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2034 Py_XDECREF(cookieObj);
2035 return res;
2036 }
2037 else if (whence != 0) {
2038 PyErr_Format(PyExc_ValueError,
2039 "invalid whence (%d, should be 0, 1 or 2)", whence);
2040 goto fail;
2041 }
2042
Antoine Pitroue4501852009-05-14 18:55:55 +00002043 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 if (cmp < 0)
2045 goto fail;
2046
2047 if (cmp == 1) {
2048 PyErr_Format(PyExc_ValueError,
2049 "negative seek position %R", cookieObj);
2050 goto fail;
2051 }
2052
2053 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2054 if (res == NULL)
2055 goto fail;
2056 Py_DECREF(res);
2057
2058 /* The strategy of seek() is to go back to the safe start point
2059 * and replay the effect of read(chars_to_skip) from there.
2060 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002061 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002062 goto fail;
2063
2064 /* Seek back to the safe start point. */
2065 posobj = PyLong_FromOff_t(cookie.start_pos);
2066 if (posobj == NULL)
2067 goto fail;
2068 res = PyObject_CallMethodObjArgs(self->buffer,
2069 _PyIO_str_seek, posobj, NULL);
2070 Py_DECREF(posobj);
2071 if (res == NULL)
2072 goto fail;
2073 Py_DECREF(res);
2074
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002075 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 Py_CLEAR(self->snapshot);
2077
2078 /* Restore the decoder to its state from the safe start point. */
2079 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002080 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 goto fail;
2082 }
2083
2084 if (cookie.chars_to_skip) {
2085 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2086 PyObject *input_chunk = PyObject_CallMethod(
2087 self->buffer, "read", "i", cookie.bytes_to_feed);
2088 PyObject *decoded;
2089
2090 if (input_chunk == NULL)
2091 goto fail;
2092
2093 assert (PyBytes_Check(input_chunk));
2094
2095 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2096 if (self->snapshot == NULL) {
2097 Py_DECREF(input_chunk);
2098 goto fail;
2099 }
2100
2101 decoded = PyObject_CallMethod(self->decoder, "decode",
2102 "Oi", input_chunk, (int)cookie.need_eof);
2103
2104 if (decoded == NULL)
2105 goto fail;
2106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002107 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002108
2109 /* Skip chars_to_skip of the decoded characters. */
2110 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2111 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2112 goto fail;
2113 }
2114 self->decoded_chars_used = cookie.chars_to_skip;
2115 }
2116 else {
2117 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2118 if (self->snapshot == NULL)
2119 goto fail;
2120 }
2121
Antoine Pitroue4501852009-05-14 18:55:55 +00002122 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2123 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002124 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002125 goto fail;
2126 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002127 return cookieObj;
2128 fail:
2129 Py_XDECREF(cookieObj);
2130 return NULL;
2131
2132}
2133
2134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002135textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136{
2137 PyObject *res;
2138 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002139 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002140 PyObject *next_input;
2141 Py_ssize_t chars_to_skip, chars_decoded;
2142 PyObject *saved_state = NULL;
2143 char *input, *input_end;
2144
2145 CHECK_INITIALIZED(self);
2146 CHECK_CLOSED(self);
2147
2148 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002149 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002150 goto fail;
2151 }
2152 if (!self->telling) {
2153 PyErr_SetString(PyExc_IOError,
2154 "telling position disabled by next() call");
2155 goto fail;
2156 }
2157
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002158 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002159 return NULL;
2160 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2161 if (res == NULL)
2162 goto fail;
2163 Py_DECREF(res);
2164
2165 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2166 if (posobj == NULL)
2167 goto fail;
2168
2169 if (self->decoder == NULL || self->snapshot == NULL) {
2170 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2171 return posobj;
2172 }
2173
2174#if defined(HAVE_LARGEFILE_SUPPORT)
2175 cookie.start_pos = PyLong_AsLongLong(posobj);
2176#else
2177 cookie.start_pos = PyLong_AsLong(posobj);
2178#endif
2179 if (PyErr_Occurred())
2180 goto fail;
2181
2182 /* Skip backward to the snapshot point (see _read_chunk). */
2183 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2184 goto fail;
2185
2186 assert (PyBytes_Check(next_input));
2187
2188 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2189
2190 /* How many decoded characters have been used up since the snapshot? */
2191 if (self->decoded_chars_used == 0) {
2192 /* We haven't moved from the snapshot point. */
2193 Py_DECREF(posobj);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002194 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002195 }
2196
2197 chars_to_skip = self->decoded_chars_used;
2198
2199 /* Starting from the snapshot position, we will walk the decoder
2200 * forward until it gives us enough decoded characters.
2201 */
2202 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2203 _PyIO_str_getstate, NULL);
2204 if (saved_state == NULL)
2205 goto fail;
2206
2207 /* Note our initial start point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002208 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002209 goto fail;
2210
2211 /* Feed the decoder one byte at a time. As we go, note the
2212 * nearest "safe start point" before the current location
2213 * (a point where the decoder has nothing buffered, so seek()
2214 * can safely start from there and advance to this location).
2215 */
2216 chars_decoded = 0;
2217 input = PyBytes_AS_STRING(next_input);
2218 input_end = input + PyBytes_GET_SIZE(next_input);
2219 while (input < input_end) {
2220 PyObject *state;
2221 char *dec_buffer;
2222 Py_ssize_t dec_buffer_len;
2223 int dec_flags;
2224
2225 PyObject *decoded = PyObject_CallMethod(
2226 self->decoder, "decode", "y#", input, 1);
2227 if (decoded == NULL)
2228 goto fail;
2229 assert (PyUnicode_Check(decoded));
2230 chars_decoded += PyUnicode_GET_SIZE(decoded);
2231 Py_DECREF(decoded);
2232
2233 cookie.bytes_to_feed += 1;
2234
2235 state = PyObject_CallMethodObjArgs(self->decoder,
2236 _PyIO_str_getstate, NULL);
2237 if (state == NULL)
2238 goto fail;
2239 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2240 Py_DECREF(state);
2241 goto fail;
2242 }
2243 Py_DECREF(state);
2244
2245 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2246 /* Decoder buffer is empty, so this is a safe start point. */
2247 cookie.start_pos += cookie.bytes_to_feed;
2248 chars_to_skip -= chars_decoded;
2249 cookie.dec_flags = dec_flags;
2250 cookie.bytes_to_feed = 0;
2251 chars_decoded = 0;
2252 }
2253 if (chars_decoded >= chars_to_skip)
2254 break;
2255 input++;
2256 }
2257 if (input == input_end) {
2258 /* We didn't get enough decoded data; signal EOF to get more. */
2259 PyObject *decoded = PyObject_CallMethod(
2260 self->decoder, "decode", "yi", "", /* final = */ 1);
2261 if (decoded == NULL)
2262 goto fail;
2263 assert (PyUnicode_Check(decoded));
2264 chars_decoded += PyUnicode_GET_SIZE(decoded);
2265 Py_DECREF(decoded);
2266 cookie.need_eof = 1;
2267
2268 if (chars_decoded < chars_to_skip) {
2269 PyErr_SetString(PyExc_IOError,
2270 "can't reconstruct logical file position");
2271 goto fail;
2272 }
2273 }
2274
2275 /* finally */
2276 Py_XDECREF(posobj);
2277 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2278 Py_DECREF(saved_state);
2279 if (res == NULL)
2280 return NULL;
2281 Py_DECREF(res);
2282
2283 /* The returned cookie corresponds to the last safe start point. */
2284 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002285 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002286
2287 fail:
2288 Py_XDECREF(posobj);
2289 if (saved_state) {
2290 PyObject *type, *value, *traceback;
2291 PyErr_Fetch(&type, &value, &traceback);
2292
2293 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2294 Py_DECREF(saved_state);
2295 if (res == NULL)
2296 return NULL;
2297 Py_DECREF(res);
2298
2299 PyErr_Restore(type, value, traceback);
2300 }
2301 return NULL;
2302}
2303
2304static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002305textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002306{
2307 PyObject *pos = Py_None;
2308 PyObject *res;
2309
2310 CHECK_INITIALIZED(self)
2311 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2312 return NULL;
2313 }
2314
2315 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2316 if (res == NULL)
2317 return NULL;
2318 Py_DECREF(res);
2319
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002320 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002321}
2322
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002323static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002324textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002325{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002326 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002327
2328 CHECK_INITIALIZED(self);
2329
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002330 res = PyUnicode_FromString("<_io.TextIOWrapper");
2331 if (res == NULL)
2332 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002333 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2334 if (nameobj == NULL) {
2335 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2336 PyErr_Clear();
2337 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002338 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002339 }
2340 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002341 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002342 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002343 if (s == NULL)
2344 goto error;
2345 PyUnicode_AppendAndDel(&res, s);
2346 if (res == NULL)
2347 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002348 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002349 modeobj = PyObject_GetAttrString((PyObject *) self, "mode");
2350 if (modeobj == NULL) {
2351 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2352 PyErr_Clear();
2353 else
2354 goto error;
2355 }
2356 else {
2357 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2358 Py_DECREF(modeobj);
2359 if (s == NULL)
2360 goto error;
2361 PyUnicode_AppendAndDel(&res, s);
2362 if (res == NULL)
2363 return NULL;
2364 }
2365 s = PyUnicode_FromFormat("%U encoding=%R>",
2366 res, self->encoding);
2367 Py_DECREF(res);
2368 return s;
2369error:
2370 Py_XDECREF(res);
2371 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002372}
2373
2374
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002375/* Inquiries */
2376
2377static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002378textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002379{
2380 CHECK_INITIALIZED(self);
2381 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2382}
2383
2384static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002385textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002386{
2387 CHECK_INITIALIZED(self);
2388 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2389}
2390
2391static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002392textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002393{
2394 CHECK_INITIALIZED(self);
2395 return PyObject_CallMethod(self->buffer, "readable", NULL);
2396}
2397
2398static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002399textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002400{
2401 CHECK_INITIALIZED(self);
2402 return PyObject_CallMethod(self->buffer, "writable", NULL);
2403}
2404
2405static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002406textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002407{
2408 CHECK_INITIALIZED(self);
2409 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2410}
2411
2412static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002413textiowrapper_getstate(textio *self, PyObject *args)
2414{
2415 PyErr_Format(PyExc_TypeError,
2416 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2417 return NULL;
2418}
2419
2420static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002421textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002422{
2423 CHECK_INITIALIZED(self);
2424 CHECK_CLOSED(self);
2425 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002426 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002427 return NULL;
2428 return PyObject_CallMethod(self->buffer, "flush", NULL);
2429}
2430
2431static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002432textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002433{
2434 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002435 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002436 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002437
Antoine Pitrou6be88762010-05-03 16:48:20 +00002438 res = textiowrapper_closed_get(self, NULL);
2439 if (res == NULL)
2440 return NULL;
2441 r = PyObject_IsTrue(res);
2442 Py_DECREF(res);
2443 if (r < 0)
2444 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002445
Antoine Pitrou6be88762010-05-03 16:48:20 +00002446 if (r > 0) {
2447 Py_RETURN_NONE; /* stream already closed */
2448 }
2449 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002450 if (self->deallocating) {
2451 res = PyObject_CallMethod(self->buffer, "_dealloc_warn", "O", self);
2452 if (res)
2453 Py_DECREF(res);
2454 else
2455 PyErr_Clear();
2456 }
Antoine Pitrou6be88762010-05-03 16:48:20 +00002457 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2458 if (res == NULL) {
2459 return NULL;
2460 }
2461 else
2462 Py_DECREF(res);
2463
2464 return PyObject_CallMethod(self->buffer, "close", NULL);
2465 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002466}
2467
2468static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002469textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002470{
2471 PyObject *line;
2472
2473 CHECK_INITIALIZED(self);
2474
2475 self->telling = 0;
2476 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2477 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002478 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002479 }
2480 else {
2481 line = PyObject_CallMethodObjArgs((PyObject *)self,
2482 _PyIO_str_readline, NULL);
2483 if (line && !PyUnicode_Check(line)) {
2484 PyErr_Format(PyExc_IOError,
2485 "readline() should have returned an str object, "
2486 "not '%.200s'", Py_TYPE(line)->tp_name);
2487 Py_DECREF(line);
2488 return NULL;
2489 }
2490 }
2491
2492 if (line == NULL)
2493 return NULL;
2494
2495 if (PyUnicode_GET_SIZE(line) == 0) {
2496 /* Reached EOF or would have blocked */
2497 Py_DECREF(line);
2498 Py_CLEAR(self->snapshot);
2499 self->telling = self->seekable;
2500 return NULL;
2501 }
2502
2503 return line;
2504}
2505
2506static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002507textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002508{
2509 CHECK_INITIALIZED(self);
2510 return PyObject_GetAttrString(self->buffer, "name");
2511}
2512
2513static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002514textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002515{
2516 CHECK_INITIALIZED(self);
2517 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2518}
2519
2520static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002521textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002522{
2523 PyObject *res;
2524 CHECK_INITIALIZED(self);
2525 if (self->decoder == NULL)
2526 Py_RETURN_NONE;
2527 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2528 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002529 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2530 PyErr_Clear();
2531 Py_RETURN_NONE;
2532 }
2533 else {
2534 return NULL;
2535 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536 }
2537 return res;
2538}
2539
2540static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002541textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002542{
2543 CHECK_INITIALIZED(self);
2544 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2545}
2546
2547static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002548textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549{
2550 CHECK_INITIALIZED(self);
2551 return PyLong_FromSsize_t(self->chunk_size);
2552}
2553
2554static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002555textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556{
2557 Py_ssize_t n;
2558 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002559 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002560 if (n == -1 && PyErr_Occurred())
2561 return -1;
2562 if (n <= 0) {
2563 PyErr_SetString(PyExc_ValueError,
2564 "a strictly positive integer is required");
2565 return -1;
2566 }
2567 self->chunk_size = n;
2568 return 0;
2569}
2570
/* Method table for TextIOWrapper: maps each Python-level method name to its
   C implementation and calling-convention flag.  The table is terminated
   by a NULL sentinel entry. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},

    /* Inquiries and pickling support */
    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
    {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},

    /* Positioning */
    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
    {NULL, NULL}
};
2591
/* Read-only attributes exposed directly from fields of the textio struct.
   The table is terminated by a NULL sentinel entry. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {NULL}
};
2598
/* Computed attributes of TextIOWrapper.  Only _CHUNK_SIZE has a setter;
   the other entries are getter-only.  The table is terminated by a NULL
   sentinel entry. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}
};
2610
/* Type object for _io.TextIOWrapper.  It is subclassable
   (Py_TPFLAGS_BASETYPE), participates in cyclic garbage collection
   (Py_TPFLAGS_HAVE_GC), supports weak references (tp_weaklistoffset) and
   carries an instance dictionary (tp_dictoffset). */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};