blob: 0f5a73d4203b3974a8b70744b05b7c1d316a38e6 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000016PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 {NULL, NULL}
126};
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000132 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133};
134
/* Type object for _io._TextIOBase.

   The type adds no instance storage of its own (tp_basicsize is 0) and
   provides no tp_new/tp_init; it only contributes the stub methods and
   getters above on top of PyIOBase_Type.  Slots are filled positionally,
   so the order of the entries below must not be changed. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
175
176
177/* IncrementalNewlineDecoder */
178
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
193 int pendingcr:1;
194 int translate:1;
195 unsigned int seennl:3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags recording which kinds of line ending have been seen so far. */
#define SEEN_CR   0x1
#define SEEN_LF   0x2
#define SEEN_CRLF 0x4
#define SEEN_ALL  (SEEN_CRLF | SEEN_LF | SEEN_CR)
243
/* Decode one chunk of input and apply the newline handling.

   _self  must be an IncrementalNewlineDecoder instance (it is cast to
          nldecoder_object without a type check).
   input  is handed to the wrapped decoder's decode method, or used as the
          output directly when the wrapped decoder is None.
   final  non-zero when no more input will follow.

   Returns a new reference to a unicode object, or NULL with an exception
   set: ValueError when __init__ was never run, TypeError when the result
   of the decoding step is not a unicode object, or whatever the wrapped
   decoder or the allocation helpers raise.

   Side effects: updates self->pendingcr and self->seennl. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the eventual \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        /* No wrapped decoder: the input itself is the text to process. */
        output = input;
        Py_INCREF(output);
    }

    if (output == NULL)
        return NULL;

    if (!PyUnicode_Check(output)) {
        PyErr_SetString(PyExc_TypeError,
                        "decoder should return a string result");
        goto error;
    }

    /* Re-insert the '\r' held back by the previous call, but only once
       there is something to attach it to (or the stream is finished). */
    output_len = PyUnicode_GET_SIZE(output);
    if (self->pendingcr && (final || output_len > 0)) {
        Py_UNICODE *out;
        PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
        if (modified == NULL)
            goto error;
        out = PyUnicode_AS_UNICODE(modified);
        out[0] = '\r';
        memcpy(out + 1, PyUnicode_AS_UNICODE(output),
               output_len * sizeof(Py_UNICODE));
        Py_DECREF(output);
        output = modified;
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {

            /* Shrink in place only when nobody else holds a reference;
               otherwise build a shortened copy. */
            if (Py_REFCNT(output) == 1) {
                if (PyUnicode_Resize(&output, output_len - 1) < 0)
                    goto error;
            }
            else {
                PyObject *modified = PyUnicode_FromUnicode(
                    PyUnicode_AS_UNICODE(output),
                    output_len - 1);
                if (modified == NULL)
                    goto error;
                Py_DECREF(output);
                output = modified;
            }
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        Py_UNICODE *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;

        in_str = PyUnicode_AS_UNICODE(output);
        len = PyUnicode_GET_SIZE(output);

        if (len == 0)
            return output;

        /* NOTE(review): the scanning loops below have no explicit bound in
           their inner "fast loop"; they appear to rely on the character at
           in_str[len] comparing <= '\n' (a terminating NUL) to stop, and
           only then test `s > end` / `in > end` -- confirm against the
           unicode object's buffer guarantees. */

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
                /* The byte-wise memchr can match inside a wider code unit,
                   so confirm with a per-character scan. */
                Py_UNICODE *s, *end;
                s = in_str;
                end = in_str + len;
                for (;;) {
                    Py_UNICODE c;
                    /* Fast loop for non-control characters */
                    while (*s > '\n')
                        s++;
                    c = *s++;
                    if (c == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                    if (s > end)
                        break;
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Not translating: only record which newline kinds occur. */
            Py_UNICODE *s, *end;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            s = in_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while (*s > '\r')
                    s++;
                c = *s++;
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (*s == '\n') {
                        seennl |= SEEN_CRLF;
                        s++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (s > end)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translating: rewrite "\r" and "\r\n" as "\n" while recording
               the newline kinds.  The result never grows, so the rewrite
               can be done in place when the string is not shared. */
            PyObject *translated = NULL;
            Py_UNICODE *out_str;
            Py_UNICODE *in, *out, *end;
            if (Py_REFCNT(output) != 1) {
                /* We could try to optimize this so that we only do a copy
                   when there is something to translate. On the other hand,
                   most decoders should only output non-shared strings, i.e.
                   translation is done in place. */
                translated = PyUnicode_FromUnicode(NULL, len);
                if (translated == NULL)
                    goto error;
                assert(Py_REFCNT(translated) == 1);
                memcpy(PyUnicode_AS_UNICODE(translated),
                       PyUnicode_AS_UNICODE(output),
                       len * sizeof(Py_UNICODE));
            }
            else {
                translated = output;
            }
            out_str = PyUnicode_AS_UNICODE(translated);
            in = in_str;
            out = out_str;
            end = in_str + len;
            for (;;) {
                Py_UNICODE c;
                /* Fast loop for non-control characters */
                while ((c = *in++) > '\r')
                    *out++ = c;
                if (c == '\n') {
                    *out++ = c;
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (*in == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    *out++ = '\n';
                    continue;
                }
                /* Any other character <= '\r': stop once the read pointer
                   has moved past the end, otherwise copy it through. */
                if (in > end)
                    break;
                *out++ = c;
            }
            if (translated != output) {
                Py_DECREF(output);
                output = translated;
            }
            /* Each "\r\n" pair collapsed into one character; trim the
               string to the number of characters actually written. */
            if (out - out_str != len) {
                if (PyUnicode_Resize(&output, out - out_str) < 0)
                    goto error;
            }
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
461
462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000463incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000564 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565};
566
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000569 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570};
571
/* Type object for _io.IncrementalNewlineDecoder.

   Instances are nldecoder_object structs created by PyType_GenericNew and
   set up by incrementalnewlinedecoder_init.  No tp_traverse/tp_clear is
   provided.  Slots are filled positionally, so the order of the entries
   below must not be changed. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object),   /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
612
613
614/* TextIOWrapper */
615
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
633 "newline.\n"
634 "\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
637 );
638
639typedef PyObject *
640 (*encodefunc_t)(PyObject *, PyObject *);
641
/* Instance layout of the text wrapper objects set up by
   textiowrapper_init().  The field order is part of the object layout and
   must not be changed. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char telling;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;
    PyObject *snapshot;
    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000693
694
695/* A couple of specialized cases in order to bypass the slow incremental
696 encoding methods for the most popular encodings. */
697
698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700{
701 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
702 PyUnicode_GET_SIZE(text),
703 PyBytes_AS_STRING(self->errors));
704}
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
709 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
710 PyUnicode_GET_SIZE(text),
711 PyBytes_AS_STRING(self->errors), 1);
712}
713
714static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000715utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716{
717 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
718 PyUnicode_GET_SIZE(text),
719 PyBytes_AS_STRING(self->errors), -1);
720}
721
722static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000723utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724{
Antoine Pitroue4501852009-05-14 18:55:55 +0000725 if (!self->encoding_start_of_stream) {
726 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000728 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 }
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
Antoine Pitroue4501852009-05-14 18:55:55 +0000738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000740{
741 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
742 PyUnicode_GET_SIZE(text),
743 PyBytes_AS_STRING(self->errors), 1);
744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
749 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
750 PyUnicode_GET_SIZE(text),
751 PyBytes_AS_STRING(self->errors), -1);
752}
753
754static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000755utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000756{
757 if (!self->encoding_start_of_stream) {
758 /* Skip the BOM and use native byte ordering */
759#if defined(WORDS_BIGENDIAN)
760 return utf32be_encode(self, text);
761#else
762 return utf32le_encode(self, text);
763#endif
764 }
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 PyUnicode_GET_SIZE(text),
767 PyBytes_AS_STRING(self->errors), 0);
768}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
773 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
774 PyUnicode_GET_SIZE(text),
775 PyBytes_AS_STRING(self->errors));
776}
777
778static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000779latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780{
781 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
782 PyUnicode_GET_SIZE(text),
783 PyBytes_AS_STRING(self->errors));
784}
785
786/* Map normalized encoding names onto the specialized encoding funcs */
787
788typedef struct {
789 const char *name;
790 encodefunc_t encodefunc;
791} encodefuncentry;
792
Antoine Pitrou24f36292009-03-28 22:16:42 +0000793static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000794 {"ascii", (encodefunc_t) ascii_encode},
795 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000796 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797 {"utf-16-be", (encodefunc_t) utf16be_encode},
798 {"utf-16-le", (encodefunc_t) utf16le_encode},
799 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000800 {"utf-32-be", (encodefunc_t) utf32be_encode},
801 {"utf-32-le", (encodefunc_t) utf32le_encode},
802 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803 {NULL, NULL}
804};
805
806
807static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000808textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809{
810 char *kwlist[] = {"buffer", "encoding", "errors",
811 "newline", "line_buffering",
812 NULL};
813 PyObject *buffer, *raw;
814 char *encoding = NULL;
815 char *errors = NULL;
816 char *newline = NULL;
817 int line_buffering = 0;
818 _PyIO_State *state = IO_STATE;
819
820 PyObject *res;
821 int r;
822
823 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000824 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000825 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
826 kwlist, &buffer, &encoding, &errors,
827 &newline, &line_buffering))
828 return -1;
829
830 if (newline && newline[0] != '\0'
831 && !(newline[0] == '\n' && newline[1] == '\0')
832 && !(newline[0] == '\r' && newline[1] == '\0')
833 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
834 PyErr_Format(PyExc_ValueError,
835 "illegal newline value: %s", newline);
836 return -1;
837 }
838
839 Py_CLEAR(self->buffer);
840 Py_CLEAR(self->encoding);
841 Py_CLEAR(self->encoder);
842 Py_CLEAR(self->decoder);
843 Py_CLEAR(self->readnl);
844 Py_CLEAR(self->decoded_chars);
845 Py_CLEAR(self->pending_bytes);
846 Py_CLEAR(self->snapshot);
847 Py_CLEAR(self->errors);
848 Py_CLEAR(self->raw);
849 self->decoded_chars_used = 0;
850 self->pending_bytes_count = 0;
851 self->encodefunc = NULL;
852
853 if (encoding == NULL) {
854 /* Try os.device_encoding(fileno) */
855 PyObject *fileno;
856 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
857 /* Ignore only AttributeError and UnsupportedOperation */
858 if (fileno == NULL) {
859 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
860 PyErr_ExceptionMatches(state->unsupported_operation)) {
861 PyErr_Clear();
862 }
863 else {
864 goto error;
865 }
866 }
867 else {
868 self->encoding = PyObject_CallMethod(state->os_module,
869 "device_encoding",
870 "N", fileno);
871 if (self->encoding == NULL)
872 goto error;
873 else if (!PyUnicode_Check(self->encoding))
874 Py_CLEAR(self->encoding);
875 }
876 }
877 if (encoding == NULL && self->encoding == NULL) {
878 if (state->locale_module == NULL) {
879 state->locale_module = PyImport_ImportModule("locale");
880 if (state->locale_module == NULL)
881 goto catch_ImportError;
882 else
883 goto use_locale;
884 }
885 else {
886 use_locale:
887 self->encoding = PyObject_CallMethod(
888 state->locale_module, "getpreferredencoding", NULL);
889 if (self->encoding == NULL) {
890 catch_ImportError:
891 /*
892 Importing locale can raise a ImportError because of
893 _functools, and locale.getpreferredencoding can raise a
894 ImportError if _locale is not available. These will happen
895 during module building.
896 */
897 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
898 PyErr_Clear();
899 self->encoding = PyUnicode_FromString("ascii");
900 }
901 else
902 goto error;
903 }
904 else if (!PyUnicode_Check(self->encoding))
905 Py_CLEAR(self->encoding);
906 }
907 }
908 if (self->encoding != NULL)
909 encoding = _PyUnicode_AsString(self->encoding);
910 else if (encoding != NULL) {
911 self->encoding = PyUnicode_FromString(encoding);
912 if (self->encoding == NULL)
913 goto error;
914 }
915 else {
916 PyErr_SetString(PyExc_IOError,
917 "could not determine default encoding");
918 }
919
920 if (errors == NULL)
921 errors = "strict";
922 self->errors = PyBytes_FromString(errors);
923 if (self->errors == NULL)
924 goto error;
925
926 self->chunk_size = 8192;
927 self->readuniversal = (newline == NULL || newline[0] == '\0');
928 self->line_buffering = line_buffering;
929 self->readtranslate = (newline == NULL);
930 if (newline) {
931 self->readnl = PyUnicode_FromString(newline);
932 if (self->readnl == NULL)
933 return -1;
934 }
935 self->writetranslate = (newline == NULL || newline[0] != '\0');
936 if (!self->readuniversal && self->readnl) {
937 self->writenl = _PyUnicode_AsString(self->readnl);
938 if (!strcmp(self->writenl, "\n"))
939 self->writenl = NULL;
940 }
941#ifdef MS_WINDOWS
942 else
943 self->writenl = "\r\n";
944#endif
945
946 /* Build the decoder object */
947 res = PyObject_CallMethod(buffer, "readable", NULL);
948 if (res == NULL)
949 goto error;
950 r = PyObject_IsTrue(res);
951 Py_DECREF(res);
952 if (r == -1)
953 goto error;
954 if (r == 1) {
955 self->decoder = PyCodec_IncrementalDecoder(
956 encoding, errors);
957 if (self->decoder == NULL)
958 goto error;
959
960 if (self->readuniversal) {
961 PyObject *incrementalDecoder = PyObject_CallFunction(
962 (PyObject *)&PyIncrementalNewlineDecoder_Type,
963 "Oi", self->decoder, (int)self->readtranslate);
964 if (incrementalDecoder == NULL)
965 goto error;
966 Py_CLEAR(self->decoder);
967 self->decoder = incrementalDecoder;
968 }
969 }
970
971 /* Build the encoder object */
972 res = PyObject_CallMethod(buffer, "writable", NULL);
973 if (res == NULL)
974 goto error;
975 r = PyObject_IsTrue(res);
976 Py_DECREF(res);
977 if (r == -1)
978 goto error;
979 if (r == 1) {
980 PyObject *ci;
981 self->encoder = PyCodec_IncrementalEncoder(
982 encoding, errors);
983 if (self->encoder == NULL)
984 goto error;
985 /* Get the normalized named of the codec */
986 ci = _PyCodec_Lookup(encoding);
987 if (ci == NULL)
988 goto error;
989 res = PyObject_GetAttrString(ci, "name");
990 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +0000991 if (res == NULL) {
992 if (PyErr_ExceptionMatches(PyExc_AttributeError))
993 PyErr_Clear();
994 else
995 goto error;
996 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000997 else if (PyUnicode_Check(res)) {
998 encodefuncentry *e = encodefuncs;
999 while (e->name != NULL) {
1000 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1001 self->encodefunc = e->encodefunc;
1002 break;
1003 }
1004 e++;
1005 }
1006 }
1007 Py_XDECREF(res);
1008 }
1009
1010 self->buffer = buffer;
1011 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001012
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001013 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1014 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1015 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1016 raw = PyObject_GetAttrString(buffer, "raw");
1017 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001018 if (raw == NULL) {
1019 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1020 PyErr_Clear();
1021 else
1022 goto error;
1023 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001024 else if (Py_TYPE(raw) == &PyFileIO_Type)
1025 self->raw = raw;
1026 else
1027 Py_DECREF(raw);
1028 }
1029
1030 res = PyObject_CallMethod(buffer, "seekable", NULL);
1031 if (res == NULL)
1032 goto error;
1033 self->seekable = self->telling = PyObject_IsTrue(res);
1034 Py_DECREF(res);
1035
Antoine Pitroue4501852009-05-14 18:55:55 +00001036 self->encoding_start_of_stream = 0;
1037 if (self->seekable && self->encoder) {
1038 PyObject *cookieObj;
1039 int cmp;
1040
1041 self->encoding_start_of_stream = 1;
1042
1043 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1044 if (cookieObj == NULL)
1045 goto error;
1046
1047 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1048 Py_DECREF(cookieObj);
1049 if (cmp < 0) {
1050 goto error;
1051 }
1052
1053 if (cmp == 0) {
1054 self->encoding_start_of_stream = 0;
1055 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1056 _PyIO_zero, NULL);
1057 if (res == NULL)
1058 goto error;
1059 Py_DECREF(res);
1060 }
1061 }
1062
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001063 self->ok = 1;
1064 return 0;
1065
1066 error:
1067 return -1;
1068}
1069
1070static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001071_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001072{
1073 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1074 return -1;
1075 self->ok = 0;
1076 Py_CLEAR(self->buffer);
1077 Py_CLEAR(self->encoding);
1078 Py_CLEAR(self->encoder);
1079 Py_CLEAR(self->decoder);
1080 Py_CLEAR(self->readnl);
1081 Py_CLEAR(self->decoded_chars);
1082 Py_CLEAR(self->pending_bytes);
1083 Py_CLEAR(self->snapshot);
1084 Py_CLEAR(self->errors);
1085 Py_CLEAR(self->raw);
1086 return 0;
1087}
1088
1089static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001090textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001091{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001092 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001093 return;
1094 _PyObject_GC_UNTRACK(self);
1095 if (self->weakreflist != NULL)
1096 PyObject_ClearWeakRefs((PyObject *)self);
1097 Py_CLEAR(self->dict);
1098 Py_TYPE(self)->tp_free((PyObject *)self);
1099}
1100
1101static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001102textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001103{
1104 Py_VISIT(self->buffer);
1105 Py_VISIT(self->encoding);
1106 Py_VISIT(self->encoder);
1107 Py_VISIT(self->decoder);
1108 Py_VISIT(self->readnl);
1109 Py_VISIT(self->decoded_chars);
1110 Py_VISIT(self->pending_bytes);
1111 Py_VISIT(self->snapshot);
1112 Py_VISIT(self->errors);
1113 Py_VISIT(self->raw);
1114
1115 Py_VISIT(self->dict);
1116 return 0;
1117}
1118
1119static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001120textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001122 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001123 return -1;
1124 Py_CLEAR(self->dict);
1125 return 0;
1126}
1127
1128static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001129textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001130
1131/* This macro takes some shortcuts to make the common case faster. */
1132#define CHECK_CLOSED(self) \
1133 do { \
1134 int r; \
1135 PyObject *_res; \
1136 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1137 if (self->raw != NULL) \
1138 r = _PyFileIO_closed(self->raw); \
1139 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001140 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001141 if (_res == NULL) \
1142 return NULL; \
1143 r = PyObject_IsTrue(_res); \
1144 Py_DECREF(_res); \
1145 if (r < 0) \
1146 return NULL; \
1147 } \
1148 if (r > 0) { \
1149 PyErr_SetString(PyExc_ValueError, \
1150 "I/O operation on closed file."); \
1151 return NULL; \
1152 } \
1153 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001154 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001155 return NULL; \
1156 } while (0)
1157
/* Make the enclosing (pointer-returning) function fail with ValueError and
   return NULL when the object is not usable (self->ok <= 0).  The message
   distinguishes a detached buffer from an object that was never
   initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1169
/* Same check as CHECK_INITIALIZED, for functions that signal failure by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1181
1182
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001183static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001184textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001185{
1186 PyObject *buffer, *res;
1187 CHECK_INITIALIZED(self);
1188 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1189 if (res == NULL)
1190 return NULL;
1191 Py_DECREF(res);
1192 buffer = self->buffer;
1193 self->buffer = NULL;
1194 self->detached = 1;
1195 self->ok = 0;
1196 return buffer;
1197}
1198
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001199Py_LOCAL_INLINE(const Py_UNICODE *)
1200findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1201{
1202 /* like wcschr, but doesn't stop at NULL characters */
1203 while (size-- > 0) {
1204 if (*s == ch)
1205 return s;
1206 s++;
1207 }
1208 return NULL;
1209}
1210
Antoine Pitrou24f36292009-03-28 22:16:42 +00001211/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001212 underlying buffered object, though. */
1213static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001214_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001215{
1216 PyObject *b, *ret;
1217
1218 if (self->pending_bytes == NULL)
1219 return 0;
1220 b = _PyBytes_Join(_PyIO_empty_bytes, self->pending_bytes);
1221 if (b == NULL)
1222 return -1;
1223 ret = PyObject_CallMethodObjArgs(self->buffer,
1224 _PyIO_str_write, b, NULL);
1225 Py_DECREF(b);
1226 if (ret == NULL)
1227 return -1;
1228 Py_DECREF(ret);
1229 Py_CLEAR(self->pending_bytes);
1230 self->pending_bytes_count = 0;
1231 return 0;
1232}
1233
1234static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001235textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001236{
1237 PyObject *ret;
1238 PyObject *text; /* owned reference */
1239 PyObject *b;
1240 Py_ssize_t textlen;
1241 int haslf = 0;
1242 int needflush = 0;
1243
1244 CHECK_INITIALIZED(self);
1245
1246 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1247 return NULL;
1248 }
1249
1250 CHECK_CLOSED(self);
1251
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001252 if (self->encoder == NULL) {
1253 PyErr_SetString(PyExc_IOError, "not writable");
1254 return NULL;
1255 }
1256
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001257 Py_INCREF(text);
1258
1259 textlen = PyUnicode_GetSize(text);
1260
1261 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1262 if (findchar(PyUnicode_AS_UNICODE(text),
1263 PyUnicode_GET_SIZE(text), '\n'))
1264 haslf = 1;
1265
1266 if (haslf && self->writetranslate && self->writenl != NULL) {
1267 PyObject *newtext = PyObject_CallMethod(
1268 text, "replace", "ss", "\n", self->writenl);
1269 Py_DECREF(text);
1270 if (newtext == NULL)
1271 return NULL;
1272 text = newtext;
1273 }
1274
1275 if (self->line_buffering &&
1276 (haslf ||
1277 findchar(PyUnicode_AS_UNICODE(text),
1278 PyUnicode_GET_SIZE(text), '\r')))
1279 needflush = 1;
1280
1281 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001282 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001283 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001284 self->encoding_start_of_stream = 0;
1285 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286 else
1287 b = PyObject_CallMethodObjArgs(self->encoder,
1288 _PyIO_str_encode, text, NULL);
1289 Py_DECREF(text);
1290 if (b == NULL)
1291 return NULL;
1292
1293 if (self->pending_bytes == NULL) {
1294 self->pending_bytes = PyList_New(0);
1295 if (self->pending_bytes == NULL) {
1296 Py_DECREF(b);
1297 return NULL;
1298 }
1299 self->pending_bytes_count = 0;
1300 }
1301 if (PyList_Append(self->pending_bytes, b) < 0) {
1302 Py_DECREF(b);
1303 return NULL;
1304 }
1305 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1306 Py_DECREF(b);
1307 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001308 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001309 return NULL;
1310 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001311
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 if (needflush) {
1313 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1314 if (ret == NULL)
1315 return NULL;
1316 Py_DECREF(ret);
1317 }
1318
1319 Py_CLEAR(self->snapshot);
1320
1321 if (self->decoder) {
1322 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1323 if (ret == NULL)
1324 return NULL;
1325 Py_DECREF(ret);
1326 }
1327
1328 return PyLong_FromSsize_t(textlen);
1329}
1330
1331/* Steal a reference to chars and store it in the decoded_char buffer;
1332 */
1333static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001334textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001335{
1336 Py_CLEAR(self->decoded_chars);
1337 self->decoded_chars = chars;
1338 self->decoded_chars_used = 0;
1339}
1340
1341static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001342textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001343{
1344 PyObject *chars;
1345 Py_ssize_t avail;
1346
1347 if (self->decoded_chars == NULL)
1348 return PyUnicode_FromStringAndSize(NULL, 0);
1349
1350 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1351 - self->decoded_chars_used);
1352
1353 assert(avail >= 0);
1354
1355 if (n < 0 || n > avail)
1356 n = avail;
1357
1358 if (self->decoded_chars_used > 0 || n < avail) {
1359 chars = PyUnicode_FromUnicode(
1360 PyUnicode_AS_UNICODE(self->decoded_chars)
1361 + self->decoded_chars_used, n);
1362 if (chars == NULL)
1363 return NULL;
1364 }
1365 else {
1366 chars = self->decoded_chars;
1367 Py_INCREF(chars);
1368 }
1369
1370 self->decoded_chars_used += n;
1371 return chars;
1372}
1373
1374/* Read and decode the next chunk of data from the BufferedReader.
1375 */
1376static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001377textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001378{
1379 PyObject *dec_buffer = NULL;
1380 PyObject *dec_flags = NULL;
1381 PyObject *input_chunk = NULL;
1382 PyObject *decoded_chars, *chunk_size;
1383 int eof;
1384
1385 /* The return value is True unless EOF was reached. The decoded string is
1386 * placed in self._decoded_chars (replacing its previous value). The
1387 * entire input chunk is sent to the decoder, though some of it may remain
1388 * buffered in the decoder, yet to be converted.
1389 */
1390
1391 if (self->decoder == NULL) {
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001392 PyErr_SetString(PyExc_IOError, "not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001393 return -1;
1394 }
1395
1396 if (self->telling) {
1397 /* To prepare for tell(), we need to snapshot a point in the file
1398 * where the decoder's input buffer is empty.
1399 */
1400
1401 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1402 _PyIO_str_getstate, NULL);
1403 if (state == NULL)
1404 return -1;
1405 /* Given this, we know there was a valid snapshot point
1406 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1407 */
1408 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1409 Py_DECREF(state);
1410 return -1;
1411 }
1412 Py_INCREF(dec_buffer);
1413 Py_INCREF(dec_flags);
1414 Py_DECREF(state);
1415 }
1416
1417 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1418 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1419 if (chunk_size == NULL)
1420 goto fail;
1421 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1422 _PyIO_str_read1, chunk_size, NULL);
1423 Py_DECREF(chunk_size);
1424 if (input_chunk == NULL)
1425 goto fail;
1426 assert(PyBytes_Check(input_chunk));
1427
1428 eof = (PyBytes_Size(input_chunk) == 0);
1429
1430 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1431 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1432 self->decoder, input_chunk, eof);
1433 }
1434 else {
1435 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1436 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1437 }
1438
1439 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1440 if (decoded_chars == NULL)
1441 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001442 textiowrapper_set_decoded_chars(self, decoded_chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001443 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1444 eof = 0;
1445
1446 if (self->telling) {
1447 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1448 * next input to be decoded is dec_buffer + input_chunk.
1449 */
1450 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1451 if (next_input == NULL)
1452 goto fail;
1453 assert (PyBytes_Check(next_input));
1454 Py_DECREF(dec_buffer);
1455 Py_CLEAR(self->snapshot);
1456 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1457 }
1458 Py_DECREF(input_chunk);
1459
1460 return (eof == 0);
1461
1462 fail:
1463 Py_XDECREF(dec_buffer);
1464 Py_XDECREF(dec_flags);
1465 Py_XDECREF(input_chunk);
1466 return -1;
1467}
1468
1469static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001470textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001471{
1472 Py_ssize_t n = -1;
1473 PyObject *result = NULL, *chunks = NULL;
1474
1475 CHECK_INITIALIZED(self);
1476
1477 if (!PyArg_ParseTuple(args, "|n:read", &n))
1478 return NULL;
1479
1480 CHECK_CLOSED(self);
1481
Benjamin Petersona1b49012009-03-31 23:11:32 +00001482 if (self->decoder == NULL) {
1483 PyErr_SetString(PyExc_IOError, "not readable");
1484 return NULL;
1485 }
1486
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001487 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001488 return NULL;
1489
1490 if (n < 0) {
1491 /* Read everything */
1492 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1493 PyObject *decoded;
1494 if (bytes == NULL)
1495 goto fail;
1496 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1497 bytes, Py_True, NULL);
1498 Py_DECREF(bytes);
1499 if (decoded == NULL)
1500 goto fail;
1501
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001502 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001503
1504 if (result == NULL) {
1505 Py_DECREF(decoded);
1506 return NULL;
1507 }
1508
1509 PyUnicode_AppendAndDel(&result, decoded);
1510 if (result == NULL)
1511 goto fail;
1512
1513 Py_CLEAR(self->snapshot);
1514 return result;
1515 }
1516 else {
1517 int res = 1;
1518 Py_ssize_t remaining = n;
1519
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001520 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001521 if (result == NULL)
1522 goto fail;
1523 remaining -= PyUnicode_GET_SIZE(result);
1524
1525 /* Keep reading chunks until we have n characters to return */
1526 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001527 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001528 if (res < 0)
1529 goto fail;
1530 if (res == 0) /* EOF */
1531 break;
1532 if (chunks == NULL) {
1533 chunks = PyList_New(0);
1534 if (chunks == NULL)
1535 goto fail;
1536 }
1537 if (PyList_Append(chunks, result) < 0)
1538 goto fail;
1539 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001540 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001541 if (result == NULL)
1542 goto fail;
1543 remaining -= PyUnicode_GET_SIZE(result);
1544 }
1545 if (chunks != NULL) {
1546 if (result != NULL && PyList_Append(chunks, result) < 0)
1547 goto fail;
1548 Py_CLEAR(result);
1549 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1550 if (result == NULL)
1551 goto fail;
1552 Py_CLEAR(chunks);
1553 }
1554 return result;
1555 }
1556 fail:
1557 Py_XDECREF(result);
1558 Py_XDECREF(chunks);
1559 return NULL;
1560}
1561
1562
1563/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1564 that is to the NUL character. Otherwise the function will produce
1565 incorrect results. */
1566static Py_UNICODE *
1567find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1568{
1569 Py_UNICODE *s = start;
1570 for (;;) {
1571 while (*s > ch)
1572 s++;
1573 if (*s == ch)
1574 return s;
1575 if (s == end)
1576 return NULL;
1577 s++;
1578 }
1579}
1580
1581Py_ssize_t
1582_PyIO_find_line_ending(
1583 int translated, int universal, PyObject *readnl,
1584 Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
1585{
1586 Py_ssize_t len = end - start;
1587
1588 if (translated) {
1589 /* Newlines are already translated, only search for \n */
1590 Py_UNICODE *pos = find_control_char(start, end, '\n');
1591 if (pos != NULL)
1592 return pos - start + 1;
1593 else {
1594 *consumed = len;
1595 return -1;
1596 }
1597 }
1598 else if (universal) {
1599 /* Universal newline search. Find any of \r, \r\n, \n
1600 * The decoder ensures that \r\n are not split in two pieces
1601 */
1602 Py_UNICODE *s = start;
1603 for (;;) {
1604 Py_UNICODE ch;
1605 /* Fast path for non-control chars. The loop always ends
1606 since the Py_UNICODE storage is NUL-terminated. */
1607 while (*s > '\r')
1608 s++;
1609 if (s >= end) {
1610 *consumed = len;
1611 return -1;
1612 }
1613 ch = *s++;
1614 if (ch == '\n')
1615 return s - start;
1616 if (ch == '\r') {
1617 if (*s == '\n')
1618 return s - start + 1;
1619 else
1620 return s - start;
1621 }
1622 }
1623 }
1624 else {
1625 /* Non-universal mode. */
1626 Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
1627 Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
1628 if (readnl_len == 1) {
1629 Py_UNICODE *pos = find_control_char(start, end, nl[0]);
1630 if (pos != NULL)
1631 return pos - start + 1;
1632 *consumed = len;
1633 return -1;
1634 }
1635 else {
1636 Py_UNICODE *s = start;
1637 Py_UNICODE *e = end - readnl_len + 1;
1638 Py_UNICODE *pos;
1639 if (e < s)
1640 e = s;
1641 while (s < e) {
1642 Py_ssize_t i;
1643 Py_UNICODE *pos = find_control_char(s, end, nl[0]);
1644 if (pos == NULL || pos >= e)
1645 break;
1646 for (i = 1; i < readnl_len; i++) {
1647 if (pos[i] != nl[i])
1648 break;
1649 }
1650 if (i == readnl_len)
1651 return pos - start + readnl_len;
1652 s = pos + 1;
1653 }
1654 pos = find_control_char(e, end, nl[0]);
1655 if (pos == NULL)
1656 *consumed = len;
1657 else
1658 *consumed = pos - start;
1659 return -1;
1660 }
1661 }
1662}
1663
1664static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001665_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001666{
1667 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1668 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1669 int res;
1670
1671 CHECK_CLOSED(self);
1672
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001673 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674 return NULL;
1675
1676 chunked = 0;
1677
1678 while (1) {
1679 Py_UNICODE *ptr;
1680 Py_ssize_t line_len;
1681 Py_ssize_t consumed = 0;
1682
1683 /* First, get some data if necessary */
1684 res = 1;
1685 while (!self->decoded_chars ||
1686 !PyUnicode_GET_SIZE(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001687 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 if (res < 0)
1689 goto error;
1690 if (res == 0)
1691 break;
1692 }
1693 if (res == 0) {
1694 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001695 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001696 Py_CLEAR(self->snapshot);
1697 start = endpos = offset_to_buffer = 0;
1698 break;
1699 }
1700
1701 if (remaining == NULL) {
1702 line = self->decoded_chars;
1703 start = self->decoded_chars_used;
1704 offset_to_buffer = 0;
1705 Py_INCREF(line);
1706 }
1707 else {
1708 assert(self->decoded_chars_used == 0);
1709 line = PyUnicode_Concat(remaining, self->decoded_chars);
1710 start = 0;
1711 offset_to_buffer = PyUnicode_GET_SIZE(remaining);
1712 Py_CLEAR(remaining);
1713 if (line == NULL)
1714 goto error;
1715 }
1716
1717 ptr = PyUnicode_AS_UNICODE(line);
1718 line_len = PyUnicode_GET_SIZE(line);
1719
1720 endpos = _PyIO_find_line_ending(
1721 self->readtranslate, self->readuniversal, self->readnl,
1722 ptr + start, ptr + line_len, &consumed);
1723 if (endpos >= 0) {
1724 endpos += start;
1725 if (limit >= 0 && (endpos - start) + chunked >= limit)
1726 endpos = start + limit - chunked;
1727 break;
1728 }
1729
1730 /* We can put aside up to `endpos` */
1731 endpos = consumed + start;
1732 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1733 /* Didn't find line ending, but reached length limit */
1734 endpos = start + limit - chunked;
1735 break;
1736 }
1737
1738 if (endpos > start) {
1739 /* No line ending seen yet - put aside current data */
1740 PyObject *s;
1741 if (chunks == NULL) {
1742 chunks = PyList_New(0);
1743 if (chunks == NULL)
1744 goto error;
1745 }
1746 s = PyUnicode_FromUnicode(ptr + start, endpos - start);
1747 if (s == NULL)
1748 goto error;
1749 if (PyList_Append(chunks, s) < 0) {
1750 Py_DECREF(s);
1751 goto error;
1752 }
1753 chunked += PyUnicode_GET_SIZE(s);
1754 Py_DECREF(s);
1755 }
1756 /* There may be some remaining bytes we'll have to prepend to the
1757 next chunk of data */
1758 if (endpos < line_len) {
1759 remaining = PyUnicode_FromUnicode(
1760 ptr + endpos, line_len - endpos);
1761 if (remaining == NULL)
1762 goto error;
1763 }
1764 Py_CLEAR(line);
1765 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001766 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001767 }
1768
1769 if (line != NULL) {
1770 /* Our line ends in the current buffer */
1771 self->decoded_chars_used = endpos - offset_to_buffer;
1772 if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
1773 if (start == 0 && Py_REFCNT(line) == 1) {
1774 if (PyUnicode_Resize(&line, endpos) < 0)
1775 goto error;
1776 }
1777 else {
1778 PyObject *s = PyUnicode_FromUnicode(
1779 PyUnicode_AS_UNICODE(line) + start, endpos - start);
1780 Py_CLEAR(line);
1781 if (s == NULL)
1782 goto error;
1783 line = s;
1784 }
1785 }
1786 }
1787 if (remaining != NULL) {
1788 if (chunks == NULL) {
1789 chunks = PyList_New(0);
1790 if (chunks == NULL)
1791 goto error;
1792 }
1793 if (PyList_Append(chunks, remaining) < 0)
1794 goto error;
1795 Py_CLEAR(remaining);
1796 }
1797 if (chunks != NULL) {
1798 if (line != NULL && PyList_Append(chunks, line) < 0)
1799 goto error;
1800 Py_CLEAR(line);
1801 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1802 if (line == NULL)
1803 goto error;
1804 Py_DECREF(chunks);
1805 }
1806 if (line == NULL)
1807 line = PyUnicode_FromStringAndSize(NULL, 0);
1808
1809 return line;
1810
1811 error:
1812 Py_XDECREF(chunks);
1813 Py_XDECREF(remaining);
1814 Py_XDECREF(line);
1815 return NULL;
1816}
1817
1818static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001819textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001820{
1821 Py_ssize_t limit = -1;
1822
1823 CHECK_INITIALIZED(self);
1824 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1825 return NULL;
1826 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001827 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001828}
1829
1830/* Seek and Tell */
1831
1832typedef struct {
1833 Py_off_t start_pos;
1834 int dec_flags;
1835 int bytes_to_feed;
1836 int chars_to_skip;
1837 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001838} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001839
/*
   Cookies are packed and unpacked through an intermediary byte string that
   is converted with _PyLong_FromByteArray() / _PyLong_AsByteArray().  The
   macros below give the total size of that byte string and the offset at
   which each cookie_type field is stored inside it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* The least significant byte of start_pos must also be the least
   significant byte of the cookie.  On a big-endian machine this means the
   fields are laid out in reverse order, with start_pos last. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* On a little-endian machine, storing start_pos first naturally makes its
   least significant byte the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1877
1878static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001879textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001880{
1881 unsigned char buffer[COOKIE_BUF_LEN];
1882 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1883 if (cookieLong == NULL)
1884 return -1;
1885
1886 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1887 IS_LITTLE_ENDIAN, 0) < 0) {
1888 Py_DECREF(cookieLong);
1889 return -1;
1890 }
1891 Py_DECREF(cookieLong);
1892
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001893 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1894 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1895 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1896 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1897 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001898
1899 return 0;
1900}
1901
1902static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001903textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001904{
1905 unsigned char buffer[COOKIE_BUF_LEN];
1906
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001907 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1908 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1909 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1910 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1911 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001912
1913 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1914}
1915#undef IS_LITTLE_ENDIAN
1916
1917static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001918_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001919{
1920 PyObject *res;
1921 /* When seeking to the start of the stream, we call decoder.reset()
1922 rather than decoder.getstate().
1923 This is for a few decoders such as utf-16 for which the state value
1924 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1925 utf-16, that we are expecting a BOM).
1926 */
1927 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1928 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1929 else
1930 res = PyObject_CallMethod(self->decoder, "setstate",
1931 "((yi))", "", cookie->dec_flags);
1932 if (res == NULL)
1933 return -1;
1934 Py_DECREF(res);
1935 return 0;
1936}
1937
Antoine Pitroue4501852009-05-14 18:55:55 +00001938static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001939_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001940{
1941 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001942 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001943 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1944 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1945 self->encoding_start_of_stream = 1;
1946 }
1947 else {
1948 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1949 _PyIO_zero, NULL);
1950 self->encoding_start_of_stream = 0;
1951 }
1952 if (res == NULL)
1953 return -1;
1954 Py_DECREF(res);
1955 return 0;
1956}
1957
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001958static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001959textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001960{
1961 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001962 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001964 PyObject *res;
1965 int cmp;
1966
1967 CHECK_INITIALIZED(self);
1968
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1970 return NULL;
1971 CHECK_CLOSED(self);
1972
1973 Py_INCREF(cookieObj);
1974
1975 if (!self->seekable) {
1976 PyErr_SetString(PyExc_IOError,
1977 "underlying stream is not seekable");
1978 goto fail;
1979 }
1980
1981 if (whence == 1) {
1982 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00001983 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001984 if (cmp < 0)
1985 goto fail;
1986
1987 if (cmp == 0) {
1988 PyErr_SetString(PyExc_IOError,
1989 "can't do nonzero cur-relative seeks");
1990 goto fail;
1991 }
1992
1993 /* Seeking to the current position should attempt to
1994 * sync the underlying buffer with the current position.
1995 */
1996 Py_DECREF(cookieObj);
1997 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
1998 if (cookieObj == NULL)
1999 goto fail;
2000 }
2001 else if (whence == 2) {
2002 /* seek relative to end of file */
2003
Antoine Pitroue4501852009-05-14 18:55:55 +00002004 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005 if (cmp < 0)
2006 goto fail;
2007
2008 if (cmp == 0) {
2009 PyErr_SetString(PyExc_IOError,
2010 "can't do nonzero end-relative seeks");
2011 goto fail;
2012 }
2013
2014 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2015 if (res == NULL)
2016 goto fail;
2017 Py_DECREF(res);
2018
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002019 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002020 Py_CLEAR(self->snapshot);
2021 if (self->decoder) {
2022 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2023 if (res == NULL)
2024 goto fail;
2025 Py_DECREF(res);
2026 }
2027
2028 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2029 Py_XDECREF(cookieObj);
2030 return res;
2031 }
2032 else if (whence != 0) {
2033 PyErr_Format(PyExc_ValueError,
2034 "invalid whence (%d, should be 0, 1 or 2)", whence);
2035 goto fail;
2036 }
2037
Antoine Pitroue4501852009-05-14 18:55:55 +00002038 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002039 if (cmp < 0)
2040 goto fail;
2041
2042 if (cmp == 1) {
2043 PyErr_Format(PyExc_ValueError,
2044 "negative seek position %R", cookieObj);
2045 goto fail;
2046 }
2047
2048 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2049 if (res == NULL)
2050 goto fail;
2051 Py_DECREF(res);
2052
2053 /* The strategy of seek() is to go back to the safe start point
2054 * and replay the effect of read(chars_to_skip) from there.
2055 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002056 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002057 goto fail;
2058
2059 /* Seek back to the safe start point. */
2060 posobj = PyLong_FromOff_t(cookie.start_pos);
2061 if (posobj == NULL)
2062 goto fail;
2063 res = PyObject_CallMethodObjArgs(self->buffer,
2064 _PyIO_str_seek, posobj, NULL);
2065 Py_DECREF(posobj);
2066 if (res == NULL)
2067 goto fail;
2068 Py_DECREF(res);
2069
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002070 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071 Py_CLEAR(self->snapshot);
2072
2073 /* Restore the decoder to its state from the safe start point. */
2074 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002075 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 goto fail;
2077 }
2078
2079 if (cookie.chars_to_skip) {
2080 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2081 PyObject *input_chunk = PyObject_CallMethod(
2082 self->buffer, "read", "i", cookie.bytes_to_feed);
2083 PyObject *decoded;
2084
2085 if (input_chunk == NULL)
2086 goto fail;
2087
2088 assert (PyBytes_Check(input_chunk));
2089
2090 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2091 if (self->snapshot == NULL) {
2092 Py_DECREF(input_chunk);
2093 goto fail;
2094 }
2095
2096 decoded = PyObject_CallMethod(self->decoder, "decode",
2097 "Oi", input_chunk, (int)cookie.need_eof);
2098
2099 if (decoded == NULL)
2100 goto fail;
2101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002102 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103
2104 /* Skip chars_to_skip of the decoded characters. */
2105 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2106 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2107 goto fail;
2108 }
2109 self->decoded_chars_used = cookie.chars_to_skip;
2110 }
2111 else {
2112 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2113 if (self->snapshot == NULL)
2114 goto fail;
2115 }
2116
Antoine Pitroue4501852009-05-14 18:55:55 +00002117 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2118 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002119 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002120 goto fail;
2121 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002122 return cookieObj;
2123 fail:
2124 Py_XDECREF(cookieObj);
2125 return NULL;
2126
2127}
2128
2129static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002130textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002131{
2132 PyObject *res;
2133 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002134 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002135 PyObject *next_input;
2136 Py_ssize_t chars_to_skip, chars_decoded;
2137 PyObject *saved_state = NULL;
2138 char *input, *input_end;
2139
2140 CHECK_INITIALIZED(self);
2141 CHECK_CLOSED(self);
2142
2143 if (!self->seekable) {
2144 PyErr_SetString(PyExc_IOError,
2145 "underlying stream is not seekable");
2146 goto fail;
2147 }
2148 if (!self->telling) {
2149 PyErr_SetString(PyExc_IOError,
2150 "telling position disabled by next() call");
2151 goto fail;
2152 }
2153
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002154 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002155 return NULL;
2156 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2157 if (res == NULL)
2158 goto fail;
2159 Py_DECREF(res);
2160
2161 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2162 if (posobj == NULL)
2163 goto fail;
2164
2165 if (self->decoder == NULL || self->snapshot == NULL) {
2166 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2167 return posobj;
2168 }
2169
2170#if defined(HAVE_LARGEFILE_SUPPORT)
2171 cookie.start_pos = PyLong_AsLongLong(posobj);
2172#else
2173 cookie.start_pos = PyLong_AsLong(posobj);
2174#endif
2175 if (PyErr_Occurred())
2176 goto fail;
2177
2178 /* Skip backward to the snapshot point (see _read_chunk). */
2179 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2180 goto fail;
2181
2182 assert (PyBytes_Check(next_input));
2183
2184 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2185
2186 /* How many decoded characters have been used up since the snapshot? */
2187 if (self->decoded_chars_used == 0) {
2188 /* We haven't moved from the snapshot point. */
2189 Py_DECREF(posobj);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002190 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002191 }
2192
2193 chars_to_skip = self->decoded_chars_used;
2194
2195 /* Starting from the snapshot position, we will walk the decoder
2196 * forward until it gives us enough decoded characters.
2197 */
2198 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2199 _PyIO_str_getstate, NULL);
2200 if (saved_state == NULL)
2201 goto fail;
2202
2203 /* Note our initial start point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002204 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002205 goto fail;
2206
2207 /* Feed the decoder one byte at a time. As we go, note the
2208 * nearest "safe start point" before the current location
2209 * (a point where the decoder has nothing buffered, so seek()
2210 * can safely start from there and advance to this location).
2211 */
2212 chars_decoded = 0;
2213 input = PyBytes_AS_STRING(next_input);
2214 input_end = input + PyBytes_GET_SIZE(next_input);
2215 while (input < input_end) {
2216 PyObject *state;
2217 char *dec_buffer;
2218 Py_ssize_t dec_buffer_len;
2219 int dec_flags;
2220
2221 PyObject *decoded = PyObject_CallMethod(
2222 self->decoder, "decode", "y#", input, 1);
2223 if (decoded == NULL)
2224 goto fail;
2225 assert (PyUnicode_Check(decoded));
2226 chars_decoded += PyUnicode_GET_SIZE(decoded);
2227 Py_DECREF(decoded);
2228
2229 cookie.bytes_to_feed += 1;
2230
2231 state = PyObject_CallMethodObjArgs(self->decoder,
2232 _PyIO_str_getstate, NULL);
2233 if (state == NULL)
2234 goto fail;
2235 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2236 Py_DECREF(state);
2237 goto fail;
2238 }
2239 Py_DECREF(state);
2240
2241 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2242 /* Decoder buffer is empty, so this is a safe start point. */
2243 cookie.start_pos += cookie.bytes_to_feed;
2244 chars_to_skip -= chars_decoded;
2245 cookie.dec_flags = dec_flags;
2246 cookie.bytes_to_feed = 0;
2247 chars_decoded = 0;
2248 }
2249 if (chars_decoded >= chars_to_skip)
2250 break;
2251 input++;
2252 }
2253 if (input == input_end) {
2254 /* We didn't get enough decoded data; signal EOF to get more. */
2255 PyObject *decoded = PyObject_CallMethod(
2256 self->decoder, "decode", "yi", "", /* final = */ 1);
2257 if (decoded == NULL)
2258 goto fail;
2259 assert (PyUnicode_Check(decoded));
2260 chars_decoded += PyUnicode_GET_SIZE(decoded);
2261 Py_DECREF(decoded);
2262 cookie.need_eof = 1;
2263
2264 if (chars_decoded < chars_to_skip) {
2265 PyErr_SetString(PyExc_IOError,
2266 "can't reconstruct logical file position");
2267 goto fail;
2268 }
2269 }
2270
2271 /* finally */
2272 Py_XDECREF(posobj);
2273 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2274 Py_DECREF(saved_state);
2275 if (res == NULL)
2276 return NULL;
2277 Py_DECREF(res);
2278
2279 /* The returned cookie corresponds to the last safe start point. */
2280 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002281 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002282
2283 fail:
2284 Py_XDECREF(posobj);
2285 if (saved_state) {
2286 PyObject *type, *value, *traceback;
2287 PyErr_Fetch(&type, &value, &traceback);
2288
2289 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2290 Py_DECREF(saved_state);
2291 if (res == NULL)
2292 return NULL;
2293 Py_DECREF(res);
2294
2295 PyErr_Restore(type, value, traceback);
2296 }
2297 return NULL;
2298}
2299
2300static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002301textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002302{
2303 PyObject *pos = Py_None;
2304 PyObject *res;
2305
2306 CHECK_INITIALIZED(self)
2307 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2308 return NULL;
2309 }
2310
2311 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2312 if (res == NULL)
2313 return NULL;
2314 Py_DECREF(res);
2315
2316 if (pos != Py_None) {
2317 res = PyObject_CallMethodObjArgs((PyObject *) self,
2318 _PyIO_str_seek, pos, NULL);
2319 if (res == NULL)
2320 return NULL;
2321 Py_DECREF(res);
2322 }
2323
2324 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2325}
2326
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002327static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002328textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002329{
Antoine Pitrou716c4442009-05-23 19:04:03 +00002330 PyObject *nameobj, *res;
2331
2332 CHECK_INITIALIZED(self);
2333
2334 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2335 if (nameobj == NULL) {
2336 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2337 PyErr_Clear();
2338 else
2339 return NULL;
2340 res = PyUnicode_FromFormat("<_io.TextIOWrapper encoding=%R>",
2341 self->encoding);
2342 }
2343 else {
2344 res = PyUnicode_FromFormat("<_io.TextIOWrapper name=%R encoding=%R>",
2345 nameobj, self->encoding);
2346 Py_DECREF(nameobj);
2347 }
2348 return res;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002349}
2350
2351
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002352/* Inquiries */
2353
2354static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002355textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002356{
2357 CHECK_INITIALIZED(self);
2358 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2359}
2360
2361static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002362textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002363{
2364 CHECK_INITIALIZED(self);
2365 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2366}
2367
2368static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002369textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002370{
2371 CHECK_INITIALIZED(self);
2372 return PyObject_CallMethod(self->buffer, "readable", NULL);
2373}
2374
2375static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002376textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002377{
2378 CHECK_INITIALIZED(self);
2379 return PyObject_CallMethod(self->buffer, "writable", NULL);
2380}
2381
2382static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002383textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002384{
2385 CHECK_INITIALIZED(self);
2386 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2387}
2388
2389static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002390textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391{
2392 CHECK_INITIALIZED(self);
2393 CHECK_CLOSED(self);
2394 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002395 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396 return NULL;
2397 return PyObject_CallMethod(self->buffer, "flush", NULL);
2398}
2399
2400static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002401textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002402{
2403 PyObject *res;
2404 CHECK_INITIALIZED(self);
2405 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2406 if (res == NULL) {
2407 /* If flush() fails, just give up */
2408 PyErr_Clear();
2409 }
2410 else
2411 Py_DECREF(res);
2412
2413 return PyObject_CallMethod(self->buffer, "close", NULL);
2414}
2415
2416static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002417textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418{
2419 PyObject *line;
2420
2421 CHECK_INITIALIZED(self);
2422
2423 self->telling = 0;
2424 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2425 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002426 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002427 }
2428 else {
2429 line = PyObject_CallMethodObjArgs((PyObject *)self,
2430 _PyIO_str_readline, NULL);
2431 if (line && !PyUnicode_Check(line)) {
2432 PyErr_Format(PyExc_IOError,
2433 "readline() should have returned an str object, "
2434 "not '%.200s'", Py_TYPE(line)->tp_name);
2435 Py_DECREF(line);
2436 return NULL;
2437 }
2438 }
2439
2440 if (line == NULL)
2441 return NULL;
2442
2443 if (PyUnicode_GET_SIZE(line) == 0) {
2444 /* Reached EOF or would have blocked */
2445 Py_DECREF(line);
2446 Py_CLEAR(self->snapshot);
2447 self->telling = self->seekable;
2448 return NULL;
2449 }
2450
2451 return line;
2452}
2453
2454static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002455textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002456{
2457 CHECK_INITIALIZED(self);
2458 return PyObject_GetAttrString(self->buffer, "name");
2459}
2460
2461static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002462textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463{
2464 CHECK_INITIALIZED(self);
2465 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2466}
2467
2468static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002469textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002470{
2471 PyObject *res;
2472 CHECK_INITIALIZED(self);
2473 if (self->decoder == NULL)
2474 Py_RETURN_NONE;
2475 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2476 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002477 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2478 PyErr_Clear();
2479 Py_RETURN_NONE;
2480 }
2481 else {
2482 return NULL;
2483 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002484 }
2485 return res;
2486}
2487
2488static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002489textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002490{
2491 CHECK_INITIALIZED(self);
2492 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2493}
2494
2495static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002496textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497{
2498 CHECK_INITIALIZED(self);
2499 return PyLong_FromSsize_t(self->chunk_size);
2500}
2501
2502static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002503textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504{
2505 Py_ssize_t n;
2506 CHECK_INITIALIZED_INT(self);
2507 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2508 if (n == -1 && PyErr_Occurred())
2509 return -1;
2510 if (n <= 0) {
2511 PyErr_SetString(PyExc_ValueError,
2512 "a strictly positive integer is required");
2513 return -1;
2514 }
2515 self->chunk_size = n;
2516 return 0;
2517}
2518
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002519static PyMethodDef textiowrapper_methods[] = {
2520 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2521 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2522 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2523 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2524 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2525 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002526
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002527 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2528 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2529 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2530 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2531 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002533 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2534 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2535 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536 {NULL, NULL}
2537};
2538
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002539static PyMemberDef textiowrapper_members[] = {
2540 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2541 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2542 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002543 {NULL}
2544};
2545
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002546static PyGetSetDef textiowrapper_getset[] = {
2547 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2548 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002549/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2550*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002551 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2552 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2553 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2554 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002555 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556};
2557
2558PyTypeObject PyTextIOWrapper_Type = {
2559 PyVarObject_HEAD_INIT(NULL, 0)
2560 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002561 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002562 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002563 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002564 0, /*tp_print*/
2565 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002566 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002568 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002569 0, /*tp_as_number*/
2570 0, /*tp_as_sequence*/
2571 0, /*tp_as_mapping*/
2572 0, /*tp_hash */
2573 0, /*tp_call*/
2574 0, /*tp_str*/
2575 0, /*tp_getattro*/
2576 0, /*tp_setattro*/
2577 0, /*tp_as_buffer*/
2578 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2579 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002580 textiowrapper_doc, /* tp_doc */
2581 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2582 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002583 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002584 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002585 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002586 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2587 textiowrapper_methods, /* tp_methods */
2588 textiowrapper_members, /* tp_members */
2589 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590 0, /* tp_base */
2591 0, /* tp_dict */
2592 0, /* tp_descr_get */
2593 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002594 offsetof(textio, dict), /*tp_dictoffset*/
2595 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002596 0, /* tp_alloc */
2597 PyType_GenericNew, /* tp_new */
2598};