blob: ba28d59e3e3964018f90e515d7d02febd0c35026 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
14/* TextIOBase */
15
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000016PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000017 "Base class for text I/O.\n"
18 "\n"
19 "This class provides a character and line based interface to stream\n"
20 "I/O. There is no readinto method because Python's character strings\n"
21 "are immutable. There is no public constructor.\n"
22 );
23
24static PyObject *
25_unsupported(const char *message)
26{
27 PyErr_SetString(IO_STATE->unsupported_operation, message);
28 return NULL;
29}
30
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000031PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000032 "Separate the underlying buffer from the TextIOBase and return it.\n"
33 "\n"
34 "After the underlying buffer has been detached, the TextIO is in an\n"
35 "unusable state.\n"
36 );
37
38static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000039textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000040{
41 return _unsupported("detach");
42}
43
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000044PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000045 "Read at most n characters from stream.\n"
46 "\n"
47 "Read from underlying buffer until we have n characters or we hit EOF.\n"
48 "If n is negative or omitted, read until EOF.\n"
49 );
50
51static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000053{
54 return _unsupported("read");
55}
56
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000058 "Read until newline or EOF.\n"
59 "\n"
60 "Returns an empty string if EOF is hit immediately.\n"
61 );
62
63static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000064textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000065{
66 return _unsupported("readline");
67}
68
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000069PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000070 "Write string to stream.\n"
71 "Returns the number of characters written (which is always equal to\n"
72 "the length of the string).\n"
73 );
74
75static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000076textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000077{
78 return _unsupported("write");
79}
80
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000081PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000082 "Encoding of the text stream.\n"
83 "\n"
84 "Subclasses should override.\n"
85 );
86
87static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000088textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000089{
90 Py_RETURN_NONE;
91}
92
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000093PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000094 "Line endings translated so far.\n"
95 "\n"
96 "Only line endings translated during reading are considered.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103{
104 Py_RETURN_NONE;
105}
106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000107PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000108 "The error setting of the decoder or encoder.\n"
109 "\n"
110 "Subclasses should override.\n"
111 );
112
113static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000115{
116 Py_RETURN_NONE;
117}
118
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000119
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120static PyMethodDef textiobase_methods[] = {
121 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
122 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
123 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
124 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125 {NULL, NULL}
126};
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128static PyGetSetDef textiobase_getset[] = {
129 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
130 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
131 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000132 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000133};
134
135PyTypeObject PyTextIOBase_Type = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "_io._TextIOBase", /*tp_name*/
138 0, /*tp_basicsize*/
139 0, /*tp_itemsize*/
140 0, /*tp_dealloc*/
141 0, /*tp_print*/
142 0, /*tp_getattr*/
143 0, /*tp_setattr*/
144 0, /*tp_compare */
145 0, /*tp_repr*/
146 0, /*tp_as_number*/
147 0, /*tp_as_sequence*/
148 0, /*tp_as_mapping*/
149 0, /*tp_hash */
150 0, /*tp_call*/
151 0, /*tp_str*/
152 0, /*tp_getattro*/
153 0, /*tp_setattro*/
154 0, /*tp_as_buffer*/
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000156 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 0, /* tp_iter */
162 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000163 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000164 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000165 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000166 &PyIOBase_Type, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 0, /* tp_new */
174};
175
176
177/* IncrementalNewlineDecoder */
178
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000179PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000180 "Codec used when reading a file in universal newlines mode. It wraps\n"
181 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
182 "records the types of newlines encountered. When used with\n"
183 "translate=False, it ensures that the newline sequence is returned in\n"
184 "one piece. When used with decoder=None, it expects unicode strings as\n"
185 "decode input and translates newlines without first invoking an external\n"
186 "decoder.\n"
187 );
188
189typedef struct {
190 PyObject_HEAD
191 PyObject *decoder;
192 PyObject *errors;
Antoine Pitrou384c9202009-09-21 21:42:29 +0000193 signed int pendingcr: 1;
194 signed int translate: 1;
195 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000196} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000197
198static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000199incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000200 PyObject *args, PyObject *kwds)
201{
202 PyObject *decoder;
203 int translate;
204 PyObject *errors = NULL;
205 char *kwlist[] = {"decoder", "translate", "errors", NULL};
206
207 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
208 kwlist, &decoder, &translate, &errors))
209 return -1;
210
211 self->decoder = decoder;
212 Py_INCREF(decoder);
213
214 if (errors == NULL) {
215 self->errors = PyUnicode_FromString("strict");
216 if (self->errors == NULL)
217 return -1;
218 }
219 else {
220 Py_INCREF(errors);
221 self->errors = errors;
222 }
223
224 self->translate = translate;
225 self->seennl = 0;
226 self->pendingcr = 0;
227
228 return 0;
229}
230
231static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000232incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000233{
234 Py_CLEAR(self->decoder);
235 Py_CLEAR(self->errors);
236 Py_TYPE(self)->tp_free((PyObject *)self);
237}
238
/* Bit flags stored in nldecoder_object.seennl: one per newline kind. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
243
244PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000245_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000246 PyObject *input, int final)
247{
248 PyObject *output;
249 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000250 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251
252 if (self->decoder == NULL) {
253 PyErr_SetString(PyExc_ValueError,
254 "IncrementalNewlineDecoder.__init__ not called");
255 return NULL;
256 }
257
258 /* decode input (with the eventual \r from a previous pass) */
259 if (self->decoder != Py_None) {
260 output = PyObject_CallMethodObjArgs(self->decoder,
261 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
262 }
263 else {
264 output = input;
265 Py_INCREF(output);
266 }
267
268 if (output == NULL)
269 return NULL;
270
271 if (!PyUnicode_Check(output)) {
272 PyErr_SetString(PyExc_TypeError,
273 "decoder should return a string result");
274 goto error;
275 }
276
277 output_len = PyUnicode_GET_SIZE(output);
278 if (self->pendingcr && (final || output_len > 0)) {
279 Py_UNICODE *out;
280 PyObject *modified = PyUnicode_FromUnicode(NULL, output_len + 1);
281 if (modified == NULL)
282 goto error;
283 out = PyUnicode_AS_UNICODE(modified);
284 out[0] = '\r';
285 memcpy(out + 1, PyUnicode_AS_UNICODE(output),
286 output_len * sizeof(Py_UNICODE));
287 Py_DECREF(output);
288 output = modified;
289 self->pendingcr = 0;
290 output_len++;
291 }
292
293 /* retain last \r even when not translating data:
294 * then readline() is sure to get \r\n in one pass
295 */
296 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000297 if (output_len > 0
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000298 && PyUnicode_AS_UNICODE(output)[output_len - 1] == '\r') {
299
300 if (Py_REFCNT(output) == 1) {
301 if (PyUnicode_Resize(&output, output_len - 1) < 0)
302 goto error;
303 }
304 else {
305 PyObject *modified = PyUnicode_FromUnicode(
306 PyUnicode_AS_UNICODE(output),
307 output_len - 1);
308 if (modified == NULL)
309 goto error;
310 Py_DECREF(output);
311 output = modified;
312 }
313 self->pendingcr = 1;
314 }
315 }
316
317 /* Record which newlines are read and do newline translation if desired,
318 all in one pass. */
319 {
320 Py_UNICODE *in_str;
321 Py_ssize_t len;
322 int seennl = self->seennl;
323 int only_lf = 0;
324
325 in_str = PyUnicode_AS_UNICODE(output);
326 len = PyUnicode_GET_SIZE(output);
327
328 if (len == 0)
329 return output;
330
331 /* If, up to now, newlines are consistently \n, do a quick check
332 for the \r *byte* with the libc's optimized memchr.
333 */
334 if (seennl == SEEN_LF || seennl == 0) {
Antoine Pitrou0e941892009-03-06 23:57:20 +0000335 only_lf = (memchr(in_str, '\r', len * sizeof(Py_UNICODE)) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000336 }
337
Antoine Pitrou66913e22009-03-06 23:40:56 +0000338 if (only_lf) {
339 /* If not already seen, quick scan for a possible "\n" character.
340 (there's nothing else to be done, even when in translation mode)
341 */
342 if (seennl == 0 &&
343 memchr(in_str, '\n', len * sizeof(Py_UNICODE)) != NULL) {
344 Py_UNICODE *s, *end;
345 s = in_str;
346 end = in_str + len;
347 for (;;) {
348 Py_UNICODE c;
349 /* Fast loop for non-control characters */
350 while (*s > '\n')
351 s++;
352 c = *s++;
353 if (c == '\n') {
354 seennl |= SEEN_LF;
355 break;
356 }
357 if (s > end)
358 break;
359 }
360 }
361 /* Finished: we have scanned for newlines, and none of them
362 need translating */
363 }
364 else if (!self->translate) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000365 Py_UNICODE *s, *end;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000366 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000367 if (seennl == SEEN_ALL)
368 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 s = in_str;
370 end = in_str + len;
371 for (;;) {
372 Py_UNICODE c;
373 /* Fast loop for non-control characters */
374 while (*s > '\r')
375 s++;
376 c = *s++;
377 if (c == '\n')
378 seennl |= SEEN_LF;
379 else if (c == '\r') {
380 if (*s == '\n') {
381 seennl |= SEEN_CRLF;
382 s++;
383 }
384 else
385 seennl |= SEEN_CR;
386 }
387 if (s > end)
388 break;
389 if (seennl == SEEN_ALL)
390 break;
391 }
392 endscan:
393 ;
394 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000395 else {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 PyObject *translated = NULL;
397 Py_UNICODE *out_str;
398 Py_UNICODE *in, *out, *end;
399 if (Py_REFCNT(output) != 1) {
400 /* We could try to optimize this so that we only do a copy
401 when there is something to translate. On the other hand,
402 most decoders should only output non-shared strings, i.e.
403 translation is done in place. */
404 translated = PyUnicode_FromUnicode(NULL, len);
405 if (translated == NULL)
406 goto error;
407 assert(Py_REFCNT(translated) == 1);
408 memcpy(PyUnicode_AS_UNICODE(translated),
409 PyUnicode_AS_UNICODE(output),
410 len * sizeof(Py_UNICODE));
411 }
412 else {
413 translated = output;
414 }
415 out_str = PyUnicode_AS_UNICODE(translated);
416 in = in_str;
417 out = out_str;
418 end = in_str + len;
419 for (;;) {
420 Py_UNICODE c;
421 /* Fast loop for non-control characters */
422 while ((c = *in++) > '\r')
423 *out++ = c;
424 if (c == '\n') {
425 *out++ = c;
426 seennl |= SEEN_LF;
427 continue;
428 }
429 if (c == '\r') {
430 if (*in == '\n') {
431 in++;
432 seennl |= SEEN_CRLF;
433 }
434 else
435 seennl |= SEEN_CR;
436 *out++ = '\n';
437 continue;
438 }
439 if (in > end)
440 break;
441 *out++ = c;
442 }
443 if (translated != output) {
444 Py_DECREF(output);
445 output = translated;
446 }
447 if (out - out_str != len) {
448 if (PyUnicode_Resize(&output, out - out_str) < 0)
449 goto error;
450 }
451 }
452 self->seennl |= seennl;
453 }
454
455 return output;
456
457 error:
458 Py_DECREF(output);
459 return NULL;
460}
461
462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000463incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 PyObject *args, PyObject *kwds)
465{
466 char *kwlist[] = {"input", "final", NULL};
467 PyObject *input;
468 int final = 0;
469
470 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
471 kwlist, &input, &final))
472 return NULL;
473 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478{
479 PyObject *buffer;
480 unsigned PY_LONG_LONG flag;
481
482 if (self->decoder != Py_None) {
483 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
484 _PyIO_str_getstate, NULL);
485 if (state == NULL)
486 return NULL;
487 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
488 Py_DECREF(state);
489 return NULL;
490 }
491 Py_INCREF(buffer);
492 Py_DECREF(state);
493 }
494 else {
495 buffer = PyBytes_FromString("");
496 flag = 0;
497 }
498 flag <<= 1;
499 if (self->pendingcr)
500 flag |= 1;
501 return Py_BuildValue("NK", buffer, flag);
502}
503
504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000505incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000506{
507 PyObject *buffer;
508 unsigned PY_LONG_LONG flag;
509
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
511 return NULL;
512
513 self->pendingcr = (int) flag & 1;
514 flag >>= 1;
515
516 if (self->decoder != Py_None)
517 return PyObject_CallMethod(self->decoder,
518 "setstate", "((OK))", buffer, flag);
519 else
520 Py_RETURN_NONE;
521}
522
523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000524incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000525{
526 self->seennl = 0;
527 self->pendingcr = 0;
528 if (self->decoder != Py_None)
529 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
530 else
531 Py_RETURN_NONE;
532}
533
534static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000535incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000536{
537 switch (self->seennl) {
538 case SEEN_CR:
539 return PyUnicode_FromString("\r");
540 case SEEN_LF:
541 return PyUnicode_FromString("\n");
542 case SEEN_CRLF:
543 return PyUnicode_FromString("\r\n");
544 case SEEN_CR | SEEN_LF:
545 return Py_BuildValue("ss", "\r", "\n");
546 case SEEN_CR | SEEN_CRLF:
547 return Py_BuildValue("ss", "\r", "\r\n");
548 case SEEN_LF | SEEN_CRLF:
549 return Py_BuildValue("ss", "\n", "\r\n");
550 case SEEN_CR | SEEN_LF | SEEN_CRLF:
551 return Py_BuildValue("sss", "\r", "\n", "\r\n");
552 default:
553 Py_RETURN_NONE;
554 }
555
556}
557
558
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000559static PyMethodDef incrementalnewlinedecoder_methods[] = {
560 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
561 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
562 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
563 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000564 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000565};
566
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000567static PyGetSetDef incrementalnewlinedecoder_getset[] = {
568 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000569 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570};
571
572PyTypeObject PyIncrementalNewlineDecoder_Type = {
573 PyVarObject_HEAD_INIT(NULL, 0)
574 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000575 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000576 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000577 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578 0, /*tp_print*/
579 0, /*tp_getattr*/
580 0, /*tp_setattr*/
581 0, /*tp_compare */
582 0, /*tp_repr*/
583 0, /*tp_as_number*/
584 0, /*tp_as_sequence*/
585 0, /*tp_as_mapping*/
586 0, /*tp_hash */
587 0, /*tp_call*/
588 0, /*tp_str*/
589 0, /*tp_getattro*/
590 0, /*tp_setattro*/
591 0, /*tp_as_buffer*/
592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000593 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000594 0, /* tp_traverse */
595 0, /* tp_clear */
596 0, /* tp_richcompare */
597 0, /*tp_weaklistoffset*/
598 0, /* tp_iter */
599 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000600 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603 0, /* tp_base */
604 0, /* tp_dict */
605 0, /* tp_descr_get */
606 0, /* tp_descr_set */
607 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000608 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000609 0, /* tp_alloc */
610 PyType_GenericNew, /* tp_new */
611};
612
613
614/* TextIOWrapper */
615
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 "Character and line based layer over a BufferedIOBase object, buffer.\n"
618 "\n"
619 "encoding gives the name of the encoding that the stream will be\n"
620 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
621 "\n"
622 "errors determines the strictness of encoding and decoding (see the\n"
623 "codecs.register) and defaults to \"strict\".\n"
624 "\n"
625 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
626 "handling of line endings. If it is None, universal newlines is\n"
627 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
628 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
629 "caller. Conversely, on output, '\\n' is translated to the system\n"
630 "default line seperator, os.linesep. If newline is any other of its\n"
631 "legal values, that newline becomes the newline when the file is read\n"
632 "and it is returned untranslated. On output, '\\n' is converted to the\n"
633 "newline.\n"
634 "\n"
635 "If line_buffering is True, a call to flush is implied when a call to\n"
636 "write contains a newline character."
637 );
638
639typedef PyObject *
640 (*encodefunc_t)(PyObject *, PyObject *);
641
642typedef struct
643{
644 PyObject_HEAD
645 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000646 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000647 Py_ssize_t chunk_size;
648 PyObject *buffer;
649 PyObject *encoding;
650 PyObject *encoder;
651 PyObject *decoder;
652 PyObject *readnl;
653 PyObject *errors;
654 const char *writenl; /* utf-8 encoded, NULL stands for \n */
655 char line_buffering;
656 char readuniversal;
657 char readtranslate;
658 char writetranslate;
659 char seekable;
660 char telling;
661 /* Specialized encoding func (see below) */
662 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000663 /* Whether or not it's the start of the stream */
664 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000665
666 /* Reads and writes are internally buffered in order to speed things up.
667 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000668
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669 Please also note that text to be written is first encoded before being
670 buffered. This is necessary so that encoding errors are immediately
671 reported to the caller, but it unfortunately means that the
672 IncrementalEncoder (whose encode() method is always written in Python)
673 becomes a bottleneck for small writes.
674 */
675 PyObject *decoded_chars; /* buffer for text returned from decoder */
676 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
677 PyObject *pending_bytes; /* list of bytes objects waiting to be
678 written, or NULL */
679 Py_ssize_t pending_bytes_count;
680 PyObject *snapshot;
681 /* snapshot is either None, or a tuple (dec_flags, next_input) where
682 * dec_flags is the second (integer) item of the decoder state and
683 * next_input is the chunk of input bytes that comes next after the
684 * snapshot point. We use this to reconstruct decoder states in tell().
685 */
686
687 /* Cache raw object if it's a FileIO object */
688 PyObject *raw;
689
690 PyObject *weakreflist;
691 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000692} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000693
694
695/* A couple of specialized cases in order to bypass the slow incremental
696 encoding methods for the most popular encodings. */
697
698static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000699ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000700{
701 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(text),
702 PyUnicode_GET_SIZE(text),
703 PyBytes_AS_STRING(self->errors));
704}
705
706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000707utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000708{
709 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
710 PyUnicode_GET_SIZE(text),
711 PyBytes_AS_STRING(self->errors), 1);
712}
713
714static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000715utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716{
717 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
718 PyUnicode_GET_SIZE(text),
719 PyBytes_AS_STRING(self->errors), -1);
720}
721
722static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000723utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724{
Antoine Pitroue4501852009-05-14 18:55:55 +0000725 if (!self->encoding_start_of_stream) {
726 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000728 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000730 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000732 }
733 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
734 PyUnicode_GET_SIZE(text),
735 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
Antoine Pitroue4501852009-05-14 18:55:55 +0000738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000740{
741 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
742 PyUnicode_GET_SIZE(text),
743 PyBytes_AS_STRING(self->errors), 1);
744}
745
746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000747utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000748{
749 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
750 PyUnicode_GET_SIZE(text),
751 PyBytes_AS_STRING(self->errors), -1);
752}
753
754static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000755utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000756{
757 if (!self->encoding_start_of_stream) {
758 /* Skip the BOM and use native byte ordering */
759#if defined(WORDS_BIGENDIAN)
760 return utf32be_encode(self, text);
761#else
762 return utf32le_encode(self, text);
763#endif
764 }
765 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
766 PyUnicode_GET_SIZE(text),
767 PyBytes_AS_STRING(self->errors), 0);
768}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769
770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000772{
773 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(text),
774 PyUnicode_GET_SIZE(text),
775 PyBytes_AS_STRING(self->errors));
776}
777
778static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000779latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780{
781 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(text),
782 PyUnicode_GET_SIZE(text),
783 PyBytes_AS_STRING(self->errors));
784}
785
786/* Map normalized encoding names onto the specialized encoding funcs */
787
788typedef struct {
789 const char *name;
790 encodefunc_t encodefunc;
791} encodefuncentry;
792
Antoine Pitrou24f36292009-03-28 22:16:42 +0000793static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000794 {"ascii", (encodefunc_t) ascii_encode},
795 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000796 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797 {"utf-16-be", (encodefunc_t) utf16be_encode},
798 {"utf-16-le", (encodefunc_t) utf16le_encode},
799 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000800 {"utf-32-be", (encodefunc_t) utf32be_encode},
801 {"utf-32-le", (encodefunc_t) utf32le_encode},
802 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803 {NULL, NULL}
804};
805
806
807static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000808textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809{
810 char *kwlist[] = {"buffer", "encoding", "errors",
811 "newline", "line_buffering",
812 NULL};
813 PyObject *buffer, *raw;
814 char *encoding = NULL;
815 char *errors = NULL;
816 char *newline = NULL;
817 int line_buffering = 0;
818 _PyIO_State *state = IO_STATE;
819
820 PyObject *res;
821 int r;
822
823 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000824 self->detached = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000825 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
826 kwlist, &buffer, &encoding, &errors,
827 &newline, &line_buffering))
828 return -1;
829
830 if (newline && newline[0] != '\0'
831 && !(newline[0] == '\n' && newline[1] == '\0')
832 && !(newline[0] == '\r' && newline[1] == '\0')
833 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
834 PyErr_Format(PyExc_ValueError,
835 "illegal newline value: %s", newline);
836 return -1;
837 }
838
839 Py_CLEAR(self->buffer);
840 Py_CLEAR(self->encoding);
841 Py_CLEAR(self->encoder);
842 Py_CLEAR(self->decoder);
843 Py_CLEAR(self->readnl);
844 Py_CLEAR(self->decoded_chars);
845 Py_CLEAR(self->pending_bytes);
846 Py_CLEAR(self->snapshot);
847 Py_CLEAR(self->errors);
848 Py_CLEAR(self->raw);
849 self->decoded_chars_used = 0;
850 self->pending_bytes_count = 0;
851 self->encodefunc = NULL;
852
853 if (encoding == NULL) {
854 /* Try os.device_encoding(fileno) */
855 PyObject *fileno;
856 fileno = PyObject_CallMethod(buffer, "fileno", NULL);
857 /* Ignore only AttributeError and UnsupportedOperation */
858 if (fileno == NULL) {
859 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
860 PyErr_ExceptionMatches(state->unsupported_operation)) {
861 PyErr_Clear();
862 }
863 else {
864 goto error;
865 }
866 }
867 else {
868 self->encoding = PyObject_CallMethod(state->os_module,
869 "device_encoding",
870 "N", fileno);
871 if (self->encoding == NULL)
872 goto error;
873 else if (!PyUnicode_Check(self->encoding))
874 Py_CLEAR(self->encoding);
875 }
876 }
877 if (encoding == NULL && self->encoding == NULL) {
878 if (state->locale_module == NULL) {
879 state->locale_module = PyImport_ImportModule("locale");
880 if (state->locale_module == NULL)
881 goto catch_ImportError;
882 else
883 goto use_locale;
884 }
885 else {
886 use_locale:
887 self->encoding = PyObject_CallMethod(
888 state->locale_module, "getpreferredencoding", NULL);
889 if (self->encoding == NULL) {
890 catch_ImportError:
891 /*
892 Importing locale can raise a ImportError because of
893 _functools, and locale.getpreferredencoding can raise a
894 ImportError if _locale is not available. These will happen
895 during module building.
896 */
897 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
898 PyErr_Clear();
899 self->encoding = PyUnicode_FromString("ascii");
900 }
901 else
902 goto error;
903 }
904 else if (!PyUnicode_Check(self->encoding))
905 Py_CLEAR(self->encoding);
906 }
907 }
908 if (self->encoding != NULL)
909 encoding = _PyUnicode_AsString(self->encoding);
910 else if (encoding != NULL) {
911 self->encoding = PyUnicode_FromString(encoding);
912 if (self->encoding == NULL)
913 goto error;
914 }
915 else {
916 PyErr_SetString(PyExc_IOError,
917 "could not determine default encoding");
918 }
919
920 if (errors == NULL)
921 errors = "strict";
922 self->errors = PyBytes_FromString(errors);
923 if (self->errors == NULL)
924 goto error;
925
926 self->chunk_size = 8192;
927 self->readuniversal = (newline == NULL || newline[0] == '\0');
928 self->line_buffering = line_buffering;
929 self->readtranslate = (newline == NULL);
930 if (newline) {
931 self->readnl = PyUnicode_FromString(newline);
932 if (self->readnl == NULL)
933 return -1;
934 }
935 self->writetranslate = (newline == NULL || newline[0] != '\0');
936 if (!self->readuniversal && self->readnl) {
937 self->writenl = _PyUnicode_AsString(self->readnl);
938 if (!strcmp(self->writenl, "\n"))
939 self->writenl = NULL;
940 }
941#ifdef MS_WINDOWS
942 else
943 self->writenl = "\r\n";
944#endif
945
946 /* Build the decoder object */
947 res = PyObject_CallMethod(buffer, "readable", NULL);
948 if (res == NULL)
949 goto error;
950 r = PyObject_IsTrue(res);
951 Py_DECREF(res);
952 if (r == -1)
953 goto error;
954 if (r == 1) {
955 self->decoder = PyCodec_IncrementalDecoder(
956 encoding, errors);
957 if (self->decoder == NULL)
958 goto error;
959
960 if (self->readuniversal) {
961 PyObject *incrementalDecoder = PyObject_CallFunction(
962 (PyObject *)&PyIncrementalNewlineDecoder_Type,
963 "Oi", self->decoder, (int)self->readtranslate);
964 if (incrementalDecoder == NULL)
965 goto error;
966 Py_CLEAR(self->decoder);
967 self->decoder = incrementalDecoder;
968 }
969 }
970
971 /* Build the encoder object */
972 res = PyObject_CallMethod(buffer, "writable", NULL);
973 if (res == NULL)
974 goto error;
975 r = PyObject_IsTrue(res);
976 Py_DECREF(res);
977 if (r == -1)
978 goto error;
979 if (r == 1) {
980 PyObject *ci;
981 self->encoder = PyCodec_IncrementalEncoder(
982 encoding, errors);
983 if (self->encoder == NULL)
984 goto error;
985 /* Get the normalized named of the codec */
986 ci = _PyCodec_Lookup(encoding);
987 if (ci == NULL)
988 goto error;
989 res = PyObject_GetAttrString(ci, "name");
990 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +0000991 if (res == NULL) {
992 if (PyErr_ExceptionMatches(PyExc_AttributeError))
993 PyErr_Clear();
994 else
995 goto error;
996 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000997 else if (PyUnicode_Check(res)) {
998 encodefuncentry *e = encodefuncs;
999 while (e->name != NULL) {
1000 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1001 self->encodefunc = e->encodefunc;
1002 break;
1003 }
1004 e++;
1005 }
1006 }
1007 Py_XDECREF(res);
1008 }
1009
1010 self->buffer = buffer;
1011 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001012
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001013 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1014 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1015 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1016 raw = PyObject_GetAttrString(buffer, "raw");
1017 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001018 if (raw == NULL) {
1019 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1020 PyErr_Clear();
1021 else
1022 goto error;
1023 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001024 else if (Py_TYPE(raw) == &PyFileIO_Type)
1025 self->raw = raw;
1026 else
1027 Py_DECREF(raw);
1028 }
1029
1030 res = PyObject_CallMethod(buffer, "seekable", NULL);
1031 if (res == NULL)
1032 goto error;
1033 self->seekable = self->telling = PyObject_IsTrue(res);
1034 Py_DECREF(res);
1035
Antoine Pitroue4501852009-05-14 18:55:55 +00001036 self->encoding_start_of_stream = 0;
1037 if (self->seekable && self->encoder) {
1038 PyObject *cookieObj;
1039 int cmp;
1040
1041 self->encoding_start_of_stream = 1;
1042
1043 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1044 if (cookieObj == NULL)
1045 goto error;
1046
1047 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1048 Py_DECREF(cookieObj);
1049 if (cmp < 0) {
1050 goto error;
1051 }
1052
1053 if (cmp == 0) {
1054 self->encoding_start_of_stream = 0;
1055 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1056 _PyIO_zero, NULL);
1057 if (res == NULL)
1058 goto error;
1059 Py_DECREF(res);
1060 }
1061 }
1062
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001063 self->ok = 1;
1064 return 0;
1065
1066 error:
1067 return -1;
1068}
1069
1070static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001071_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001072{
1073 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1074 return -1;
1075 self->ok = 0;
1076 Py_CLEAR(self->buffer);
1077 Py_CLEAR(self->encoding);
1078 Py_CLEAR(self->encoder);
1079 Py_CLEAR(self->decoder);
1080 Py_CLEAR(self->readnl);
1081 Py_CLEAR(self->decoded_chars);
1082 Py_CLEAR(self->pending_bytes);
1083 Py_CLEAR(self->snapshot);
1084 Py_CLEAR(self->errors);
1085 Py_CLEAR(self->raw);
1086 return 0;
1087}
1088
1089static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001090textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001091{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001092 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001093 return;
1094 _PyObject_GC_UNTRACK(self);
1095 if (self->weakreflist != NULL)
1096 PyObject_ClearWeakRefs((PyObject *)self);
1097 Py_CLEAR(self->dict);
1098 Py_TYPE(self)->tp_free((PyObject *)self);
1099}
1100
1101static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001102textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001103{
1104 Py_VISIT(self->buffer);
1105 Py_VISIT(self->encoding);
1106 Py_VISIT(self->encoder);
1107 Py_VISIT(self->decoder);
1108 Py_VISIT(self->readnl);
1109 Py_VISIT(self->decoded_chars);
1110 Py_VISIT(self->pending_bytes);
1111 Py_VISIT(self->snapshot);
1112 Py_VISIT(self->errors);
1113 Py_VISIT(self->raw);
1114
1115 Py_VISIT(self->dict);
1116 return 0;
1117}
1118
1119static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001120textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001121{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001122 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001123 return -1;
1124 Py_CLEAR(self->dict);
1125 return 0;
1126}
1127
1128static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001129textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001130
/* CHECK_CLOSED(self): bail out of the enclosing function with NULL if the
   stream is closed.

   This macro takes some shortcuts to make the common case faster:
   - For exact TextIOWrapper instances with a cached raw object (self->raw),
     the closed state is read directly with _PyFileIO_closed().
   - For exact TextIOWrapper instances without a cached raw object, the
     `closed` getter is called and its truth value is used.
   - For any other type (e.g. subclasses), _PyIOBase_check_closed() is
     called instead.

   When the stream is found to be closed on one of the first two paths, a
   ValueError is set before returning.  Any error raised while querying the
   closed state also makes the enclosing function return NULL.  Because the
   macro contains `return NULL`, it may only be used in functions that
   return a pointer. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                _res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
            return NULL; \
    } while (0)
1157
/* CHECK_INITIALIZED(self): make the enclosing pointer-returning function
   fail with ValueError (returning NULL) when self->ok is not positive.
   The message distinguishes a detached buffer (self->detached set) from
   an object that was never initialized.

   NOTE: this expands to a bare `if` statement, not a do/while(0) block,
   so do not use it as the unbraced body of an if/else. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1169
/* CHECK_INITIALIZED_INT(self): same check and error messages as
   CHECK_INITIALIZED, but for enclosing functions that return an int:
   the failure value is -1 instead of NULL.

   NOTE: this expands to a bare `if` statement, not a do/while(0) block,
   so do not use it as the unbraced body of an if/else. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        if (self->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1181
1182
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001183static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001184textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001185{
1186 PyObject *buffer, *res;
1187 CHECK_INITIALIZED(self);
1188 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1189 if (res == NULL)
1190 return NULL;
1191 Py_DECREF(res);
1192 buffer = self->buffer;
1193 self->buffer = NULL;
1194 self->detached = 1;
1195 self->ok = 0;
1196 return buffer;
1197}
1198
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001199Py_LOCAL_INLINE(const Py_UNICODE *)
1200findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
1201{
1202 /* like wcschr, but doesn't stop at NULL characters */
1203 while (size-- > 0) {
1204 if (*s == ch)
1205 return s;
1206 s++;
1207 }
1208 return NULL;
1209}
1210
Antoine Pitrou24f36292009-03-28 22:16:42 +00001211/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001212 underlying buffered object, though. */
1213static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001214_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001215{
Amaury Forgeot d'Arcaf0312a2009-08-29 23:19:16 +00001216 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001217
1218 if (self->pending_bytes == NULL)
1219 return 0;
Amaury Forgeot d'Arcaf0312a2009-08-29 23:19:16 +00001220
1221 pending = self->pending_bytes;
1222 Py_INCREF(pending);
1223 self->pending_bytes_count = 0;
1224 Py_CLEAR(self->pending_bytes);
1225
1226 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1227 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001228 if (b == NULL)
1229 return -1;
1230 ret = PyObject_CallMethodObjArgs(self->buffer,
1231 _PyIO_str_write, b, NULL);
1232 Py_DECREF(b);
1233 if (ret == NULL)
1234 return -1;
1235 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001236 return 0;
1237}
1238
1239static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001240textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001241{
1242 PyObject *ret;
1243 PyObject *text; /* owned reference */
1244 PyObject *b;
1245 Py_ssize_t textlen;
1246 int haslf = 0;
1247 int needflush = 0;
1248
1249 CHECK_INITIALIZED(self);
1250
1251 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1252 return NULL;
1253 }
1254
1255 CHECK_CLOSED(self);
1256
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001257 if (self->encoder == NULL) {
1258 PyErr_SetString(PyExc_IOError, "not writable");
1259 return NULL;
1260 }
1261
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001262 Py_INCREF(text);
1263
1264 textlen = PyUnicode_GetSize(text);
1265
1266 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1267 if (findchar(PyUnicode_AS_UNICODE(text),
1268 PyUnicode_GET_SIZE(text), '\n'))
1269 haslf = 1;
1270
1271 if (haslf && self->writetranslate && self->writenl != NULL) {
1272 PyObject *newtext = PyObject_CallMethod(
1273 text, "replace", "ss", "\n", self->writenl);
1274 Py_DECREF(text);
1275 if (newtext == NULL)
1276 return NULL;
1277 text = newtext;
1278 }
1279
1280 if (self->line_buffering &&
1281 (haslf ||
1282 findchar(PyUnicode_AS_UNICODE(text),
1283 PyUnicode_GET_SIZE(text), '\r')))
1284 needflush = 1;
1285
1286 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001287 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001288 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001289 self->encoding_start_of_stream = 0;
1290 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001291 else
1292 b = PyObject_CallMethodObjArgs(self->encoder,
1293 _PyIO_str_encode, text, NULL);
1294 Py_DECREF(text);
1295 if (b == NULL)
1296 return NULL;
1297
1298 if (self->pending_bytes == NULL) {
1299 self->pending_bytes = PyList_New(0);
1300 if (self->pending_bytes == NULL) {
1301 Py_DECREF(b);
1302 return NULL;
1303 }
1304 self->pending_bytes_count = 0;
1305 }
1306 if (PyList_Append(self->pending_bytes, b) < 0) {
1307 Py_DECREF(b);
1308 return NULL;
1309 }
1310 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1311 Py_DECREF(b);
1312 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001313 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001314 return NULL;
1315 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001316
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 if (needflush) {
1318 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1319 if (ret == NULL)
1320 return NULL;
1321 Py_DECREF(ret);
1322 }
1323
1324 Py_CLEAR(self->snapshot);
1325
1326 if (self->decoder) {
1327 ret = PyObject_CallMethod(self->decoder, "reset", NULL);
1328 if (ret == NULL)
1329 return NULL;
1330 Py_DECREF(ret);
1331 }
1332
1333 return PyLong_FromSsize_t(textlen);
1334}
1335
/* Replace the decoded-character buffer.
 *
 * Steals a reference to `chars` and stores it in self->decoded_chars,
 * releasing whatever was stored there before.  `chars` may be NULL to
 * simply empty the buffer.  The read offset (decoded_chars_used) is
 * reset to 0.
 */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
{
    Py_CLEAR(self->decoded_chars);
    self->decoded_chars = chars;
    self->decoded_chars_used = 0;
}
1345
1346static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001347textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001348{
1349 PyObject *chars;
1350 Py_ssize_t avail;
1351
1352 if (self->decoded_chars == NULL)
1353 return PyUnicode_FromStringAndSize(NULL, 0);
1354
1355 avail = (PyUnicode_GET_SIZE(self->decoded_chars)
1356 - self->decoded_chars_used);
1357
1358 assert(avail >= 0);
1359
1360 if (n < 0 || n > avail)
1361 n = avail;
1362
1363 if (self->decoded_chars_used > 0 || n < avail) {
1364 chars = PyUnicode_FromUnicode(
1365 PyUnicode_AS_UNICODE(self->decoded_chars)
1366 + self->decoded_chars_used, n);
1367 if (chars == NULL)
1368 return NULL;
1369 }
1370 else {
1371 chars = self->decoded_chars;
1372 Py_INCREF(chars);
1373 }
1374
1375 self->decoded_chars_used += n;
1376 return chars;
1377}
1378
1379/* Read and decode the next chunk of data from the BufferedReader.
1380 */
1381static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001382textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001383{
1384 PyObject *dec_buffer = NULL;
1385 PyObject *dec_flags = NULL;
1386 PyObject *input_chunk = NULL;
1387 PyObject *decoded_chars, *chunk_size;
1388 int eof;
1389
1390 /* The return value is True unless EOF was reached. The decoded string is
1391 * placed in self._decoded_chars (replacing its previous value). The
1392 * entire input chunk is sent to the decoder, though some of it may remain
1393 * buffered in the decoder, yet to be converted.
1394 */
1395
1396 if (self->decoder == NULL) {
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001397 PyErr_SetString(PyExc_IOError, "not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001398 return -1;
1399 }
1400
1401 if (self->telling) {
1402 /* To prepare for tell(), we need to snapshot a point in the file
1403 * where the decoder's input buffer is empty.
1404 */
1405
1406 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1407 _PyIO_str_getstate, NULL);
1408 if (state == NULL)
1409 return -1;
1410 /* Given this, we know there was a valid snapshot point
1411 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1412 */
1413 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1414 Py_DECREF(state);
1415 return -1;
1416 }
1417 Py_INCREF(dec_buffer);
1418 Py_INCREF(dec_flags);
1419 Py_DECREF(state);
1420 }
1421
1422 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1423 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1424 if (chunk_size == NULL)
1425 goto fail;
1426 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
1427 _PyIO_str_read1, chunk_size, NULL);
1428 Py_DECREF(chunk_size);
1429 if (input_chunk == NULL)
1430 goto fail;
1431 assert(PyBytes_Check(input_chunk));
1432
1433 eof = (PyBytes_Size(input_chunk) == 0);
1434
1435 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1436 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1437 self->decoder, input_chunk, eof);
1438 }
1439 else {
1440 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1441 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1442 }
1443
1444 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1445 if (decoded_chars == NULL)
1446 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001447 textiowrapper_set_decoded_chars(self, decoded_chars);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001448 if (PyUnicode_GET_SIZE(decoded_chars) > 0)
1449 eof = 0;
1450
1451 if (self->telling) {
1452 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1453 * next input to be decoded is dec_buffer + input_chunk.
1454 */
1455 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1456 if (next_input == NULL)
1457 goto fail;
1458 assert (PyBytes_Check(next_input));
1459 Py_DECREF(dec_buffer);
1460 Py_CLEAR(self->snapshot);
1461 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1462 }
1463 Py_DECREF(input_chunk);
1464
1465 return (eof == 0);
1466
1467 fail:
1468 Py_XDECREF(dec_buffer);
1469 Py_XDECREF(dec_flags);
1470 Py_XDECREF(input_chunk);
1471 return -1;
1472}
1473
1474static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001475textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001476{
1477 Py_ssize_t n = -1;
1478 PyObject *result = NULL, *chunks = NULL;
1479
1480 CHECK_INITIALIZED(self);
1481
Benjamin Peterson6b59f772009-12-13 19:30:15 +00001482 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001483 return NULL;
1484
1485 CHECK_CLOSED(self);
1486
Benjamin Petersona1b49012009-03-31 23:11:32 +00001487 if (self->decoder == NULL) {
1488 PyErr_SetString(PyExc_IOError, "not readable");
1489 return NULL;
1490 }
1491
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001492 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001493 return NULL;
1494
1495 if (n < 0) {
1496 /* Read everything */
1497 PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
1498 PyObject *decoded;
1499 if (bytes == NULL)
1500 goto fail;
1501 decoded = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_decode,
1502 bytes, Py_True, NULL);
1503 Py_DECREF(bytes);
1504 if (decoded == NULL)
1505 goto fail;
1506
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001507 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001508
1509 if (result == NULL) {
1510 Py_DECREF(decoded);
1511 return NULL;
1512 }
1513
1514 PyUnicode_AppendAndDel(&result, decoded);
1515 if (result == NULL)
1516 goto fail;
1517
1518 Py_CLEAR(self->snapshot);
1519 return result;
1520 }
1521 else {
1522 int res = 1;
1523 Py_ssize_t remaining = n;
1524
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001525 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001526 if (result == NULL)
1527 goto fail;
1528 remaining -= PyUnicode_GET_SIZE(result);
1529
1530 /* Keep reading chunks until we have n characters to return */
1531 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001532 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001533 if (res < 0)
1534 goto fail;
1535 if (res == 0) /* EOF */
1536 break;
1537 if (chunks == NULL) {
1538 chunks = PyList_New(0);
1539 if (chunks == NULL)
1540 goto fail;
1541 }
1542 if (PyList_Append(chunks, result) < 0)
1543 goto fail;
1544 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001545 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546 if (result == NULL)
1547 goto fail;
1548 remaining -= PyUnicode_GET_SIZE(result);
1549 }
1550 if (chunks != NULL) {
1551 if (result != NULL && PyList_Append(chunks, result) < 0)
1552 goto fail;
1553 Py_CLEAR(result);
1554 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1555 if (result == NULL)
1556 goto fail;
1557 Py_CLEAR(chunks);
1558 }
1559 return result;
1560 }
1561 fail:
1562 Py_XDECREF(result);
1563 Py_XDECREF(chunks);
1564 return NULL;
1565}
1566
1567
1568/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
1569 that is to the NUL character. Otherwise the function will produce
1570 incorrect results. */
1571static Py_UNICODE *
1572find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
1573{
1574 Py_UNICODE *s = start;
1575 for (;;) {
1576 while (*s > ch)
1577 s++;
1578 if (*s == ch)
1579 return s;
1580 if (s == end)
1581 return NULL;
1582 s++;
1583 }
1584}
1585
/* Locate the first line ending in the Py_UNICODE range [start, end).
 *
 * Parameters:
 *   translated - non-zero if newlines were already translated, so only
 *                '\n' is searched for.
 *   universal  - (consulted only when `translated` is zero) non-zero to
 *                accept any of "\r", "\r\n" or "\n".
 *   readnl     - (consulted only when both flags are zero) unicode object
 *                holding the exact line terminator to search for.
 *   start, end - bounds of the text.  The scanning loops rely on the
 *                storage being NUL-terminated at `end` (see
 *                find_control_char and the comment in the universal
 *                branch below).
 *   consumed   - output, written only when no line ending is found: the
 *                number of characters from `start` that can safely be set
 *                aside because they cannot be part of a line ending.
 *
 * Returns the offset from `start` just past the line ending when one is
 * found, or -1 when none is found (with *consumed set).
 */
Py_ssize_t
_PyIO_find_line_ending(
    int translated, int universal, PyObject *readnl,
    Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
{
    Py_ssize_t len = end - start;

    if (translated) {
        /* Newlines are already translated, only search for \n */
        Py_UNICODE *pos = find_control_char(start, end, '\n');
        if (pos != NULL)
            return pos - start + 1;
        else {
            *consumed = len;
            return -1;
        }
    }
    else if (universal) {
        /* Universal newline search. Find any of \r, \r\n, \n
         * The decoder ensures that \r\n are not split in two pieces
         */
        Py_UNICODE *s = start;
        for (;;) {
            Py_UNICODE ch;
            /* Fast path for non-control chars. The loop always ends
               since the Py_UNICODE storage is NUL-terminated. */
            while (*s > '\r')
                s++;
            if (s >= end) {
                *consumed = len;
                return -1;
            }
            /* `s` is advanced past `ch` here, so `s - start` below already
               counts the character just examined. */
            ch = *s++;
            if (ch == '\n')
                return s - start;
            if (ch == '\r') {
                /* "\r\n" counts as a single two-character line ending. */
                if (*s == '\n')
                    return s - start + 1;
                else
                    return s - start;
            }
        }
    }
    else {
        /* Non-universal mode. */
        Py_ssize_t readnl_len = PyUnicode_GET_SIZE(readnl);
        Py_UNICODE *nl = PyUnicode_AS_UNICODE(readnl);
        if (readnl_len == 1) {
            Py_UNICODE *pos = find_control_char(start, end, nl[0]);
            if (pos != NULL)
                return pos - start + 1;
            *consumed = len;
            return -1;
        }
        else {
            Py_UNICODE *s = start;
            /* `e` is one past the last position at which a complete
               terminator could still begin. */
            Py_UNICODE *e = end - readnl_len + 1;
            Py_UNICODE *pos;
            if (e < s)
                e = s;
            while (s < e) {
                Py_ssize_t i;
                /* NOTE: this inner `pos` shadows the outer declaration. */
                Py_UNICODE *pos = find_control_char(s, end, nl[0]);
                if (pos == NULL || pos >= e)
                    break;
                /* First character matched; compare the rest. */
                for (i = 1; i < readnl_len; i++) {
                    if (pos[i] != nl[i])
                        break;
                }
                if (i == readnl_len)
                    return pos - start + readnl_len;
                s = pos + 1;
            }
            /* No complete terminator.  If the tail contains the first
               terminator character, stop consuming right before it, since
               it may be the start of a terminator that continues in the
               next chunk. */
            pos = find_control_char(e, end, nl[0]);
            if (pos == NULL)
                *consumed = len;
            else
                *consumed = pos - start;
            return -1;
        }
    }
}
1668
/* Core of readline(): read and return one line.
 *
 * `limit` is the maximum number of characters to return; a negative
 * value means no limit (every limit test below is guarded by
 * `limit >= 0`).
 *
 * Returns a new str reference (the empty string if EOF is hit with
 * nothing collected), or NULL with an exception set.  Pending written
 * bytes are flushed before reading.
 *
 * Local bookkeeping:
 *   line             - the text currently being searched: either the
 *                      decoded buffer itself, or `remaining` + buffer.
 *   start            - index in `line` where the search begins.
 *   endpos           - index in `line` where the returned piece ends.
 *   chunks           - list of pieces already set aside (no line ending
 *                      found in them).
 *   chunked          - total number of characters stored in `chunks`.
 *   remaining        - tail of the previous buffer that could not be set
 *                      aside yet; it is prepended to the next buffer.
 *   offset_to_buffer - length of the `remaining` prefix inside `line`,
 *                      used to convert `endpos` back into an index into
 *                      self->decoded_chars.
 */
static PyObject *
_textiowrapper_readline(textio *self, Py_ssize_t limit)
{
    PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
    Py_ssize_t start, endpos, chunked, offset_to_buffer;
    int res;

    CHECK_CLOSED(self);

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;

    chunked = 0;

    while (1) {
        Py_UNICODE *ptr;
        Py_ssize_t line_len;
        Py_ssize_t consumed = 0;

        /* First, get some data if necessary */
        res = 1;
        while (!self->decoded_chars ||
               !PyUnicode_GET_SIZE(self->decoded_chars)) {
            res = textiowrapper_read_chunk(self);
            if (res < 0)
                goto error;
            if (res == 0)
                break;
        }
        if (res == 0) {
            /* end of file */
            textiowrapper_set_decoded_chars(self, NULL);
            Py_CLEAR(self->snapshot);
            start = endpos = offset_to_buffer = 0;
            break;
        }

        if (remaining == NULL) {
            /* Search the decoded buffer in place, from the current read
               offset. */
            line = self->decoded_chars;
            start = self->decoded_chars_used;
            offset_to_buffer = 0;
            Py_INCREF(line);
        }
        else {
            /* Glue the leftover from the previous buffer in front of the
               fresh one. */
            assert(self->decoded_chars_used == 0);
            line = PyUnicode_Concat(remaining, self->decoded_chars);
            start = 0;
            offset_to_buffer = PyUnicode_GET_SIZE(remaining);
            Py_CLEAR(remaining);
            if (line == NULL)
                goto error;
        }

        ptr = PyUnicode_AS_UNICODE(line);
        line_len = PyUnicode_GET_SIZE(line);

        endpos = _PyIO_find_line_ending(
            self->readtranslate, self->readuniversal, self->readnl,
            ptr + start, ptr + line_len, &consumed);
        if (endpos >= 0) {
            /* Found a line ending; convert to an index into `line` and
               clip to the limit if necessary. */
            endpos += start;
            if (limit >= 0 && (endpos - start) + chunked >= limit)
                endpos = start + limit - chunked;
            break;
        }

        /* We can put aside up to `endpos` */
        endpos = consumed + start;
        if (limit >= 0 && (endpos - start) + chunked >= limit) {
            /* Didn't find line ending, but reached length limit */
            endpos = start + limit - chunked;
            break;
        }

        if (endpos > start) {
            /* No line ending seen yet - put aside current data */
            PyObject *s;
            if (chunks == NULL) {
                chunks = PyList_New(0);
                if (chunks == NULL)
                    goto error;
            }
            s = PyUnicode_FromUnicode(ptr + start, endpos - start);
            if (s == NULL)
                goto error;
            if (PyList_Append(chunks, s) < 0) {
                Py_DECREF(s);
                goto error;
            }
            chunked += PyUnicode_GET_SIZE(s);
            Py_DECREF(s);
        }
        /* There may be some remaining bytes we'll have to prepend to the
           next chunk of data */
        if (endpos < line_len) {
            remaining = PyUnicode_FromUnicode(
                    ptr + endpos, line_len - endpos);
            if (remaining == NULL)
                goto error;
        }
        Py_CLEAR(line);
        /* We have consumed the buffer */
        textiowrapper_set_decoded_chars(self, NULL);
    }

    if (line != NULL) {
        /* Our line ends in the current buffer */
        self->decoded_chars_used = endpos - offset_to_buffer;
        if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
            /* Only part of `line` is wanted: shrink it in place when we
               hold the sole reference and the piece starts at index 0,
               otherwise copy the slice out. */
            if (start == 0 && Py_REFCNT(line) == 1) {
                if (PyUnicode_Resize(&line, endpos) < 0)
                    goto error;
            }
            else {
                PyObject *s = PyUnicode_FromUnicode(
                        PyUnicode_AS_UNICODE(line) + start, endpos - start);
                Py_CLEAR(line);
                if (s == NULL)
                    goto error;
                line = s;
            }
        }
    }
    if (remaining != NULL) {
        /* Leftover that was never prepended to a new buffer (the loop was
           left at EOF): it belongs to the result. */
        if (chunks == NULL) {
            chunks = PyList_New(0);
            if (chunks == NULL)
                goto error;
        }
        if (PyList_Append(chunks, remaining) < 0)
            goto error;
        Py_CLEAR(remaining);
    }
    if (chunks != NULL) {
        /* Assemble the final line from the parked pieces plus the last
           piece, if any. */
        if (line != NULL && PyList_Append(chunks, line) < 0)
            goto error;
        Py_CLEAR(line);
        line = PyUnicode_Join(_PyIO_empty_str, chunks);
        if (line == NULL)
            goto error;
        Py_DECREF(chunks);
    }
    if (line == NULL)
        line = PyUnicode_FromStringAndSize(NULL, 0);

    return line;

  error:
    Py_XDECREF(chunks);
    Py_XDECREF(remaining);
    Py_XDECREF(line);
    return NULL;
}
1822
1823static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001824textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001825{
1826 Py_ssize_t limit = -1;
1827
1828 CHECK_INITIALIZED(self);
1829 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1830 return NULL;
1831 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001832 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001833}
1834
1835/* Seek and Tell */
1836
/* Unpacked form of the opaque integer "cookie" that the cookie
   build/parse helpers below convert to and from a Python int. */
typedef struct {
    Py_off_t start_pos;     /* position in the underlying stream;
                               NOTE(review): presumably a byte offset */
    int dec_flags;          /* flags half of the decoder state, as passed
                               to decoder.setstate() */
    int bytes_to_feed;      /* NOTE(review): presumably the number of bytes
                               to feed the decoder after seeking -- confirm
                               against seek()/tell() */
    int chars_to_skip;      /* NOTE(review): presumably the number of
                               decoded characters to discard afterwards --
                               confirm against seek()/tell() */
    char need_eof;          /* NOTE(review): presumably set when the
                               decoder must be fed an EOF signal -- confirm
                               against seek()/tell() */
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001844
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the
   intermediary byte string the various cookie_type fields will be stored.
 */

/* Total size of the intermediary byte string: one slot per cookie_type
   field, with no padding between them. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1882
1883static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001884textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001885{
1886 unsigned char buffer[COOKIE_BUF_LEN];
1887 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1888 if (cookieLong == NULL)
1889 return -1;
1890
1891 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1892 IS_LITTLE_ENDIAN, 0) < 0) {
1893 Py_DECREF(cookieLong);
1894 return -1;
1895 }
1896 Py_DECREF(cookieLong);
1897
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001898 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1899 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1900 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1901 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1902 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001903
1904 return 0;
1905}
1906
1907static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001908textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001909{
1910 unsigned char buffer[COOKIE_BUF_LEN];
1911
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001912 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1913 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1914 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1915 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1916 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001917
1918 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1919}
1920#undef IS_LITTLE_ENDIAN
1921
1922static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001923_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001924{
1925 PyObject *res;
1926 /* When seeking to the start of the stream, we call decoder.reset()
1927 rather than decoder.getstate().
1928 This is for a few decoders such as utf-16 for which the state value
1929 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1930 utf-16, that we are expecting a BOM).
1931 */
1932 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1933 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1934 else
1935 res = PyObject_CallMethod(self->decoder, "setstate",
1936 "((yi))", "", cookie->dec_flags);
1937 if (res == NULL)
1938 return -1;
1939 Py_DECREF(res);
1940 return 0;
1941}
1942
Antoine Pitroue4501852009-05-14 18:55:55 +00001943static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001944_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001945{
1946 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001947 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001948 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1949 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1950 self->encoding_start_of_stream = 1;
1951 }
1952 else {
1953 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1954 _PyIO_zero, NULL);
1955 self->encoding_start_of_stream = 0;
1956 }
1957 if (res == NULL)
1958 return -1;
1959 Py_DECREF(res);
1960 return 0;
1961}
1962
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001964textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001965{
1966 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001967 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969 PyObject *res;
1970 int cmp;
1971
1972 CHECK_INITIALIZED(self);
1973
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
1975 return NULL;
1976 CHECK_CLOSED(self);
1977
1978 Py_INCREF(cookieObj);
1979
1980 if (!self->seekable) {
1981 PyErr_SetString(PyExc_IOError,
1982 "underlying stream is not seekable");
1983 goto fail;
1984 }
1985
1986 if (whence == 1) {
1987 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00001988 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989 if (cmp < 0)
1990 goto fail;
1991
1992 if (cmp == 0) {
1993 PyErr_SetString(PyExc_IOError,
1994 "can't do nonzero cur-relative seeks");
1995 goto fail;
1996 }
1997
1998 /* Seeking to the current position should attempt to
1999 * sync the underlying buffer with the current position.
2000 */
2001 Py_DECREF(cookieObj);
2002 cookieObj = PyObject_CallMethod((PyObject *)self, "tell", NULL);
2003 if (cookieObj == NULL)
2004 goto fail;
2005 }
2006 else if (whence == 2) {
2007 /* seek relative to end of file */
2008
Antoine Pitroue4501852009-05-14 18:55:55 +00002009 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010 if (cmp < 0)
2011 goto fail;
2012
2013 if (cmp == 0) {
2014 PyErr_SetString(PyExc_IOError,
2015 "can't do nonzero end-relative seeks");
2016 goto fail;
2017 }
2018
2019 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2020 if (res == NULL)
2021 goto fail;
2022 Py_DECREF(res);
2023
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002024 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025 Py_CLEAR(self->snapshot);
2026 if (self->decoder) {
2027 res = PyObject_CallMethod(self->decoder, "reset", NULL);
2028 if (res == NULL)
2029 goto fail;
2030 Py_DECREF(res);
2031 }
2032
2033 res = PyObject_CallMethod(self->buffer, "seek", "ii", 0, 2);
2034 Py_XDECREF(cookieObj);
2035 return res;
2036 }
2037 else if (whence != 0) {
2038 PyErr_Format(PyExc_ValueError,
2039 "invalid whence (%d, should be 0, 1 or 2)", whence);
2040 goto fail;
2041 }
2042
Antoine Pitroue4501852009-05-14 18:55:55 +00002043 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 if (cmp < 0)
2045 goto fail;
2046
2047 if (cmp == 1) {
2048 PyErr_Format(PyExc_ValueError,
2049 "negative seek position %R", cookieObj);
2050 goto fail;
2051 }
2052
2053 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2054 if (res == NULL)
2055 goto fail;
2056 Py_DECREF(res);
2057
2058 /* The strategy of seek() is to go back to the safe start point
2059 * and replay the effect of read(chars_to_skip) from there.
2060 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002061 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002062 goto fail;
2063
2064 /* Seek back to the safe start point. */
2065 posobj = PyLong_FromOff_t(cookie.start_pos);
2066 if (posobj == NULL)
2067 goto fail;
2068 res = PyObject_CallMethodObjArgs(self->buffer,
2069 _PyIO_str_seek, posobj, NULL);
2070 Py_DECREF(posobj);
2071 if (res == NULL)
2072 goto fail;
2073 Py_DECREF(res);
2074
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002075 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 Py_CLEAR(self->snapshot);
2077
2078 /* Restore the decoder to its state from the safe start point. */
2079 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002080 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 goto fail;
2082 }
2083
2084 if (cookie.chars_to_skip) {
2085 /* Just like _read_chunk, feed the decoder and save a snapshot. */
2086 PyObject *input_chunk = PyObject_CallMethod(
2087 self->buffer, "read", "i", cookie.bytes_to_feed);
2088 PyObject *decoded;
2089
2090 if (input_chunk == NULL)
2091 goto fail;
2092
2093 assert (PyBytes_Check(input_chunk));
2094
2095 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2096 if (self->snapshot == NULL) {
2097 Py_DECREF(input_chunk);
2098 goto fail;
2099 }
2100
2101 decoded = PyObject_CallMethod(self->decoder, "decode",
2102 "Oi", input_chunk, (int)cookie.need_eof);
2103
2104 if (decoded == NULL)
2105 goto fail;
2106
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002107 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002108
2109 /* Skip chars_to_skip of the decoded characters. */
2110 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2111 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2112 goto fail;
2113 }
2114 self->decoded_chars_used = cookie.chars_to_skip;
2115 }
2116 else {
2117 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2118 if (self->snapshot == NULL)
2119 goto fail;
2120 }
2121
Antoine Pitroue4501852009-05-14 18:55:55 +00002122 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2123 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002124 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002125 goto fail;
2126 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002127 return cookieObj;
2128 fail:
2129 Py_XDECREF(cookieObj);
2130 return NULL;
2131
2132}
2133
2134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002135textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136{
2137 PyObject *res;
2138 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002139 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002140 PyObject *next_input;
2141 Py_ssize_t chars_to_skip, chars_decoded;
2142 PyObject *saved_state = NULL;
2143 char *input, *input_end;
2144
2145 CHECK_INITIALIZED(self);
2146 CHECK_CLOSED(self);
2147
2148 if (!self->seekable) {
2149 PyErr_SetString(PyExc_IOError,
2150 "underlying stream is not seekable");
2151 goto fail;
2152 }
2153 if (!self->telling) {
2154 PyErr_SetString(PyExc_IOError,
2155 "telling position disabled by next() call");
2156 goto fail;
2157 }
2158
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002159 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002160 return NULL;
2161 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2162 if (res == NULL)
2163 goto fail;
2164 Py_DECREF(res);
2165
2166 posobj = PyObject_CallMethod(self->buffer, "tell", NULL);
2167 if (posobj == NULL)
2168 goto fail;
2169
2170 if (self->decoder == NULL || self->snapshot == NULL) {
2171 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2172 return posobj;
2173 }
2174
2175#if defined(HAVE_LARGEFILE_SUPPORT)
2176 cookie.start_pos = PyLong_AsLongLong(posobj);
2177#else
2178 cookie.start_pos = PyLong_AsLong(posobj);
2179#endif
2180 if (PyErr_Occurred())
2181 goto fail;
2182
2183 /* Skip backward to the snapshot point (see _read_chunk). */
2184 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2185 goto fail;
2186
2187 assert (PyBytes_Check(next_input));
2188
2189 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2190
2191 /* How many decoded characters have been used up since the snapshot? */
2192 if (self->decoded_chars_used == 0) {
2193 /* We haven't moved from the snapshot point. */
2194 Py_DECREF(posobj);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002195 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196 }
2197
2198 chars_to_skip = self->decoded_chars_used;
2199
2200 /* Starting from the snapshot position, we will walk the decoder
2201 * forward until it gives us enough decoded characters.
2202 */
2203 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2204 _PyIO_str_getstate, NULL);
2205 if (saved_state == NULL)
2206 goto fail;
2207
2208 /* Note our initial start point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002209 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002210 goto fail;
2211
2212 /* Feed the decoder one byte at a time. As we go, note the
2213 * nearest "safe start point" before the current location
2214 * (a point where the decoder has nothing buffered, so seek()
2215 * can safely start from there and advance to this location).
2216 */
2217 chars_decoded = 0;
2218 input = PyBytes_AS_STRING(next_input);
2219 input_end = input + PyBytes_GET_SIZE(next_input);
2220 while (input < input_end) {
2221 PyObject *state;
2222 char *dec_buffer;
2223 Py_ssize_t dec_buffer_len;
2224 int dec_flags;
2225
2226 PyObject *decoded = PyObject_CallMethod(
2227 self->decoder, "decode", "y#", input, 1);
2228 if (decoded == NULL)
2229 goto fail;
2230 assert (PyUnicode_Check(decoded));
2231 chars_decoded += PyUnicode_GET_SIZE(decoded);
2232 Py_DECREF(decoded);
2233
2234 cookie.bytes_to_feed += 1;
2235
2236 state = PyObject_CallMethodObjArgs(self->decoder,
2237 _PyIO_str_getstate, NULL);
2238 if (state == NULL)
2239 goto fail;
2240 if (!PyArg_Parse(state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) {
2241 Py_DECREF(state);
2242 goto fail;
2243 }
2244 Py_DECREF(state);
2245
2246 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2247 /* Decoder buffer is empty, so this is a safe start point. */
2248 cookie.start_pos += cookie.bytes_to_feed;
2249 chars_to_skip -= chars_decoded;
2250 cookie.dec_flags = dec_flags;
2251 cookie.bytes_to_feed = 0;
2252 chars_decoded = 0;
2253 }
2254 if (chars_decoded >= chars_to_skip)
2255 break;
2256 input++;
2257 }
2258 if (input == input_end) {
2259 /* We didn't get enough decoded data; signal EOF to get more. */
2260 PyObject *decoded = PyObject_CallMethod(
2261 self->decoder, "decode", "yi", "", /* final = */ 1);
2262 if (decoded == NULL)
2263 goto fail;
2264 assert (PyUnicode_Check(decoded));
2265 chars_decoded += PyUnicode_GET_SIZE(decoded);
2266 Py_DECREF(decoded);
2267 cookie.need_eof = 1;
2268
2269 if (chars_decoded < chars_to_skip) {
2270 PyErr_SetString(PyExc_IOError,
2271 "can't reconstruct logical file position");
2272 goto fail;
2273 }
2274 }
2275
2276 /* finally */
2277 Py_XDECREF(posobj);
2278 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2279 Py_DECREF(saved_state);
2280 if (res == NULL)
2281 return NULL;
2282 Py_DECREF(res);
2283
2284 /* The returned cookie corresponds to the last safe start point. */
2285 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002286 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002287
2288 fail:
2289 Py_XDECREF(posobj);
2290 if (saved_state) {
2291 PyObject *type, *value, *traceback;
2292 PyErr_Fetch(&type, &value, &traceback);
2293
2294 res = PyObject_CallMethod(self->decoder, "setstate", "(O)", saved_state);
2295 Py_DECREF(saved_state);
2296 if (res == NULL)
2297 return NULL;
2298 Py_DECREF(res);
2299
2300 PyErr_Restore(type, value, traceback);
2301 }
2302 return NULL;
2303}
2304
2305static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002306textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002307{
2308 PyObject *pos = Py_None;
2309 PyObject *res;
2310
2311 CHECK_INITIALIZED(self)
2312 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2313 return NULL;
2314 }
2315
2316 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2317 if (res == NULL)
2318 return NULL;
2319 Py_DECREF(res);
2320
2321 if (pos != Py_None) {
2322 res = PyObject_CallMethodObjArgs((PyObject *) self,
2323 _PyIO_str_seek, pos, NULL);
2324 if (res == NULL)
2325 return NULL;
2326 Py_DECREF(res);
2327 }
2328
2329 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, NULL);
2330}
2331
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002332static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002333textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002334{
Antoine Pitrou716c4442009-05-23 19:04:03 +00002335 PyObject *nameobj, *res;
2336
2337 CHECK_INITIALIZED(self);
2338
2339 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2340 if (nameobj == NULL) {
2341 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2342 PyErr_Clear();
2343 else
2344 return NULL;
2345 res = PyUnicode_FromFormat("<_io.TextIOWrapper encoding=%R>",
2346 self->encoding);
2347 }
2348 else {
2349 res = PyUnicode_FromFormat("<_io.TextIOWrapper name=%R encoding=%R>",
2350 nameobj, self->encoding);
2351 Py_DECREF(nameobj);
2352 }
2353 return res;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002354}
2355
2356
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002357/* Inquiries */
2358
2359static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002360textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002361{
2362 CHECK_INITIALIZED(self);
2363 return PyObject_CallMethod(self->buffer, "fileno", NULL);
2364}
2365
2366static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002367textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002368{
2369 CHECK_INITIALIZED(self);
2370 return PyObject_CallMethod(self->buffer, "seekable", NULL);
2371}
2372
2373static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002374textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002375{
2376 CHECK_INITIALIZED(self);
2377 return PyObject_CallMethod(self->buffer, "readable", NULL);
2378}
2379
2380static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002381textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002382{
2383 CHECK_INITIALIZED(self);
2384 return PyObject_CallMethod(self->buffer, "writable", NULL);
2385}
2386
2387static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002388textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002389{
2390 CHECK_INITIALIZED(self);
2391 return PyObject_CallMethod(self->buffer, "isatty", NULL);
2392}
2393
2394static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002395textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396{
2397 CHECK_INITIALIZED(self);
2398 CHECK_CLOSED(self);
2399 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002400 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002401 return NULL;
2402 return PyObject_CallMethod(self->buffer, "flush", NULL);
2403}
2404
2405static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002406textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002407{
2408 PyObject *res;
2409 CHECK_INITIALIZED(self);
2410 res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
2411 if (res == NULL) {
2412 /* If flush() fails, just give up */
2413 PyErr_Clear();
2414 }
2415 else
2416 Py_DECREF(res);
2417
2418 return PyObject_CallMethod(self->buffer, "close", NULL);
2419}
2420
2421static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002422textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002423{
2424 PyObject *line;
2425
2426 CHECK_INITIALIZED(self);
2427
2428 self->telling = 0;
2429 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2430 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002431 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002432 }
2433 else {
2434 line = PyObject_CallMethodObjArgs((PyObject *)self,
2435 _PyIO_str_readline, NULL);
2436 if (line && !PyUnicode_Check(line)) {
2437 PyErr_Format(PyExc_IOError,
2438 "readline() should have returned an str object, "
2439 "not '%.200s'", Py_TYPE(line)->tp_name);
2440 Py_DECREF(line);
2441 return NULL;
2442 }
2443 }
2444
2445 if (line == NULL)
2446 return NULL;
2447
2448 if (PyUnicode_GET_SIZE(line) == 0) {
2449 /* Reached EOF or would have blocked */
2450 Py_DECREF(line);
2451 Py_CLEAR(self->snapshot);
2452 self->telling = self->seekable;
2453 return NULL;
2454 }
2455
2456 return line;
2457}
2458
2459static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002460textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002461{
2462 CHECK_INITIALIZED(self);
2463 return PyObject_GetAttrString(self->buffer, "name");
2464}
2465
2466static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002467textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002468{
2469 CHECK_INITIALIZED(self);
2470 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2471}
2472
2473static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002474textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002475{
2476 PyObject *res;
2477 CHECK_INITIALIZED(self);
2478 if (self->decoder == NULL)
2479 Py_RETURN_NONE;
2480 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2481 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002482 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2483 PyErr_Clear();
2484 Py_RETURN_NONE;
2485 }
2486 else {
2487 return NULL;
2488 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489 }
2490 return res;
2491}
2492
2493static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002494textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002495{
2496 CHECK_INITIALIZED(self);
2497 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2498}
2499
2500static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002501textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002502{
2503 CHECK_INITIALIZED(self);
2504 return PyLong_FromSsize_t(self->chunk_size);
2505}
2506
2507static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002508textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002509{
2510 Py_ssize_t n;
2511 CHECK_INITIALIZED_INT(self);
2512 n = PyNumber_AsSsize_t(arg, PyExc_TypeError);
2513 if (n == -1 && PyErr_Occurred())
2514 return -1;
2515 if (n <= 0) {
2516 PyErr_SetString(PyExc_ValueError,
2517 "a strictly positive integer is required");
2518 return -1;
2519 }
2520 self->chunk_size = n;
2521 return 0;
2522}
2523
/* Method table for _io.TextIOWrapper.  Each entry maps a Python-level
   method name to its C implementation and calling convention; the table is
   terminated by a NULL sentinel entry. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
    {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
    {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
    {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
    {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
    {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},

    /* Inquiries forwarded to the underlying buffer */
    {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
    {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
    {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
    {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
    {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},

    /* Positioning */
    {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
    {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
    {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
    {NULL, NULL}  /* sentinel */
};
2543
/* Read-only attributes exposed directly from fields of the textio struct.
   The table is terminated by a NULL sentinel entry. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {NULL}  /* sentinel */
};
2550
/* Computed attributes of _io.TextIOWrapper.  All are read-only (no setter)
   except "_CHUNK_SIZE".  The table is terminated by a NULL sentinel
   entry. */
static PyGetSetDef textiowrapper_getset[] = {
    {"name", (getter)textiowrapper_name_get, NULL, NULL},
    {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
/* The "mode" getter below is disabled.
    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
*/
    {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
    {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
    {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
                    (setter)textiowrapper_chunk_size_set, NULL},
    {NULL}  /* sentinel */
};
2562
/* Type object for _io.TextIOWrapper.  The initializer is positional: each
   value is labelled with the PyTypeObject slot it fills, and the order must
   not be changed.  The type is subclassable, takes part in cyclic garbage
   collection, supports weak references and carries an instance __dict__. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict),     /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};