blob: 2ded719e9c70b5af6b8d63f515d6a778a3faa59a [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Method-name identifiers used by this file.  Each one is referenced
   elsewhere as PyId_<name> (for example &PyId_setstate in
   incrementalnewlinedecoder_setstate). */
_Py_identifier(close);
_Py_identifier(_dealloc_warn);
_Py_identifier(decode);
_Py_identifier(device_encoding);
_Py_identifier(fileno);
_Py_identifier(flush);
_Py_identifier(getpreferredencoding);
_Py_identifier(isatty);
_Py_identifier(read);
_Py_identifier(readable);
_Py_identifier(replace);
_Py_identifier(reset);
_Py_identifier(seek);
_Py_identifier(seekable);
_Py_identifier(setstate);
_Py_identifier(tell);
_Py_identifier(writable);
31
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000032/* TextIOBase */
33
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000034PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035 "Base class for text I/O.\n"
36 "\n"
37 "This class provides a character and line based interface to stream\n"
38 "I/O. There is no readinto method because Python's character strings\n"
39 "are immutable. There is no public constructor.\n"
40 );
41
42static PyObject *
43_unsupported(const char *message)
44{
45 PyErr_SetString(IO_STATE->unsupported_operation, message);
46 return NULL;
47}
48
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000049PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000050 "Separate the underlying buffer from the TextIOBase and return it.\n"
51 "\n"
52 "After the underlying buffer has been detached, the TextIO is in an\n"
53 "unusable state.\n"
54 );
55
56static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000057textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000058{
59 return _unsupported("detach");
60}
61
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000062PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000063 "Read at most n characters from stream.\n"
64 "\n"
65 "Read from underlying buffer until we have n characters or we hit EOF.\n"
66 "If n is negative or omitted, read until EOF.\n"
67 );
68
69static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000070textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000071{
72 return _unsupported("read");
73}
74
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000075PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000076 "Read until newline or EOF.\n"
77 "\n"
78 "Returns an empty string if EOF is hit immediately.\n"
79 );
80
81static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000082textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000083{
84 return _unsupported("readline");
85}
86
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000087PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000088 "Write string to stream.\n"
89 "Returns the number of characters written (which is always equal to\n"
90 "the length of the string).\n"
91 );
92
93static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000094textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000095{
96 return _unsupported("write");
97}
98
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000099PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000100 "Encoding of the text stream.\n"
101 "\n"
102 "Subclasses should override.\n"
103 );
104
105static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000106textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000107{
108 Py_RETURN_NONE;
109}
110
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000111PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000112 "Line endings translated so far.\n"
113 "\n"
114 "Only line endings translated during reading are considered.\n"
115 "\n"
116 "Subclasses should override.\n"
117 );
118
119static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000120textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000121{
122 Py_RETURN_NONE;
123}
124
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000125PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000126 "The error setting of the decoder or encoder.\n"
127 "\n"
128 "Subclasses should override.\n"
129 );
130
131static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000132textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000133{
134 Py_RETURN_NONE;
135}
136
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000138static PyMethodDef textiobase_methods[] = {
139 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
140 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
141 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
142 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000143 {NULL, NULL}
144};
145
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000146static PyGetSetDef textiobase_getset[] = {
147 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
148 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
149 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000150 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000151};
152
153PyTypeObject PyTextIOBase_Type = {
154 PyVarObject_HEAD_INIT(NULL, 0)
155 "_io._TextIOBase", /*tp_name*/
156 0, /*tp_basicsize*/
157 0, /*tp_itemsize*/
158 0, /*tp_dealloc*/
159 0, /*tp_print*/
160 0, /*tp_getattr*/
161 0, /*tp_setattr*/
162 0, /*tp_compare */
163 0, /*tp_repr*/
164 0, /*tp_as_number*/
165 0, /*tp_as_sequence*/
166 0, /*tp_as_mapping*/
167 0, /*tp_hash */
168 0, /*tp_call*/
169 0, /*tp_str*/
170 0, /*tp_getattro*/
171 0, /*tp_setattro*/
172 0, /*tp_as_buffer*/
173 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000174 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000175 0, /* tp_traverse */
176 0, /* tp_clear */
177 0, /* tp_richcompare */
178 0, /* tp_weaklistoffset */
179 0, /* tp_iter */
180 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000181 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000182 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000183 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000184 &PyIOBase_Type, /* tp_base */
185 0, /* tp_dict */
186 0, /* tp_descr_get */
187 0, /* tp_descr_set */
188 0, /* tp_dictoffset */
189 0, /* tp_init */
190 0, /* tp_alloc */
191 0, /* tp_new */
192};
193
194
195/* IncrementalNewlineDecoder */
196
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000197PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000198 "Codec used when reading a file in universal newlines mode. It wraps\n"
199 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
200 "records the types of newlines encountered. When used with\n"
201 "translate=False, it ensures that the newline sequence is returned in\n"
202 "one piece. When used with decoder=None, it expects unicode strings as\n"
203 "decode input and translates newlines without first invoking an external\n"
204 "decoder.\n"
205 );
206
207typedef struct {
208 PyObject_HEAD
209 PyObject *decoder;
210 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000211 signed int pendingcr: 1;
212 signed int translate: 1;
213 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000214} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000215
216static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218 PyObject *args, PyObject *kwds)
219{
220 PyObject *decoder;
221 int translate;
222 PyObject *errors = NULL;
223 char *kwlist[] = {"decoder", "translate", "errors", NULL};
224
225 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
226 kwlist, &decoder, &translate, &errors))
227 return -1;
228
229 self->decoder = decoder;
230 Py_INCREF(decoder);
231
232 if (errors == NULL) {
233 self->errors = PyUnicode_FromString("strict");
234 if (self->errors == NULL)
235 return -1;
236 }
237 else {
238 Py_INCREF(errors);
239 self->errors = errors;
240 }
241
242 self->translate = translate;
243 self->seennl = 0;
244 self->pendingcr = 0;
245
246 return 0;
247}
248
249static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000250incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000251{
252 Py_CLEAR(self->decoder);
253 Py_CLEAR(self->errors);
254 Py_TYPE(self)->tp_free((PyObject *)self);
255}
256
/* Bit flags recording which newline conventions have been encountered. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
261
262PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000263_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000264 PyObject *input, int final)
265{
266 PyObject *output;
267 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000268 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000269
270 if (self->decoder == NULL) {
271 PyErr_SetString(PyExc_ValueError,
272 "IncrementalNewlineDecoder.__init__ not called");
273 return NULL;
274 }
275
276 /* decode input (with the eventual \r from a previous pass) */
277 if (self->decoder != Py_None) {
278 output = PyObject_CallMethodObjArgs(self->decoder,
279 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
280 }
281 else {
282 output = input;
283 Py_INCREF(output);
284 }
285
286 if (output == NULL)
287 return NULL;
288
289 if (!PyUnicode_Check(output)) {
290 PyErr_SetString(PyExc_TypeError,
291 "decoder should return a string result");
292 goto error;
293 }
294
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200295 if (PyUnicode_READY(output) == -1)
296 goto error;
297
298 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000299 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200300 /* Prefix output with CR */
301 int kind;
302 PyObject *modified;
303 char *out;
304
305 modified = PyUnicode_New(output_len + 1,
306 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000307 if (modified == NULL)
308 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200309 kind = PyUnicode_KIND(modified);
310 out = PyUnicode_DATA(modified);
311 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200312 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000313 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200314 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000315 self->pendingcr = 0;
316 output_len++;
317 }
318
319 /* retain last \r even when not translating data:
320 * then readline() is sure to get \r\n in one pass
321 */
322 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000323 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
325 {
326 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
327 if (modified == NULL)
328 goto error;
329 Py_DECREF(output);
330 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000331 self->pendingcr = 1;
332 }
333 }
334
335 /* Record which newlines are read and do newline translation if desired,
336 all in one pass. */
337 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200338 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000339 Py_ssize_t len;
340 int seennl = self->seennl;
341 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000343
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200344 in_str = PyUnicode_DATA(output);
345 len = PyUnicode_GET_LENGTH(output);
346 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347
348 if (len == 0)
349 return output;
350
351 /* If, up to now, newlines are consistently \n, do a quick check
352 for the \r *byte* with the libc's optimized memchr.
353 */
354 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200355 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000356 }
357
Antoine Pitrou66913e22009-03-06 23:40:56 +0000358 if (only_lf) {
359 /* If not already seen, quick scan for a possible "\n" character.
360 (there's nothing else to be done, even when in translation mode)
361 */
362 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200363 memchr(in_str, '\n', kind * len) != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200364 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000365 for (;;) {
Victor Stinnerf7b8cb62011-09-29 03:28:17 +0200366 Py_UCS4 c;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000367 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 while (PyUnicode_READ(kind, in_str, i) > '\n')
369 i++;
370 c = PyUnicode_READ(kind, in_str, i++);
Antoine Pitrou66913e22009-03-06 23:40:56 +0000371 if (c == '\n') {
372 seennl |= SEEN_LF;
373 break;
374 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200375 if (i >= len)
Antoine Pitrou66913e22009-03-06 23:40:56 +0000376 break;
377 }
378 }
379 /* Finished: we have scanned for newlines, and none of them
380 need translating */
381 }
382 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200383 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000385 if (seennl == SEEN_ALL)
386 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000387 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200388 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000389 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 while (PyUnicode_READ(kind, in_str, i) > '\r')
391 i++;
392 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000393 if (c == '\n')
394 seennl |= SEEN_LF;
395 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200396 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200398 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000399 }
400 else
401 seennl |= SEEN_CR;
402 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 break;
405 if (seennl == SEEN_ALL)
406 break;
407 }
408 endscan:
409 ;
410 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000411 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200412 void *translated;
413 int kind = PyUnicode_KIND(output);
414 void *in_str = PyUnicode_DATA(output);
415 Py_ssize_t in, out;
416 /* XXX: Previous in-place translation here is disabled as
417 resizing is not possible anymore */
418 /* We could try to optimize this so that we only do a copy
419 when there is something to translate. On the other hand,
420 we already know there is a \r byte, so chances are high
421 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200422 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200423 if (translated == NULL) {
424 PyErr_NoMemory();
425 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000426 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000428 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
432 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 seennl |= SEEN_LF;
436 continue;
437 }
438 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200439 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 in++;
441 seennl |= SEEN_CRLF;
442 }
443 else
444 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200445 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000446 continue;
447 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000449 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200450 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000451 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 Py_DECREF(output);
453 output = PyUnicode_FromKindAndData(kind, translated, out);
454 if (!output)
455 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 }
457 self->seennl |= seennl;
458 }
459
460 return output;
461
462 error:
463 Py_DECREF(output);
464 return NULL;
465}
466
467static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000468incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000469 PyObject *args, PyObject *kwds)
470{
471 char *kwlist[] = {"input", "final", NULL};
472 PyObject *input;
473 int final = 0;
474
475 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
476 kwlist, &input, &final))
477 return NULL;
478 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
479}
480
481static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000482incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000483{
484 PyObject *buffer;
485 unsigned PY_LONG_LONG flag;
486
487 if (self->decoder != Py_None) {
488 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
489 _PyIO_str_getstate, NULL);
490 if (state == NULL)
491 return NULL;
492 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
493 Py_DECREF(state);
494 return NULL;
495 }
496 Py_INCREF(buffer);
497 Py_DECREF(state);
498 }
499 else {
500 buffer = PyBytes_FromString("");
501 flag = 0;
502 }
503 flag <<= 1;
504 if (self->pendingcr)
505 flag |= 1;
506 return Py_BuildValue("NK", buffer, flag);
507}
508
509static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000510incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000511{
512 PyObject *buffer;
513 unsigned PY_LONG_LONG flag;
514
515 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
516 return NULL;
517
518 self->pendingcr = (int) flag & 1;
519 flag >>= 1;
520
521 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200522 return _PyObject_CallMethodId(self->decoder,
523 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000524 else
525 Py_RETURN_NONE;
526}
527
528static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000529incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000530{
531 self->seennl = 0;
532 self->pendingcr = 0;
533 if (self->decoder != Py_None)
534 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
535 else
536 Py_RETURN_NONE;
537}
538
539static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000540incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000541{
542 switch (self->seennl) {
543 case SEEN_CR:
544 return PyUnicode_FromString("\r");
545 case SEEN_LF:
546 return PyUnicode_FromString("\n");
547 case SEEN_CRLF:
548 return PyUnicode_FromString("\r\n");
549 case SEEN_CR | SEEN_LF:
550 return Py_BuildValue("ss", "\r", "\n");
551 case SEEN_CR | SEEN_CRLF:
552 return Py_BuildValue("ss", "\r", "\r\n");
553 case SEEN_LF | SEEN_CRLF:
554 return Py_BuildValue("ss", "\n", "\r\n");
555 case SEEN_CR | SEEN_LF | SEEN_CRLF:
556 return Py_BuildValue("sss", "\r", "\n", "\r\n");
557 default:
558 Py_RETURN_NONE;
559 }
560
561}
562
563
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000564static PyMethodDef incrementalnewlinedecoder_methods[] = {
565 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
566 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
567 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
568 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000569 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570};
571
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000572static PyGetSetDef incrementalnewlinedecoder_getset[] = {
573 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000574 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575};
576
577PyTypeObject PyIncrementalNewlineDecoder_Type = {
578 PyVarObject_HEAD_INIT(NULL, 0)
579 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000580 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000581 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000582 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583 0, /*tp_print*/
584 0, /*tp_getattr*/
585 0, /*tp_setattr*/
586 0, /*tp_compare */
587 0, /*tp_repr*/
588 0, /*tp_as_number*/
589 0, /*tp_as_sequence*/
590 0, /*tp_as_mapping*/
591 0, /*tp_hash */
592 0, /*tp_call*/
593 0, /*tp_str*/
594 0, /*tp_getattro*/
595 0, /*tp_setattro*/
596 0, /*tp_as_buffer*/
597 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000598 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000599 0, /* tp_traverse */
600 0, /* tp_clear */
601 0, /* tp_richcompare */
602 0, /*tp_weaklistoffset*/
603 0, /* tp_iter */
604 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000605 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000606 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000607 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000608 0, /* tp_base */
609 0, /* tp_dict */
610 0, /* tp_descr_get */
611 0, /* tp_descr_set */
612 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /* tp_alloc */
615 PyType_GenericNew, /* tp_new */
616};
617
618
619/* TextIOWrapper */
620
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000621PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000622 "Character and line based layer over a BufferedIOBase object, buffer.\n"
623 "\n"
624 "encoding gives the name of the encoding that the stream will be\n"
625 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
626 "\n"
627 "errors determines the strictness of encoding and decoding (see the\n"
628 "codecs.register) and defaults to \"strict\".\n"
629 "\n"
630 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
631 "handling of line endings. If it is None, universal newlines is\n"
632 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
633 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
634 "caller. Conversely, on output, '\\n' is translated to the system\n"
635 "default line seperator, os.linesep. If newline is any other of its\n"
636 "legal values, that newline becomes the newline when the file is read\n"
637 "and it is returned untranslated. On output, '\\n' is converted to the\n"
638 "newline.\n"
639 "\n"
640 "If line_buffering is True, a call to flush is implied when a call to\n"
641 "write contains a newline character."
642 );
643
644typedef PyObject *
645 (*encodefunc_t)(PyObject *, PyObject *);
646
647typedef struct
648{
649 PyObject_HEAD
650 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000651 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000652 Py_ssize_t chunk_size;
653 PyObject *buffer;
654 PyObject *encoding;
655 PyObject *encoder;
656 PyObject *decoder;
657 PyObject *readnl;
658 PyObject *errors;
659 const char *writenl; /* utf-8 encoded, NULL stands for \n */
660 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200661 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000662 char readuniversal;
663 char readtranslate;
664 char writetranslate;
665 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200666 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000667 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000668 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000669 /* Specialized encoding func (see below) */
670 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000671 /* Whether or not it's the start of the stream */
672 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000673
674 /* Reads and writes are internally buffered in order to speed things up.
675 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000676
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 Please also note that text to be written is first encoded before being
678 buffered. This is necessary so that encoding errors are immediately
679 reported to the caller, but it unfortunately means that the
680 IncrementalEncoder (whose encode() method is always written in Python)
681 becomes a bottleneck for small writes.
682 */
683 PyObject *decoded_chars; /* buffer for text returned from decoder */
684 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
685 PyObject *pending_bytes; /* list of bytes objects waiting to be
686 written, or NULL */
687 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000688
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000689 /* snapshot is either None, or a tuple (dec_flags, next_input) where
690 * dec_flags is the second (integer) item of the decoder state and
691 * next_input is the chunk of input bytes that comes next after the
692 * snapshot point. We use this to reconstruct decoder states in tell().
693 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000694 PyObject *snapshot;
695 /* Bytes-to-characters ratio for the current chunk. Serves as input for
696 the heuristic in tell(). */
697 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698
699 /* Cache raw object if it's a FileIO object */
700 PyObject *raw;
701
702 PyObject *weakreflist;
703 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000704} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000705
706
707/* A couple of specialized cases in order to bypass the slow incremental
708 encoding methods for the most popular encodings. */
709
710static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000711ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000712{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200713 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714}
715
716static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000717utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718{
719 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
720 PyUnicode_GET_SIZE(text),
721 PyBytes_AS_STRING(self->errors), 1);
722}
723
724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000725utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726{
727 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
728 PyUnicode_GET_SIZE(text),
729 PyBytes_AS_STRING(self->errors), -1);
730}
731
732static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000733utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000734{
Antoine Pitroue4501852009-05-14 18:55:55 +0000735 if (!self->encoding_start_of_stream) {
736 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000738 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000740 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000741#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000742 }
743 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
744 PyUnicode_GET_SIZE(text),
745 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746}
747
Antoine Pitroue4501852009-05-14 18:55:55 +0000748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000750{
751 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
752 PyUnicode_GET_SIZE(text),
753 PyBytes_AS_STRING(self->errors), 1);
754}
755
756static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000757utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000758{
759 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
760 PyUnicode_GET_SIZE(text),
761 PyBytes_AS_STRING(self->errors), -1);
762}
763
764static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000765utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000766{
767 if (!self->encoding_start_of_stream) {
768 /* Skip the BOM and use native byte ordering */
769#if defined(WORDS_BIGENDIAN)
770 return utf32be_encode(self, text);
771#else
772 return utf32le_encode(self, text);
773#endif
774 }
775 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
776 PyUnicode_GET_SIZE(text),
777 PyBytes_AS_STRING(self->errors), 0);
778}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000779
780static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000781utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200783 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784}
785
786static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000787latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200789 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000790}
791
792/* Map normalized encoding names onto the specialized encoding funcs */
793
794typedef struct {
795 const char *name;
796 encodefunc_t encodefunc;
797} encodefuncentry;
798
Antoine Pitrou24f36292009-03-28 22:16:42 +0000799static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000800 {"ascii", (encodefunc_t) ascii_encode},
801 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000802 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803 {"utf-16-be", (encodefunc_t) utf16be_encode},
804 {"utf-16-le", (encodefunc_t) utf16le_encode},
805 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000806 {"utf-32-be", (encodefunc_t) utf32be_encode},
807 {"utf-32-le", (encodefunc_t) utf32le_encode},
808 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809 {NULL, NULL}
810};
811
812
813static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000814textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000815{
816 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200817 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818 NULL};
819 PyObject *buffer, *raw;
820 char *encoding = NULL;
821 char *errors = NULL;
822 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200823 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000824 _PyIO_State *state = IO_STATE;
825
826 PyObject *res;
827 int r;
828
829 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000830 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200831 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000832 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200833 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000834 return -1;
835
836 if (newline && newline[0] != '\0'
837 && !(newline[0] == '\n' && newline[1] == '\0')
838 && !(newline[0] == '\r' && newline[1] == '\0')
839 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
840 PyErr_Format(PyExc_ValueError,
841 "illegal newline value: %s", newline);
842 return -1;
843 }
844
845 Py_CLEAR(self->buffer);
846 Py_CLEAR(self->encoding);
847 Py_CLEAR(self->encoder);
848 Py_CLEAR(self->decoder);
849 Py_CLEAR(self->readnl);
850 Py_CLEAR(self->decoded_chars);
851 Py_CLEAR(self->pending_bytes);
852 Py_CLEAR(self->snapshot);
853 Py_CLEAR(self->errors);
854 Py_CLEAR(self->raw);
855 self->decoded_chars_used = 0;
856 self->pending_bytes_count = 0;
857 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000858 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000859
860 if (encoding == NULL) {
861 /* Try os.device_encoding(fileno) */
862 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200863 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000864 /* Ignore only AttributeError and UnsupportedOperation */
865 if (fileno == NULL) {
866 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
867 PyErr_ExceptionMatches(state->unsupported_operation)) {
868 PyErr_Clear();
869 }
870 else {
871 goto error;
872 }
873 }
874 else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200875 self->encoding = _PyObject_CallMethodId(state->os_module,
876 &PyId_device_encoding,
877 "N", fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000878 if (self->encoding == NULL)
879 goto error;
880 else if (!PyUnicode_Check(self->encoding))
881 Py_CLEAR(self->encoding);
882 }
883 }
884 if (encoding == NULL && self->encoding == NULL) {
885 if (state->locale_module == NULL) {
886 state->locale_module = PyImport_ImportModule("locale");
887 if (state->locale_module == NULL)
888 goto catch_ImportError;
889 else
890 goto use_locale;
891 }
892 else {
893 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200894 self->encoding = _PyObject_CallMethodId(
895 state->locale_module, &PyId_getpreferredencoding, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000896 if (self->encoding == NULL) {
897 catch_ImportError:
898 /*
899 Importing locale can raise a ImportError because of
900 _functools, and locale.getpreferredencoding can raise a
901 ImportError if _locale is not available. These will happen
902 during module building.
903 */
904 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
905 PyErr_Clear();
906 self->encoding = PyUnicode_FromString("ascii");
907 }
908 else
909 goto error;
910 }
911 else if (!PyUnicode_Check(self->encoding))
912 Py_CLEAR(self->encoding);
913 }
914 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000915 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000916 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000917 if (encoding == NULL)
918 goto error;
919 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000920 else if (encoding != NULL) {
921 self->encoding = PyUnicode_FromString(encoding);
922 if (self->encoding == NULL)
923 goto error;
924 }
925 else {
926 PyErr_SetString(PyExc_IOError,
927 "could not determine default encoding");
928 }
929
930 if (errors == NULL)
931 errors = "strict";
932 self->errors = PyBytes_FromString(errors);
933 if (self->errors == NULL)
934 goto error;
935
936 self->chunk_size = 8192;
937 self->readuniversal = (newline == NULL || newline[0] == '\0');
938 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200939 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000940 self->readtranslate = (newline == NULL);
941 if (newline) {
942 self->readnl = PyUnicode_FromString(newline);
943 if (self->readnl == NULL)
944 return -1;
945 }
946 self->writetranslate = (newline == NULL || newline[0] != '\0');
947 if (!self->readuniversal && self->readnl) {
948 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000949 if (self->writenl == NULL)
950 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000951 if (!strcmp(self->writenl, "\n"))
952 self->writenl = NULL;
953 }
954#ifdef MS_WINDOWS
955 else
956 self->writenl = "\r\n";
957#endif
958
959 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200960 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000961 if (res == NULL)
962 goto error;
963 r = PyObject_IsTrue(res);
964 Py_DECREF(res);
965 if (r == -1)
966 goto error;
967 if (r == 1) {
968 self->decoder = PyCodec_IncrementalDecoder(
969 encoding, errors);
970 if (self->decoder == NULL)
971 goto error;
972
973 if (self->readuniversal) {
974 PyObject *incrementalDecoder = PyObject_CallFunction(
975 (PyObject *)&PyIncrementalNewlineDecoder_Type,
976 "Oi", self->decoder, (int)self->readtranslate);
977 if (incrementalDecoder == NULL)
978 goto error;
979 Py_CLEAR(self->decoder);
980 self->decoder = incrementalDecoder;
981 }
982 }
983
984 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200985 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000986 if (res == NULL)
987 goto error;
988 r = PyObject_IsTrue(res);
989 Py_DECREF(res);
990 if (r == -1)
991 goto error;
992 if (r == 1) {
993 PyObject *ci;
994 self->encoder = PyCodec_IncrementalEncoder(
995 encoding, errors);
996 if (self->encoder == NULL)
997 goto error;
998 /* Get the normalized named of the codec */
999 ci = _PyCodec_Lookup(encoding);
1000 if (ci == NULL)
1001 goto error;
1002 res = PyObject_GetAttrString(ci, "name");
1003 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001004 if (res == NULL) {
1005 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1006 PyErr_Clear();
1007 else
1008 goto error;
1009 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001010 else if (PyUnicode_Check(res)) {
1011 encodefuncentry *e = encodefuncs;
1012 while (e->name != NULL) {
1013 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1014 self->encodefunc = e->encodefunc;
1015 break;
1016 }
1017 e++;
1018 }
1019 }
1020 Py_XDECREF(res);
1021 }
1022
1023 self->buffer = buffer;
1024 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001025
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001026 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1027 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1028 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
1029 raw = PyObject_GetAttrString(buffer, "raw");
1030 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001031 if (raw == NULL) {
1032 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1033 PyErr_Clear();
1034 else
1035 goto error;
1036 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001037 else if (Py_TYPE(raw) == &PyFileIO_Type)
1038 self->raw = raw;
1039 else
1040 Py_DECREF(raw);
1041 }
1042
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001043 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001044 if (res == NULL)
1045 goto error;
1046 self->seekable = self->telling = PyObject_IsTrue(res);
1047 Py_DECREF(res);
1048
Antoine Pitroue96ec682011-07-23 21:46:35 +02001049 self->has_read1 = PyObject_HasAttrString(buffer, "read1");
1050
Antoine Pitroue4501852009-05-14 18:55:55 +00001051 self->encoding_start_of_stream = 0;
1052 if (self->seekable && self->encoder) {
1053 PyObject *cookieObj;
1054 int cmp;
1055
1056 self->encoding_start_of_stream = 1;
1057
1058 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1059 if (cookieObj == NULL)
1060 goto error;
1061
1062 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1063 Py_DECREF(cookieObj);
1064 if (cmp < 0) {
1065 goto error;
1066 }
1067
1068 if (cmp == 0) {
1069 self->encoding_start_of_stream = 0;
1070 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1071 _PyIO_zero, NULL);
1072 if (res == NULL)
1073 goto error;
1074 Py_DECREF(res);
1075 }
1076 }
1077
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001078 self->ok = 1;
1079 return 0;
1080
1081 error:
1082 return -1;
1083}
1084
1085static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001086_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087{
1088 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1089 return -1;
1090 self->ok = 0;
1091 Py_CLEAR(self->buffer);
1092 Py_CLEAR(self->encoding);
1093 Py_CLEAR(self->encoder);
1094 Py_CLEAR(self->decoder);
1095 Py_CLEAR(self->readnl);
1096 Py_CLEAR(self->decoded_chars);
1097 Py_CLEAR(self->pending_bytes);
1098 Py_CLEAR(self->snapshot);
1099 Py_CLEAR(self->errors);
1100 Py_CLEAR(self->raw);
1101 return 0;
1102}
1103
1104static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001105textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001106{
Antoine Pitroue033e062010-10-29 10:38:18 +00001107 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001108 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001109 return;
1110 _PyObject_GC_UNTRACK(self);
1111 if (self->weakreflist != NULL)
1112 PyObject_ClearWeakRefs((PyObject *)self);
1113 Py_CLEAR(self->dict);
1114 Py_TYPE(self)->tp_free((PyObject *)self);
1115}
1116
1117static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001118textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001119{
1120 Py_VISIT(self->buffer);
1121 Py_VISIT(self->encoding);
1122 Py_VISIT(self->encoder);
1123 Py_VISIT(self->decoder);
1124 Py_VISIT(self->readnl);
1125 Py_VISIT(self->decoded_chars);
1126 Py_VISIT(self->pending_bytes);
1127 Py_VISIT(self->snapshot);
1128 Py_VISIT(self->errors);
1129 Py_VISIT(self->raw);
1130
1131 Py_VISIT(self->dict);
1132 return 0;
1133}
1134
1135static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001136textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001137{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001138 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001139 return -1;
1140 Py_CLEAR(self->dict);
1141 return 0;
1142}
1143
1144static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001145textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001146
/* Make the enclosing function return NULL when `self` is closed.

   Exact TextIOWrapper instances take a shortcut: the cached raw FileIO
   object is queried directly when available, otherwise the `closed` getter
   is evaluated; a closed file raises ValueError.  Any other type goes
   through the generic _PyIOBase_check_closed() helper. */
#define CHECK_CLOSED(self) \
    do { \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int r; \
            PyObject *_res; \
            if ((self)->raw == NULL) { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            else { \
                r = _PyFileIO_closed((self)->raw); \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1173
/* Make the enclosing function return NULL with ValueError set when `self`
   is not usable (self->ok <= 0).  The message tells a detached buffer
   apart from an object that was never initialised. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1185
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1197
1198
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001199static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001200textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001201{
1202 PyObject *buffer, *res;
1203 CHECK_INITIALIZED(self);
1204 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1205 if (res == NULL)
1206 return NULL;
1207 Py_DECREF(res);
1208 buffer = self->buffer;
1209 self->buffer = NULL;
1210 self->detached = 1;
1211 self->ok = 0;
1212 return buffer;
1213}
1214
Antoine Pitrou24f36292009-03-28 22:16:42 +00001215/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001216 underlying buffered object, though. */
1217static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001218_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001219{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001220 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001221
1222 if (self->pending_bytes == NULL)
1223 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001224
1225 pending = self->pending_bytes;
1226 Py_INCREF(pending);
1227 self->pending_bytes_count = 0;
1228 Py_CLEAR(self->pending_bytes);
1229
1230 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1231 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001232 if (b == NULL)
1233 return -1;
1234 ret = PyObject_CallMethodObjArgs(self->buffer,
1235 _PyIO_str_write, b, NULL);
1236 Py_DECREF(b);
1237 if (ret == NULL)
1238 return -1;
1239 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001240 return 0;
1241}
1242
1243static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001244textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245{
1246 PyObject *ret;
1247 PyObject *text; /* owned reference */
1248 PyObject *b;
1249 Py_ssize_t textlen;
1250 int haslf = 0;
1251 int needflush = 0;
1252
1253 CHECK_INITIALIZED(self);
1254
1255 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1256 return NULL;
1257 }
1258
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001259 if (PyUnicode_READY(text) == -1)
1260 return NULL;
1261
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001262 CHECK_CLOSED(self);
1263
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001264 if (self->encoder == NULL)
1265 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001266
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001267 Py_INCREF(text);
1268
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001269 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270
1271 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001272 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001273 haslf = 1;
1274
1275 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001276 PyObject *newtext = _PyObject_CallMethodId(
1277 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001278 Py_DECREF(text);
1279 if (newtext == NULL)
1280 return NULL;
1281 text = newtext;
1282 }
1283
Antoine Pitroue96ec682011-07-23 21:46:35 +02001284 if (self->write_through)
1285 needflush = 1;
1286 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001287 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001289 needflush = 1;
1290
1291 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001292 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001293 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001294 self->encoding_start_of_stream = 0;
1295 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 else
1297 b = PyObject_CallMethodObjArgs(self->encoder,
1298 _PyIO_str_encode, text, NULL);
1299 Py_DECREF(text);
1300 if (b == NULL)
1301 return NULL;
1302
1303 if (self->pending_bytes == NULL) {
1304 self->pending_bytes = PyList_New(0);
1305 if (self->pending_bytes == NULL) {
1306 Py_DECREF(b);
1307 return NULL;
1308 }
1309 self->pending_bytes_count = 0;
1310 }
1311 if (PyList_Append(self->pending_bytes, b) < 0) {
1312 Py_DECREF(b);
1313 return NULL;
1314 }
1315 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1316 Py_DECREF(b);
1317 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001318 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001319 return NULL;
1320 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001321
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 if (needflush) {
1323 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1324 if (ret == NULL)
1325 return NULL;
1326 Py_DECREF(ret);
1327 }
1328
1329 Py_CLEAR(self->snapshot);
1330
1331 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001332 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001333 if (ret == NULL)
1334 return NULL;
1335 Py_DECREF(ret);
1336 }
1337
1338 return PyLong_FromSsize_t(textlen);
1339}
1340
1341/* Steal a reference to chars and store it in the decoded_char buffer;
1342 */
1343static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001344textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001345{
1346 Py_CLEAR(self->decoded_chars);
1347 self->decoded_chars = chars;
1348 self->decoded_chars_used = 0;
1349}
1350
1351static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001352textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001353{
1354 PyObject *chars;
1355 Py_ssize_t avail;
1356
1357 if (self->decoded_chars == NULL)
1358 return PyUnicode_FromStringAndSize(NULL, 0);
1359
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001360 /* decoded_chars is guaranteed to be "ready". */
1361 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001362 - self->decoded_chars_used);
1363
1364 assert(avail >= 0);
1365
1366 if (n < 0 || n > avail)
1367 n = avail;
1368
1369 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001370 chars = PyUnicode_Substring(self->decoded_chars,
1371 self->decoded_chars_used,
1372 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001373 if (chars == NULL)
1374 return NULL;
1375 }
1376 else {
1377 chars = self->decoded_chars;
1378 Py_INCREF(chars);
1379 }
1380
1381 self->decoded_chars_used += n;
1382 return chars;
1383}
1384
1385/* Read and decode the next chunk of data from the BufferedReader.
1386 */
1387static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001388textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001389{
1390 PyObject *dec_buffer = NULL;
1391 PyObject *dec_flags = NULL;
1392 PyObject *input_chunk = NULL;
1393 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001394 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001395 int eof;
1396
1397 /* The return value is True unless EOF was reached. The decoded string is
1398 * placed in self._decoded_chars (replacing its previous value). The
1399 * entire input chunk is sent to the decoder, though some of it may remain
1400 * buffered in the decoder, yet to be converted.
1401 */
1402
1403 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001404 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001405 return -1;
1406 }
1407
1408 if (self->telling) {
1409 /* To prepare for tell(), we need to snapshot a point in the file
1410 * where the decoder's input buffer is empty.
1411 */
1412
1413 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1414 _PyIO_str_getstate, NULL);
1415 if (state == NULL)
1416 return -1;
1417 /* Given this, we know there was a valid snapshot point
1418 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1419 */
1420 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1421 Py_DECREF(state);
1422 return -1;
1423 }
1424 Py_INCREF(dec_buffer);
1425 Py_INCREF(dec_flags);
1426 Py_DECREF(state);
1427 }
1428
1429 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1430 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1431 if (chunk_size == NULL)
1432 goto fail;
1433 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001434 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1435 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001436 Py_DECREF(chunk_size);
1437 if (input_chunk == NULL)
1438 goto fail;
1439 assert(PyBytes_Check(input_chunk));
1440
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001441 nbytes = PyBytes_Size(input_chunk);
1442 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001443
1444 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1445 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1446 self->decoder, input_chunk, eof);
1447 }
1448 else {
1449 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1450 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1451 }
1452
1453 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1454 if (decoded_chars == NULL)
1455 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001456 if (PyUnicode_READY(decoded_chars) == -1)
1457 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001458 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001459 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001460 if (nchars > 0)
1461 self->b2cratio = (double) nbytes / nchars;
1462 else
1463 self->b2cratio = 0.0;
1464 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001465 eof = 0;
1466
1467 if (self->telling) {
1468 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1469 * next input to be decoded is dec_buffer + input_chunk.
1470 */
1471 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1472 if (next_input == NULL)
1473 goto fail;
1474 assert (PyBytes_Check(next_input));
1475 Py_DECREF(dec_buffer);
1476 Py_CLEAR(self->snapshot);
1477 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1478 }
1479 Py_DECREF(input_chunk);
1480
1481 return (eof == 0);
1482
1483 fail:
1484 Py_XDECREF(dec_buffer);
1485 Py_XDECREF(dec_flags);
1486 Py_XDECREF(input_chunk);
1487 return -1;
1488}
1489
1490static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001491textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001492{
1493 Py_ssize_t n = -1;
1494 PyObject *result = NULL, *chunks = NULL;
1495
1496 CHECK_INITIALIZED(self);
1497
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001498 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001499 return NULL;
1500
1501 CHECK_CLOSED(self);
1502
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001503 if (self->decoder == NULL)
1504 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001505
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001506 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001507 return NULL;
1508
1509 if (n < 0) {
1510 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001511 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001512 PyObject *decoded;
1513 if (bytes == NULL)
1514 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001515
1516 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1517 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1518 bytes, 1);
1519 else
1520 decoded = PyObject_CallMethodObjArgs(
1521 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001522 Py_DECREF(bytes);
1523 if (decoded == NULL)
1524 goto fail;
1525
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001526 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001527
1528 if (result == NULL) {
1529 Py_DECREF(decoded);
1530 return NULL;
1531 }
1532
1533 PyUnicode_AppendAndDel(&result, decoded);
1534 if (result == NULL)
1535 goto fail;
1536
1537 Py_CLEAR(self->snapshot);
1538 return result;
1539 }
1540 else {
1541 int res = 1;
1542 Py_ssize_t remaining = n;
1543
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001544 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001545 if (result == NULL)
1546 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001547 if (PyUnicode_READY(result) == -1)
1548 goto fail;
1549 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550
1551 /* Keep reading chunks until we have n characters to return */
1552 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001553 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 if (res < 0)
1555 goto fail;
1556 if (res == 0) /* EOF */
1557 break;
1558 if (chunks == NULL) {
1559 chunks = PyList_New(0);
1560 if (chunks == NULL)
1561 goto fail;
1562 }
1563 if (PyList_Append(chunks, result) < 0)
1564 goto fail;
1565 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001566 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001567 if (result == NULL)
1568 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001569 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001570 }
1571 if (chunks != NULL) {
1572 if (result != NULL && PyList_Append(chunks, result) < 0)
1573 goto fail;
1574 Py_CLEAR(result);
1575 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1576 if (result == NULL)
1577 goto fail;
1578 Py_CLEAR(chunks);
1579 }
1580 return result;
1581 }
1582 fail:
1583 Py_XDECREF(result);
1584 Py_XDECREF(chunks);
1585 return NULL;
1586}
1587
1588
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001589/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001590 that is to the NUL character. Otherwise the function will produce
1591 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001592static char *
1593find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001594{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001595 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001596 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001597 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001598 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001599 return s;
1600 if (s == end)
1601 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001602 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 }
1604}
1605
1606Py_ssize_t
1607_PyIO_find_line_ending(
1608 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001609 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001610{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001611 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001612
1613 if (translated) {
1614 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001615 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001617 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001618 else {
1619 *consumed = len;
1620 return -1;
1621 }
1622 }
1623 else if (universal) {
1624 /* Universal newline search. Find any of \r, \r\n, \n
1625 * The decoder ensures that \r\n are not split in two pieces
1626 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001627 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001628 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001629 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001631 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001632 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001633 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001634 if (s >= end) {
1635 *consumed = len;
1636 return -1;
1637 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001638 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001639 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001641 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001643 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001644 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001646 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001647 }
1648 }
1649 }
1650 else {
1651 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001652 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1653 char *nl = PyUnicode_DATA(readnl);
1654 /* Assume that readnl is an ASCII character. */
1655 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001659 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001660 *consumed = len;
1661 return -1;
1662 }
1663 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001664 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001665 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001666 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 if (e < s)
1668 e = s;
1669 while (s < e) {
1670 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001671 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 if (pos == NULL || pos >= e)
1673 break;
1674 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001675 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676 break;
1677 }
1678 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001679 return (pos - start)/kind + readnl_len;
1680 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001681 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001682 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001683 if (pos == NULL)
1684 *consumed = len;
1685 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001686 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 return -1;
1688 }
1689 }
1690}
1691
1692static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001693_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694{
1695 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1696 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1697 int res;
1698
1699 CHECK_CLOSED(self);
1700
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001701 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001702 return NULL;
1703
1704 chunked = 0;
1705
1706 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001707 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001708 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001709 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 Py_ssize_t consumed = 0;
1711
1712 /* First, get some data if necessary */
1713 res = 1;
1714 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001715 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001716 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 if (res < 0)
1718 goto error;
1719 if (res == 0)
1720 break;
1721 }
1722 if (res == 0) {
1723 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001724 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001725 Py_CLEAR(self->snapshot);
1726 start = endpos = offset_to_buffer = 0;
1727 break;
1728 }
1729
1730 if (remaining == NULL) {
1731 line = self->decoded_chars;
1732 start = self->decoded_chars_used;
1733 offset_to_buffer = 0;
1734 Py_INCREF(line);
1735 }
1736 else {
1737 assert(self->decoded_chars_used == 0);
1738 line = PyUnicode_Concat(remaining, self->decoded_chars);
1739 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001740 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001741 Py_CLEAR(remaining);
1742 if (line == NULL)
1743 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001744 if (PyUnicode_READY(line) == -1)
1745 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746 }
1747
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001748 ptr = PyUnicode_DATA(line);
1749 line_len = PyUnicode_GET_LENGTH(line);
1750 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001751
1752 endpos = _PyIO_find_line_ending(
1753 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001754 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001755 ptr + kind * start,
1756 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001757 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001758 if (endpos >= 0) {
1759 endpos += start;
1760 if (limit >= 0 && (endpos - start) + chunked >= limit)
1761 endpos = start + limit - chunked;
1762 break;
1763 }
1764
1765 /* We can put aside up to `endpos` */
1766 endpos = consumed + start;
1767 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1768 /* Didn't find line ending, but reached length limit */
1769 endpos = start + limit - chunked;
1770 break;
1771 }
1772
1773 if (endpos > start) {
1774 /* No line ending seen yet - put aside current data */
1775 PyObject *s;
1776 if (chunks == NULL) {
1777 chunks = PyList_New(0);
1778 if (chunks == NULL)
1779 goto error;
1780 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001781 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001782 if (s == NULL)
1783 goto error;
1784 if (PyList_Append(chunks, s) < 0) {
1785 Py_DECREF(s);
1786 goto error;
1787 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001788 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001789 Py_DECREF(s);
1790 }
1791 /* There may be some remaining bytes we'll have to prepend to the
1792 next chunk of data */
1793 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001794 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001795 if (remaining == NULL)
1796 goto error;
1797 }
1798 Py_CLEAR(line);
1799 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001800 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001801 }
1802
1803 if (line != NULL) {
1804 /* Our line ends in the current buffer */
1805 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001806 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1807 PyObject *s = PyUnicode_Substring(line, start, endpos);
1808 Py_CLEAR(line);
1809 if (s == NULL)
1810 goto error;
1811 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 }
1813 }
1814 if (remaining != NULL) {
1815 if (chunks == NULL) {
1816 chunks = PyList_New(0);
1817 if (chunks == NULL)
1818 goto error;
1819 }
1820 if (PyList_Append(chunks, remaining) < 0)
1821 goto error;
1822 Py_CLEAR(remaining);
1823 }
1824 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001825 if (line != NULL) {
1826 if (PyList_Append(chunks, line) < 0)
1827 goto error;
1828 Py_DECREF(line);
1829 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001830 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1831 if (line == NULL)
1832 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001833 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001834 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001835 if (line == NULL) {
1836 Py_INCREF(_PyIO_empty_str);
1837 line = _PyIO_empty_str;
1838 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001839
1840 return line;
1841
1842 error:
1843 Py_XDECREF(chunks);
1844 Py_XDECREF(remaining);
1845 Py_XDECREF(line);
1846 return NULL;
1847}
1848
1849static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001850textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001851{
1852 Py_ssize_t limit = -1;
1853
1854 CHECK_INITIALIZED(self);
1855 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1856 return NULL;
1857 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001858 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001859}
1860
1861/* Seek and Tell */
1862
1863typedef struct {
1864 Py_off_t start_pos;
1865 int dec_flags;
1866 int bytes_to_feed;
1867 int chars_to_skip;
1868 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001869} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the intermediary
   byte string the various cookie_type fields will be stored.  Each offset
   is expressed relative to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1908
1909static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001910textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911{
1912 unsigned char buffer[COOKIE_BUF_LEN];
1913 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1914 if (cookieLong == NULL)
1915 return -1;
1916
1917 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1918 IS_LITTLE_ENDIAN, 0) < 0) {
1919 Py_DECREF(cookieLong);
1920 return -1;
1921 }
1922 Py_DECREF(cookieLong);
1923
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001924 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1925 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1926 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1927 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1928 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001929
1930 return 0;
1931}
1932
1933static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001934textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001935{
1936 unsigned char buffer[COOKIE_BUF_LEN];
1937
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001938 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1939 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1940 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1941 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1942 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001943
1944 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1945}
1946#undef IS_LITTLE_ENDIAN
1947
1948static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001949_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001950{
1951 PyObject *res;
1952 /* When seeking to the start of the stream, we call decoder.reset()
1953 rather than decoder.getstate().
1954 This is for a few decoders such as utf-16 for which the state value
1955 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1956 utf-16, that we are expecting a BOM).
1957 */
1958 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1959 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1960 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001961 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1962 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963 if (res == NULL)
1964 return -1;
1965 Py_DECREF(res);
1966 return 0;
1967}
1968
Antoine Pitroue4501852009-05-14 18:55:55 +00001969static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001970_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001971{
1972 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001973 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001974 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1975 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1976 self->encoding_start_of_stream = 1;
1977 }
1978 else {
1979 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1980 _PyIO_zero, NULL);
1981 self->encoding_start_of_stream = 0;
1982 }
1983 if (res == NULL)
1984 return -1;
1985 Py_DECREF(res);
1986 return 0;
1987}
1988
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001990textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001991{
1992 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001993 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001995 PyObject *res;
1996 int cmp;
1997
1998 CHECK_INITIALIZED(self);
1999
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2001 return NULL;
2002 CHECK_CLOSED(self);
2003
2004 Py_INCREF(cookieObj);
2005
2006 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002007 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008 goto fail;
2009 }
2010
2011 if (whence == 1) {
2012 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002013 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002014 if (cmp < 0)
2015 goto fail;
2016
2017 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002018 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002019 goto fail;
2020 }
2021
2022 /* Seeking to the current position should attempt to
2023 * sync the underlying buffer with the current position.
2024 */
2025 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002026 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027 if (cookieObj == NULL)
2028 goto fail;
2029 }
2030 else if (whence == 2) {
2031 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002032 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002033 if (cmp < 0)
2034 goto fail;
2035
2036 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002037 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002038 goto fail;
2039 }
2040
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002041 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002042 if (res == NULL)
2043 goto fail;
2044 Py_DECREF(res);
2045
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002046 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047 Py_CLEAR(self->snapshot);
2048 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002049 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002050 if (res == NULL)
2051 goto fail;
2052 Py_DECREF(res);
2053 }
2054
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002055 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002056 Py_XDECREF(cookieObj);
2057 return res;
2058 }
2059 else if (whence != 0) {
2060 PyErr_Format(PyExc_ValueError,
2061 "invalid whence (%d, should be 0, 1 or 2)", whence);
2062 goto fail;
2063 }
2064
Antoine Pitroue4501852009-05-14 18:55:55 +00002065 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002066 if (cmp < 0)
2067 goto fail;
2068
2069 if (cmp == 1) {
2070 PyErr_Format(PyExc_ValueError,
2071 "negative seek position %R", cookieObj);
2072 goto fail;
2073 }
2074
2075 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2076 if (res == NULL)
2077 goto fail;
2078 Py_DECREF(res);
2079
2080 /* The strategy of seek() is to go back to the safe start point
2081 * and replay the effect of read(chars_to_skip) from there.
2082 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002083 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002084 goto fail;
2085
2086 /* Seek back to the safe start point. */
2087 posobj = PyLong_FromOff_t(cookie.start_pos);
2088 if (posobj == NULL)
2089 goto fail;
2090 res = PyObject_CallMethodObjArgs(self->buffer,
2091 _PyIO_str_seek, posobj, NULL);
2092 Py_DECREF(posobj);
2093 if (res == NULL)
2094 goto fail;
2095 Py_DECREF(res);
2096
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002097 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002098 Py_CLEAR(self->snapshot);
2099
2100 /* Restore the decoder to its state from the safe start point. */
2101 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002102 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 goto fail;
2104 }
2105
2106 if (cookie.chars_to_skip) {
2107 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002108 PyObject *input_chunk = _PyObject_CallMethodId(
2109 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 PyObject *decoded;
2111
2112 if (input_chunk == NULL)
2113 goto fail;
2114
2115 assert (PyBytes_Check(input_chunk));
2116
2117 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2118 if (self->snapshot == NULL) {
2119 Py_DECREF(input_chunk);
2120 goto fail;
2121 }
2122
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002123 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2124 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002125
2126 if (decoded == NULL)
2127 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002128 if (PyUnicode_READY(decoded) == -1) {
2129 Py_DECREF(decoded);
2130 goto fail;
2131 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002132
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002133 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002134
2135 /* Skip chars_to_skip of the decoded characters. */
2136 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2137 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2138 goto fail;
2139 }
2140 self->decoded_chars_used = cookie.chars_to_skip;
2141 }
2142 else {
2143 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2144 if (self->snapshot == NULL)
2145 goto fail;
2146 }
2147
Antoine Pitroue4501852009-05-14 18:55:55 +00002148 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2149 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002150 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002151 goto fail;
2152 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002153 return cookieObj;
2154 fail:
2155 Py_XDECREF(cookieObj);
2156 return NULL;
2157
2158}
2159
2160static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002161textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002162{
2163 PyObject *res;
2164 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002165 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002166 PyObject *next_input;
2167 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002168 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002169 PyObject *saved_state = NULL;
2170 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002171 char *dec_buffer;
2172 Py_ssize_t dec_buffer_len;
2173 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002174
2175 CHECK_INITIALIZED(self);
2176 CHECK_CLOSED(self);
2177
2178 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002179 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002180 goto fail;
2181 }
2182 if (!self->telling) {
2183 PyErr_SetString(PyExc_IOError,
2184 "telling position disabled by next() call");
2185 goto fail;
2186 }
2187
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002188 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002189 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002190 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002191 if (res == NULL)
2192 goto fail;
2193 Py_DECREF(res);
2194
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002195 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196 if (posobj == NULL)
2197 goto fail;
2198
2199 if (self->decoder == NULL || self->snapshot == NULL) {
2200 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2201 return posobj;
2202 }
2203
2204#if defined(HAVE_LARGEFILE_SUPPORT)
2205 cookie.start_pos = PyLong_AsLongLong(posobj);
2206#else
2207 cookie.start_pos = PyLong_AsLong(posobj);
2208#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002209 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002210 if (PyErr_Occurred())
2211 goto fail;
2212
2213 /* Skip backward to the snapshot point (see _read_chunk). */
2214 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2215 goto fail;
2216
2217 assert (PyBytes_Check(next_input));
2218
2219 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2220
2221 /* How many decoded characters have been used up since the snapshot? */
2222 if (self->decoded_chars_used == 0) {
2223 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002224 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002225 }
2226
2227 chars_to_skip = self->decoded_chars_used;
2228
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002229 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002230 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2231 _PyIO_str_getstate, NULL);
2232 if (saved_state == NULL)
2233 goto fail;
2234
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002235#define DECODER_GETSTATE() do { \
2236 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2237 _PyIO_str_getstate, NULL); \
2238 if (_state == NULL) \
2239 goto fail; \
2240 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2241 Py_DECREF(_state); \
2242 goto fail; \
2243 } \
2244 Py_DECREF(_state); \
2245 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002246
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002247 /* TODO: replace assert with exception */
2248#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002249 PyObject *_decoded = _PyObject_CallMethodId( \
2250 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002251 if (_decoded == NULL) \
2252 goto fail; \
2253 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002254 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002255 Py_DECREF(_decoded); \
2256 } while (0)
2257
2258 /* Fast search for an acceptable start point, close to our
2259 current pos */
2260 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2261 skip_back = 1;
2262 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2263 input = PyBytes_AS_STRING(next_input);
2264 while (skip_bytes > 0) {
2265 /* Decode up to temptative start point */
2266 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2267 goto fail;
2268 DECODER_DECODE(input, skip_bytes, chars_decoded);
2269 if (chars_decoded <= chars_to_skip) {
2270 DECODER_GETSTATE();
2271 if (dec_buffer_len == 0) {
2272 /* Before pos and no bytes buffered in decoder => OK */
2273 cookie.dec_flags = dec_flags;
2274 chars_to_skip -= chars_decoded;
2275 break;
2276 }
2277 /* Skip back by buffered amount and reset heuristic */
2278 skip_bytes -= dec_buffer_len;
2279 skip_back = 1;
2280 }
2281 else {
2282 /* We're too far ahead, skip back a bit */
2283 skip_bytes -= skip_back;
2284 skip_back *= 2;
2285 }
2286 }
2287 if (skip_bytes <= 0) {
2288 skip_bytes = 0;
2289 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2290 goto fail;
2291 }
2292
2293 /* Note our initial start point. */
2294 cookie.start_pos += skip_bytes;
2295 cookie.chars_to_skip = chars_to_skip;
2296 if (chars_to_skip == 0)
2297 goto finally;
2298
2299 /* We should be close to the desired position. Now feed the decoder one
2300 * byte at a time until we reach the `chars_to_skip` target.
2301 * As we go, note the nearest "safe start point" before the current
2302 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002303 * can safely start from there and advance to this location).
2304 */
2305 chars_decoded = 0;
2306 input = PyBytes_AS_STRING(next_input);
2307 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002308 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002309 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002310 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002311
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002312 DECODER_DECODE(input, 1, n);
2313 /* We got n chars for 1 byte */
2314 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002315 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002316 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002317
2318 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2319 /* Decoder buffer is empty, so this is a safe start point. */
2320 cookie.start_pos += cookie.bytes_to_feed;
2321 chars_to_skip -= chars_decoded;
2322 cookie.dec_flags = dec_flags;
2323 cookie.bytes_to_feed = 0;
2324 chars_decoded = 0;
2325 }
2326 if (chars_decoded >= chars_to_skip)
2327 break;
2328 input++;
2329 }
2330 if (input == input_end) {
2331 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002332 PyObject *decoded = _PyObject_CallMethodId(
2333 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002334 if (decoded == NULL)
2335 goto fail;
2336 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002337 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002338 Py_DECREF(decoded);
2339 cookie.need_eof = 1;
2340
2341 if (chars_decoded < chars_to_skip) {
2342 PyErr_SetString(PyExc_IOError,
2343 "can't reconstruct logical file position");
2344 goto fail;
2345 }
2346 }
2347
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002348finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002349 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002350 Py_DECREF(saved_state);
2351 if (res == NULL)
2352 return NULL;
2353 Py_DECREF(res);
2354
2355 /* The returned cookie corresponds to the last safe start point. */
2356 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002357 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002358
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002359fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002360 if (saved_state) {
2361 PyObject *type, *value, *traceback;
2362 PyErr_Fetch(&type, &value, &traceback);
2363
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002364 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002365 Py_DECREF(saved_state);
2366 if (res == NULL)
2367 return NULL;
2368 Py_DECREF(res);
2369
2370 PyErr_Restore(type, value, traceback);
2371 }
2372 return NULL;
2373}
2374
2375static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002376textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002377{
2378 PyObject *pos = Py_None;
2379 PyObject *res;
2380
2381 CHECK_INITIALIZED(self)
2382 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2383 return NULL;
2384 }
2385
2386 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2387 if (res == NULL)
2388 return NULL;
2389 Py_DECREF(res);
2390
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002391 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002392}
2393
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002394static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002395textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002396{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002397 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002398
2399 CHECK_INITIALIZED(self);
2400
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002401 res = PyUnicode_FromString("<_io.TextIOWrapper");
2402 if (res == NULL)
2403 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002404 nameobj = PyObject_GetAttrString((PyObject *) self, "name");
2405 if (nameobj == NULL) {
2406 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2407 PyErr_Clear();
2408 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002409 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002410 }
2411 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002412 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002413 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002414 if (s == NULL)
2415 goto error;
2416 PyUnicode_AppendAndDel(&res, s);
2417 if (res == NULL)
2418 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002419 }
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002420 modeobj = PyObject_GetAttrString((PyObject *) self, "mode");
2421 if (modeobj == NULL) {
2422 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2423 PyErr_Clear();
2424 else
2425 goto error;
2426 }
2427 else {
2428 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2429 Py_DECREF(modeobj);
2430 if (s == NULL)
2431 goto error;
2432 PyUnicode_AppendAndDel(&res, s);
2433 if (res == NULL)
2434 return NULL;
2435 }
2436 s = PyUnicode_FromFormat("%U encoding=%R>",
2437 res, self->encoding);
2438 Py_DECREF(res);
2439 return s;
2440error:
2441 Py_XDECREF(res);
2442 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002443}
2444
2445
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002446/* Inquiries */
2447
2448static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002449textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450{
2451 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002452 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002453}
2454
2455static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002456textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002457{
2458 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002459 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002460}
2461
2462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002463textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464{
2465 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002466 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002467}
2468
2469static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002470textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471{
2472 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002473 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002474}
2475
2476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002477textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478{
2479 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002480 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002481}
2482
2483static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002484textiowrapper_getstate(textio *self, PyObject *args)
2485{
2486 PyErr_Format(PyExc_TypeError,
2487 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2488 return NULL;
2489}
2490
2491static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002492textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002493{
2494 CHECK_INITIALIZED(self);
2495 CHECK_CLOSED(self);
2496 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002497 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002498 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002499 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500}
2501
2502static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002503textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504{
2505 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002506 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002508
Antoine Pitrou6be88762010-05-03 16:48:20 +00002509 res = textiowrapper_closed_get(self, NULL);
2510 if (res == NULL)
2511 return NULL;
2512 r = PyObject_IsTrue(res);
2513 Py_DECREF(res);
2514 if (r < 0)
2515 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002516
Antoine Pitrou6be88762010-05-03 16:48:20 +00002517 if (r > 0) {
2518 Py_RETURN_NONE; /* stream already closed */
2519 }
2520 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002521 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002522 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002523 if (res)
2524 Py_DECREF(res);
2525 else
2526 PyErr_Clear();
2527 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002528 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002529 if (res == NULL) {
2530 return NULL;
2531 }
2532 else
2533 Py_DECREF(res);
2534
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002535 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002536 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537}
2538
2539static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002540textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541{
2542 PyObject *line;
2543
2544 CHECK_INITIALIZED(self);
2545
2546 self->telling = 0;
2547 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2548 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002549 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002550 }
2551 else {
2552 line = PyObject_CallMethodObjArgs((PyObject *)self,
2553 _PyIO_str_readline, NULL);
2554 if (line && !PyUnicode_Check(line)) {
2555 PyErr_Format(PyExc_IOError,
2556 "readline() should have returned an str object, "
2557 "not '%.200s'", Py_TYPE(line)->tp_name);
2558 Py_DECREF(line);
2559 return NULL;
2560 }
2561 }
2562
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002563 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002564 return NULL;
2565
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002566 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567 /* Reached EOF or would have blocked */
2568 Py_DECREF(line);
2569 Py_CLEAR(self->snapshot);
2570 self->telling = self->seekable;
2571 return NULL;
2572 }
2573
2574 return line;
2575}
2576
2577static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002578textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002579{
2580 CHECK_INITIALIZED(self);
2581 return PyObject_GetAttrString(self->buffer, "name");
2582}
2583
2584static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002585textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002586{
2587 CHECK_INITIALIZED(self);
2588 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2589}
2590
2591static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002592textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593{
2594 PyObject *res;
2595 CHECK_INITIALIZED(self);
2596 if (self->decoder == NULL)
2597 Py_RETURN_NONE;
2598 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2599 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002600 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2601 PyErr_Clear();
2602 Py_RETURN_NONE;
2603 }
2604 else {
2605 return NULL;
2606 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002607 }
2608 return res;
2609}
2610
2611static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002612textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002613{
2614 CHECK_INITIALIZED(self);
2615 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2616}
2617
2618static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002619textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002620{
2621 CHECK_INITIALIZED(self);
2622 return PyLong_FromSsize_t(self->chunk_size);
2623}
2624
2625static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002626textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002627{
2628 Py_ssize_t n;
2629 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002630 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002631 if (n == -1 && PyErr_Occurred())
2632 return -1;
2633 if (n <= 0) {
2634 PyErr_SetString(PyExc_ValueError,
2635 "a strictly positive integer is required");
2636 return -1;
2637 }
2638 self->chunk_size = n;
2639 return 0;
2640}
2641
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002642static PyMethodDef textiowrapper_methods[] = {
2643 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2644 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2645 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2646 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2647 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2648 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002649
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002650 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2651 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2652 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2653 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2654 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002655 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002656
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002657 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2658 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2659 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660 {NULL, NULL}
2661};
2662
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002663static PyMemberDef textiowrapper_members[] = {
2664 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2665 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2666 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002667 {NULL}
2668};
2669
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002670static PyGetSetDef textiowrapper_getset[] = {
2671 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2672 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2674*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002675 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2676 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2677 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2678 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002679 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680};
2681
2682PyTypeObject PyTextIOWrapper_Type = {
2683 PyVarObject_HEAD_INIT(NULL, 0)
2684 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002685 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002687 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002688 0, /*tp_print*/
2689 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002690 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002691 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002692 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693 0, /*tp_as_number*/
2694 0, /*tp_as_sequence*/
2695 0, /*tp_as_mapping*/
2696 0, /*tp_hash */
2697 0, /*tp_call*/
2698 0, /*tp_str*/
2699 0, /*tp_getattro*/
2700 0, /*tp_setattro*/
2701 0, /*tp_as_buffer*/
2702 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2703 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002704 textiowrapper_doc, /* tp_doc */
2705 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2706 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002708 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002709 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002710 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2711 textiowrapper_methods, /* tp_methods */
2712 textiowrapper_members, /* tp_members */
2713 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002714 0, /* tp_base */
2715 0, /* tp_dict */
2716 0, /* tp_descr_get */
2717 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002718 offsetof(textio, dict), /*tp_dictoffset*/
2719 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002720 0, /* tp_alloc */
2721 PyType_GenericNew, /* tp_new */
2722};