blob: e9abaf42a41f6626ecfda6d2c0fba524e23962cf [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Attribute and method names looked up by this file.  Each
   _Py_IDENTIFIER(x) line makes the name available as PyId_x. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(device_encoding);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020035
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000036/* TextIOBase */
37
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000038PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000039 "Base class for text I/O.\n"
40 "\n"
41 "This class provides a character and line based interface to stream\n"
42 "I/O. There is no readinto method because Python's character strings\n"
43 "are immutable. There is no public constructor.\n"
44 );
45
46static PyObject *
47_unsupported(const char *message)
48{
49 PyErr_SetString(IO_STATE->unsupported_operation, message);
50 return NULL;
51}
52
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000053PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000054 "Separate the underlying buffer from the TextIOBase and return it.\n"
55 "\n"
56 "After the underlying buffer has been detached, the TextIO is in an\n"
57 "unusable state.\n"
58 );
59
60static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000061textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000062{
63 return _unsupported("detach");
64}
65
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000066PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000067 "Read at most n characters from stream.\n"
68 "\n"
69 "Read from underlying buffer until we have n characters or we hit EOF.\n"
70 "If n is negative or omitted, read until EOF.\n"
71 );
72
73static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000074textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000075{
76 return _unsupported("read");
77}
78
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000079PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000080 "Read until newline or EOF.\n"
81 "\n"
82 "Returns an empty string if EOF is hit immediately.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("readline");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Write string to stream.\n"
93 "Returns the number of characters written (which is always equal to\n"
94 "the length of the string).\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("write");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Encoding of the text stream.\n"
105 "\n"
106 "Subclasses should override.\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 Py_RETURN_NONE;
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Line endings translated so far.\n"
117 "\n"
118 "Only line endings translated during reading are considered.\n"
119 "\n"
120 "Subclasses should override.\n"
121 );
122
123static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125{
126 Py_RETURN_NONE;
127}
128
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000129PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000130 "The error setting of the decoder or encoder.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000141
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000142static PyMethodDef textiobase_methods[] = {
143 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
144 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
145 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
146 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000147 {NULL, NULL}
148};
149
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000150static PyGetSetDef textiobase_getset[] = {
151 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
152 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
153 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000154 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000155};
156
157PyTypeObject PyTextIOBase_Type = {
158 PyVarObject_HEAD_INIT(NULL, 0)
159 "_io._TextIOBase", /*tp_name*/
160 0, /*tp_basicsize*/
161 0, /*tp_itemsize*/
162 0, /*tp_dealloc*/
163 0, /*tp_print*/
164 0, /*tp_getattr*/
165 0, /*tp_setattr*/
166 0, /*tp_compare */
167 0, /*tp_repr*/
168 0, /*tp_as_number*/
169 0, /*tp_as_sequence*/
170 0, /*tp_as_mapping*/
171 0, /*tp_hash */
172 0, /*tp_call*/
173 0, /*tp_str*/
174 0, /*tp_getattro*/
175 0, /*tp_setattro*/
176 0, /*tp_as_buffer*/
177 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000178 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000179 0, /* tp_traverse */
180 0, /* tp_clear */
181 0, /* tp_richcompare */
182 0, /* tp_weaklistoffset */
183 0, /* tp_iter */
184 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000185 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000186 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 &PyIOBase_Type, /* tp_base */
189 0, /* tp_dict */
190 0, /* tp_descr_get */
191 0, /* tp_descr_set */
192 0, /* tp_dictoffset */
193 0, /* tp_init */
194 0, /* tp_alloc */
195 0, /* tp_new */
196};
197
198
199/* IncrementalNewlineDecoder */
200
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000201PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000202 "Codec used when reading a file in universal newlines mode. It wraps\n"
203 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
204 "records the types of newlines encountered. When used with\n"
205 "translate=False, it ensures that the newline sequence is returned in\n"
206 "one piece. When used with decoder=None, it expects unicode strings as\n"
207 "decode input and translates newlines without first invoking an external\n"
208 "decoder.\n"
209 );
210
211typedef struct {
212 PyObject_HEAD
213 PyObject *decoder;
214 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000215 signed int pendingcr: 1;
216 signed int translate: 1;
217 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000218} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000219
220static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000221incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222 PyObject *args, PyObject *kwds)
223{
224 PyObject *decoder;
225 int translate;
226 PyObject *errors = NULL;
227 char *kwlist[] = {"decoder", "translate", "errors", NULL};
228
229 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
230 kwlist, &decoder, &translate, &errors))
231 return -1;
232
233 self->decoder = decoder;
234 Py_INCREF(decoder);
235
236 if (errors == NULL) {
237 self->errors = PyUnicode_FromString("strict");
238 if (self->errors == NULL)
239 return -1;
240 }
241 else {
242 Py_INCREF(errors);
243 self->errors = errors;
244 }
245
246 self->translate = translate;
247 self->seennl = 0;
248 self->pendingcr = 0;
249
250 return 0;
251}
252
253static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000254incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000255{
256 Py_CLEAR(self->decoder);
257 Py_CLEAR(self->errors);
258 Py_TYPE(self)->tp_free((PyObject *)self);
259}
260
/* Bit flags recording which kinds of line ending have been seen so far;
   they are OR-ed together in nldecoder_object.seennl. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
265
266PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000267_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 PyObject *input, int final)
269{
270 PyObject *output;
271 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273
274 if (self->decoder == NULL) {
275 PyErr_SetString(PyExc_ValueError,
276 "IncrementalNewlineDecoder.__init__ not called");
277 return NULL;
278 }
279
280 /* decode input (with the eventual \r from a previous pass) */
281 if (self->decoder != Py_None) {
282 output = PyObject_CallMethodObjArgs(self->decoder,
283 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
284 }
285 else {
286 output = input;
287 Py_INCREF(output);
288 }
289
290 if (output == NULL)
291 return NULL;
292
293 if (!PyUnicode_Check(output)) {
294 PyErr_SetString(PyExc_TypeError,
295 "decoder should return a string result");
296 goto error;
297 }
298
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200299 if (PyUnicode_READY(output) == -1)
300 goto error;
301
302 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000303 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200304 /* Prefix output with CR */
305 int kind;
306 PyObject *modified;
307 char *out;
308
309 modified = PyUnicode_New(output_len + 1,
310 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311 if (modified == NULL)
312 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200313 kind = PyUnicode_KIND(modified);
314 out = PyUnicode_DATA(modified);
315 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200316 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000317 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200318 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000319 self->pendingcr = 0;
320 output_len++;
321 }
322
323 /* retain last \r even when not translating data:
324 * then readline() is sure to get \r\n in one pass
325 */
326 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000327 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
329 {
330 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
331 if (modified == NULL)
332 goto error;
333 Py_DECREF(output);
334 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000335 self->pendingcr = 1;
336 }
337 }
338
339 /* Record which newlines are read and do newline translation if desired,
340 all in one pass. */
341 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000343 Py_ssize_t len;
344 int seennl = self->seennl;
345 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200346 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 in_str = PyUnicode_DATA(output);
349 len = PyUnicode_GET_LENGTH(output);
350 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000351
352 if (len == 0)
353 return output;
354
355 /* If, up to now, newlines are consistently \n, do a quick check
356 for the \r *byte* with the libc's optimized memchr.
357 */
358 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200359 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360 }
361
Antoine Pitrou66913e22009-03-06 23:40:56 +0000362 if (only_lf) {
363 /* If not already seen, quick scan for a possible "\n" character.
364 (there's nothing else to be done, even when in translation mode)
365 */
366 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200367 memchr(in_str, '\n', kind * len) != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000369 for (;;) {
Victor Stinnerf7b8cb62011-09-29 03:28:17 +0200370 Py_UCS4 c;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000371 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200372 while (PyUnicode_READ(kind, in_str, i) > '\n')
373 i++;
374 c = PyUnicode_READ(kind, in_str, i++);
Antoine Pitrou66913e22009-03-06 23:40:56 +0000375 if (c == '\n') {
376 seennl |= SEEN_LF;
377 break;
378 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200379 if (i >= len)
Antoine Pitrou66913e22009-03-06 23:40:56 +0000380 break;
381 }
382 }
383 /* Finished: we have scanned for newlines, and none of them
384 need translating */
385 }
386 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000388 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000389 if (seennl == SEEN_ALL)
390 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000391 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200392 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000393 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200394 while (PyUnicode_READ(kind, in_str, i) > '\r')
395 i++;
396 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397 if (c == '\n')
398 seennl |= SEEN_LF;
399 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000401 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200402 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000403 }
404 else
405 seennl |= SEEN_CR;
406 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000408 break;
409 if (seennl == SEEN_ALL)
410 break;
411 }
412 endscan:
413 ;
414 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000415 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200416 void *translated;
417 int kind = PyUnicode_KIND(output);
418 void *in_str = PyUnicode_DATA(output);
419 Py_ssize_t in, out;
420 /* XXX: Previous in-place translation here is disabled as
421 resizing is not possible anymore */
422 /* We could try to optimize this so that we only do a copy
423 when there is something to translate. On the other hand,
424 we already know there is a \r byte, so chances are high
425 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200426 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 if (translated == NULL) {
428 PyErr_NoMemory();
429 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000432 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
436 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000439 seennl |= SEEN_LF;
440 continue;
441 }
442 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200443 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000444 in++;
445 seennl |= SEEN_CRLF;
446 }
447 else
448 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000450 continue;
451 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000455 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 Py_DECREF(output);
457 output = PyUnicode_FromKindAndData(kind, translated, out);
458 if (!output)
459 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000460 }
461 self->seennl |= seennl;
462 }
463
464 return output;
465
466 error:
467 Py_DECREF(output);
468 return NULL;
469}
470
471static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000472incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000473 PyObject *args, PyObject *kwds)
474{
475 char *kwlist[] = {"input", "final", NULL};
476 PyObject *input;
477 int final = 0;
478
479 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
480 kwlist, &input, &final))
481 return NULL;
482 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
483}
484
485static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000486incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487{
488 PyObject *buffer;
489 unsigned PY_LONG_LONG flag;
490
491 if (self->decoder != Py_None) {
492 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
493 _PyIO_str_getstate, NULL);
494 if (state == NULL)
495 return NULL;
496 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
497 Py_DECREF(state);
498 return NULL;
499 }
500 Py_INCREF(buffer);
501 Py_DECREF(state);
502 }
503 else {
504 buffer = PyBytes_FromString("");
505 flag = 0;
506 }
507 flag <<= 1;
508 if (self->pendingcr)
509 flag |= 1;
510 return Py_BuildValue("NK", buffer, flag);
511}
512
513static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000514incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000515{
516 PyObject *buffer;
517 unsigned PY_LONG_LONG flag;
518
519 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
520 return NULL;
521
522 self->pendingcr = (int) flag & 1;
523 flag >>= 1;
524
525 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200526 return _PyObject_CallMethodId(self->decoder,
527 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000528 else
529 Py_RETURN_NONE;
530}
531
532static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000533incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000534{
535 self->seennl = 0;
536 self->pendingcr = 0;
537 if (self->decoder != Py_None)
538 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
539 else
540 Py_RETURN_NONE;
541}
542
543static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000544incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000545{
546 switch (self->seennl) {
547 case SEEN_CR:
548 return PyUnicode_FromString("\r");
549 case SEEN_LF:
550 return PyUnicode_FromString("\n");
551 case SEEN_CRLF:
552 return PyUnicode_FromString("\r\n");
553 case SEEN_CR | SEEN_LF:
554 return Py_BuildValue("ss", "\r", "\n");
555 case SEEN_CR | SEEN_CRLF:
556 return Py_BuildValue("ss", "\r", "\r\n");
557 case SEEN_LF | SEEN_CRLF:
558 return Py_BuildValue("ss", "\n", "\r\n");
559 case SEEN_CR | SEEN_LF | SEEN_CRLF:
560 return Py_BuildValue("sss", "\r", "\n", "\r\n");
561 default:
562 Py_RETURN_NONE;
563 }
564
565}
566
567
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000568static PyMethodDef incrementalnewlinedecoder_methods[] = {
569 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
570 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
571 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
572 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000573 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000574};
575
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000576static PyGetSetDef incrementalnewlinedecoder_getset[] = {
577 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000578 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000579};
580
581PyTypeObject PyIncrementalNewlineDecoder_Type = {
582 PyVarObject_HEAD_INIT(NULL, 0)
583 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000584 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000585 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000586 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000587 0, /*tp_print*/
588 0, /*tp_getattr*/
589 0, /*tp_setattr*/
590 0, /*tp_compare */
591 0, /*tp_repr*/
592 0, /*tp_as_number*/
593 0, /*tp_as_sequence*/
594 0, /*tp_as_mapping*/
595 0, /*tp_hash */
596 0, /*tp_call*/
597 0, /*tp_str*/
598 0, /*tp_getattro*/
599 0, /*tp_setattro*/
600 0, /*tp_as_buffer*/
601 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000602 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000603 0, /* tp_traverse */
604 0, /* tp_clear */
605 0, /* tp_richcompare */
606 0, /*tp_weaklistoffset*/
607 0, /* tp_iter */
608 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000609 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000610 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612 0, /* tp_base */
613 0, /* tp_dict */
614 0, /* tp_descr_get */
615 0, /* tp_descr_set */
616 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000617 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000618 0, /* tp_alloc */
619 PyType_GenericNew, /* tp_new */
620};
621
622
623/* TextIOWrapper */
624
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000625PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000626 "Character and line based layer over a BufferedIOBase object, buffer.\n"
627 "\n"
628 "encoding gives the name of the encoding that the stream will be\n"
629 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
630 "\n"
631 "errors determines the strictness of encoding and decoding (see the\n"
632 "codecs.register) and defaults to \"strict\".\n"
633 "\n"
634 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
635 "handling of line endings. If it is None, universal newlines is\n"
636 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
637 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
638 "caller. Conversely, on output, '\\n' is translated to the system\n"
639 "default line seperator, os.linesep. If newline is any other of its\n"
640 "legal values, that newline becomes the newline when the file is read\n"
641 "and it is returned untranslated. On output, '\\n' is converted to the\n"
642 "newline.\n"
643 "\n"
644 "If line_buffering is True, a call to flush is implied when a call to\n"
645 "write contains a newline character."
646 );
647
648typedef PyObject *
649 (*encodefunc_t)(PyObject *, PyObject *);
650
651typedef struct
652{
653 PyObject_HEAD
654 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000655 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000656 Py_ssize_t chunk_size;
657 PyObject *buffer;
658 PyObject *encoding;
659 PyObject *encoder;
660 PyObject *decoder;
661 PyObject *readnl;
662 PyObject *errors;
663 const char *writenl; /* utf-8 encoded, NULL stands for \n */
664 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200665 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000666 char readuniversal;
667 char readtranslate;
668 char writetranslate;
669 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200670 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000671 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000672 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000673 /* Specialized encoding func (see below) */
674 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000675 /* Whether or not it's the start of the stream */
676 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677
678 /* Reads and writes are internally buffered in order to speed things up.
679 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000680
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000681 Please also note that text to be written is first encoded before being
682 buffered. This is necessary so that encoding errors are immediately
683 reported to the caller, but it unfortunately means that the
684 IncrementalEncoder (whose encode() method is always written in Python)
685 becomes a bottleneck for small writes.
686 */
687 PyObject *decoded_chars; /* buffer for text returned from decoder */
688 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
689 PyObject *pending_bytes; /* list of bytes objects waiting to be
690 written, or NULL */
691 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000692
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000693 /* snapshot is either None, or a tuple (dec_flags, next_input) where
694 * dec_flags is the second (integer) item of the decoder state and
695 * next_input is the chunk of input bytes that comes next after the
696 * snapshot point. We use this to reconstruct decoder states in tell().
697 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000698 PyObject *snapshot;
699 /* Bytes-to-characters ratio for the current chunk. Serves as input for
700 the heuristic in tell(). */
701 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000702
703 /* Cache raw object if it's a FileIO object */
704 PyObject *raw;
705
706 PyObject *weakreflist;
707 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000708} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000709
710
711/* A couple of specialized cases in order to bypass the slow incremental
712 encoding methods for the most popular encodings. */
713
714static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000715ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000716{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200717 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000718}
719
720static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000721utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722{
723 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
724 PyUnicode_GET_SIZE(text),
725 PyBytes_AS_STRING(self->errors), 1);
726}
727
728static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000729utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730{
731 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
732 PyUnicode_GET_SIZE(text),
733 PyBytes_AS_STRING(self->errors), -1);
734}
735
736static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000737utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000738{
Antoine Pitroue4501852009-05-14 18:55:55 +0000739 if (!self->encoding_start_of_stream) {
740 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000741#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000742 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000744 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000745#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000746 }
747 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
748 PyUnicode_GET_SIZE(text),
749 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750}
751
Antoine Pitroue4501852009-05-14 18:55:55 +0000752static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000753utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000754{
755 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
756 PyUnicode_GET_SIZE(text),
757 PyBytes_AS_STRING(self->errors), 1);
758}
759
760static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000761utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000762{
763 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
764 PyUnicode_GET_SIZE(text),
765 PyBytes_AS_STRING(self->errors), -1);
766}
767
768static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000769utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000770{
771 if (!self->encoding_start_of_stream) {
772 /* Skip the BOM and use native byte ordering */
773#if defined(WORDS_BIGENDIAN)
774 return utf32be_encode(self, text);
775#else
776 return utf32le_encode(self, text);
777#endif
778 }
779 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
780 PyUnicode_GET_SIZE(text),
781 PyBytes_AS_STRING(self->errors), 0);
782}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000783
784static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000785utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000786{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200787 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788}
789
790static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000791latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000792{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200793 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000794}
795
796/* Map normalized encoding names onto the specialized encoding funcs */
797
798typedef struct {
799 const char *name;
800 encodefunc_t encodefunc;
801} encodefuncentry;
802
Antoine Pitrou24f36292009-03-28 22:16:42 +0000803static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000804 {"ascii", (encodefunc_t) ascii_encode},
805 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000806 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000807 {"utf-16-be", (encodefunc_t) utf16be_encode},
808 {"utf-16-le", (encodefunc_t) utf16le_encode},
809 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000810 {"utf-32-be", (encodefunc_t) utf32be_encode},
811 {"utf-32-le", (encodefunc_t) utf32le_encode},
812 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000813 {NULL, NULL}
814};
815
816
817static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000818textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000819{
820 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200821 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000822 NULL};
823 PyObject *buffer, *raw;
824 char *encoding = NULL;
825 char *errors = NULL;
826 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200827 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000828 _PyIO_State *state = IO_STATE;
829
830 PyObject *res;
831 int r;
832
833 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000834 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200835 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200837 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000838 return -1;
839
840 if (newline && newline[0] != '\0'
841 && !(newline[0] == '\n' && newline[1] == '\0')
842 && !(newline[0] == '\r' && newline[1] == '\0')
843 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
844 PyErr_Format(PyExc_ValueError,
845 "illegal newline value: %s", newline);
846 return -1;
847 }
848
849 Py_CLEAR(self->buffer);
850 Py_CLEAR(self->encoding);
851 Py_CLEAR(self->encoder);
852 Py_CLEAR(self->decoder);
853 Py_CLEAR(self->readnl);
854 Py_CLEAR(self->decoded_chars);
855 Py_CLEAR(self->pending_bytes);
856 Py_CLEAR(self->snapshot);
857 Py_CLEAR(self->errors);
858 Py_CLEAR(self->raw);
859 self->decoded_chars_used = 0;
860 self->pending_bytes_count = 0;
861 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000862 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000863
864 if (encoding == NULL) {
865 /* Try os.device_encoding(fileno) */
866 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200867 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000868 /* Ignore only AttributeError and UnsupportedOperation */
869 if (fileno == NULL) {
870 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
871 PyErr_ExceptionMatches(state->unsupported_operation)) {
872 PyErr_Clear();
873 }
874 else {
875 goto error;
876 }
877 }
878 else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200879 self->encoding = _PyObject_CallMethodId(state->os_module,
880 &PyId_device_encoding,
881 "N", fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000882 if (self->encoding == NULL)
883 goto error;
884 else if (!PyUnicode_Check(self->encoding))
885 Py_CLEAR(self->encoding);
886 }
887 }
888 if (encoding == NULL && self->encoding == NULL) {
889 if (state->locale_module == NULL) {
890 state->locale_module = PyImport_ImportModule("locale");
891 if (state->locale_module == NULL)
892 goto catch_ImportError;
893 else
894 goto use_locale;
895 }
896 else {
897 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200898 self->encoding = _PyObject_CallMethodId(
899 state->locale_module, &PyId_getpreferredencoding, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000900 if (self->encoding == NULL) {
901 catch_ImportError:
902 /*
903 Importing locale can raise a ImportError because of
904 _functools, and locale.getpreferredencoding can raise a
905 ImportError if _locale is not available. These will happen
906 during module building.
907 */
908 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
909 PyErr_Clear();
910 self->encoding = PyUnicode_FromString("ascii");
911 }
912 else
913 goto error;
914 }
915 else if (!PyUnicode_Check(self->encoding))
916 Py_CLEAR(self->encoding);
917 }
918 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000919 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000920 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000921 if (encoding == NULL)
922 goto error;
923 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000924 else if (encoding != NULL) {
925 self->encoding = PyUnicode_FromString(encoding);
926 if (self->encoding == NULL)
927 goto error;
928 }
929 else {
930 PyErr_SetString(PyExc_IOError,
931 "could not determine default encoding");
932 }
933
934 if (errors == NULL)
935 errors = "strict";
936 self->errors = PyBytes_FromString(errors);
937 if (self->errors == NULL)
938 goto error;
939
940 self->chunk_size = 8192;
941 self->readuniversal = (newline == NULL || newline[0] == '\0');
942 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200943 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000944 self->readtranslate = (newline == NULL);
945 if (newline) {
946 self->readnl = PyUnicode_FromString(newline);
947 if (self->readnl == NULL)
948 return -1;
949 }
950 self->writetranslate = (newline == NULL || newline[0] != '\0');
951 if (!self->readuniversal && self->readnl) {
952 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000953 if (self->writenl == NULL)
954 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000955 if (!strcmp(self->writenl, "\n"))
956 self->writenl = NULL;
957 }
958#ifdef MS_WINDOWS
959 else
960 self->writenl = "\r\n";
961#endif
962
963 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200964 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000965 if (res == NULL)
966 goto error;
967 r = PyObject_IsTrue(res);
968 Py_DECREF(res);
969 if (r == -1)
970 goto error;
971 if (r == 1) {
972 self->decoder = PyCodec_IncrementalDecoder(
973 encoding, errors);
974 if (self->decoder == NULL)
975 goto error;
976
977 if (self->readuniversal) {
978 PyObject *incrementalDecoder = PyObject_CallFunction(
979 (PyObject *)&PyIncrementalNewlineDecoder_Type,
980 "Oi", self->decoder, (int)self->readtranslate);
981 if (incrementalDecoder == NULL)
982 goto error;
983 Py_CLEAR(self->decoder);
984 self->decoder = incrementalDecoder;
985 }
986 }
987
988 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200989 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990 if (res == NULL)
991 goto error;
992 r = PyObject_IsTrue(res);
993 Py_DECREF(res);
994 if (r == -1)
995 goto error;
996 if (r == 1) {
997 PyObject *ci;
998 self->encoder = PyCodec_IncrementalEncoder(
999 encoding, errors);
1000 if (self->encoder == NULL)
1001 goto error;
1002 /* Get the normalized named of the codec */
1003 ci = _PyCodec_Lookup(encoding);
1004 if (ci == NULL)
1005 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001006 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001007 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001008 if (res == NULL) {
1009 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1010 PyErr_Clear();
1011 else
1012 goto error;
1013 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001014 else if (PyUnicode_Check(res)) {
1015 encodefuncentry *e = encodefuncs;
1016 while (e->name != NULL) {
1017 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1018 self->encodefunc = e->encodefunc;
1019 break;
1020 }
1021 e++;
1022 }
1023 }
1024 Py_XDECREF(res);
1025 }
1026
1027 self->buffer = buffer;
1028 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001029
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001030 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1031 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1032 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001033 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001034 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001035 if (raw == NULL) {
1036 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1037 PyErr_Clear();
1038 else
1039 goto error;
1040 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001041 else if (Py_TYPE(raw) == &PyFileIO_Type)
1042 self->raw = raw;
1043 else
1044 Py_DECREF(raw);
1045 }
1046
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001047 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001048 if (res == NULL)
1049 goto error;
1050 self->seekable = self->telling = PyObject_IsTrue(res);
1051 Py_DECREF(res);
1052
Martin v. Löwis767046a2011-10-14 15:35:36 +02001053 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001054
Antoine Pitroue4501852009-05-14 18:55:55 +00001055 self->encoding_start_of_stream = 0;
1056 if (self->seekable && self->encoder) {
1057 PyObject *cookieObj;
1058 int cmp;
1059
1060 self->encoding_start_of_stream = 1;
1061
1062 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1063 if (cookieObj == NULL)
1064 goto error;
1065
1066 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1067 Py_DECREF(cookieObj);
1068 if (cmp < 0) {
1069 goto error;
1070 }
1071
1072 if (cmp == 0) {
1073 self->encoding_start_of_stream = 0;
1074 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1075 _PyIO_zero, NULL);
1076 if (res == NULL)
1077 goto error;
1078 Py_DECREF(res);
1079 }
1080 }
1081
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001082 self->ok = 1;
1083 return 0;
1084
1085 error:
1086 return -1;
1087}
1088
1089static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001090_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001091{
1092 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1093 return -1;
1094 self->ok = 0;
1095 Py_CLEAR(self->buffer);
1096 Py_CLEAR(self->encoding);
1097 Py_CLEAR(self->encoder);
1098 Py_CLEAR(self->decoder);
1099 Py_CLEAR(self->readnl);
1100 Py_CLEAR(self->decoded_chars);
1101 Py_CLEAR(self->pending_bytes);
1102 Py_CLEAR(self->snapshot);
1103 Py_CLEAR(self->errors);
1104 Py_CLEAR(self->raw);
1105 return 0;
1106}
1107
1108static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001109textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001110{
Antoine Pitroue033e062010-10-29 10:38:18 +00001111 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001112 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001113 return;
1114 _PyObject_GC_UNTRACK(self);
1115 if (self->weakreflist != NULL)
1116 PyObject_ClearWeakRefs((PyObject *)self);
1117 Py_CLEAR(self->dict);
1118 Py_TYPE(self)->tp_free((PyObject *)self);
1119}
1120
1121static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001122textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001123{
1124 Py_VISIT(self->buffer);
1125 Py_VISIT(self->encoding);
1126 Py_VISIT(self->encoder);
1127 Py_VISIT(self->decoder);
1128 Py_VISIT(self->readnl);
1129 Py_VISIT(self->decoded_chars);
1130 Py_VISIT(self->pending_bytes);
1131 Py_VISIT(self->snapshot);
1132 Py_VISIT(self->errors);
1133 Py_VISIT(self->raw);
1134
1135 Py_VISIT(self->dict);
1136 return 0;
1137}
1138
1139static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001140textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001141{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001142 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001143 return -1;
1144 Py_CLEAR(self->dict);
1145 return 0;
1146}
1147
1148static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001149textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001150
/* This macro takes some shortcuts to make the common case faster.

   It makes the enclosing function return NULL if the stream is closed or
   if the closed state cannot be determined.  For exact TextIOWrapper
   instances the cached raw FileIO object is queried directly when there is
   one, otherwise the `closed` getter is used; for any other type the
   generic _PyIOBase_check_closed() is called. */
#define CHECK_CLOSED(self) \
    do { \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int r; \
            if (self->raw != NULL) \
                r = _PyFileIO_closed(self->raw); \
            else { \
                PyObject *_res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1177
/* Make the enclosing function raise ValueError and return NULL when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1189
/* Variant of the initialization check for functions returning int: raises
   ValueError and returns -1 when the wrapper is not usable
   (self->ok <= 0).  The message distinguishes a detached buffer from an
   object that was never initialized. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1201
1202
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001203static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001204textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001205{
1206 PyObject *buffer, *res;
1207 CHECK_INITIALIZED(self);
1208 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1209 if (res == NULL)
1210 return NULL;
1211 Py_DECREF(res);
1212 buffer = self->buffer;
1213 self->buffer = NULL;
1214 self->detached = 1;
1215 self->ok = 0;
1216 return buffer;
1217}
1218
Antoine Pitrou24f36292009-03-28 22:16:42 +00001219/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001220 underlying buffered object, though. */
1221static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001222_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001223{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001224 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001225
1226 if (self->pending_bytes == NULL)
1227 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001228
1229 pending = self->pending_bytes;
1230 Py_INCREF(pending);
1231 self->pending_bytes_count = 0;
1232 Py_CLEAR(self->pending_bytes);
1233
1234 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1235 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001236 if (b == NULL)
1237 return -1;
1238 ret = PyObject_CallMethodObjArgs(self->buffer,
1239 _PyIO_str_write, b, NULL);
1240 Py_DECREF(b);
1241 if (ret == NULL)
1242 return -1;
1243 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001244 return 0;
1245}
1246
1247static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001248textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001249{
1250 PyObject *ret;
1251 PyObject *text; /* owned reference */
1252 PyObject *b;
1253 Py_ssize_t textlen;
1254 int haslf = 0;
1255 int needflush = 0;
1256
1257 CHECK_INITIALIZED(self);
1258
1259 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1260 return NULL;
1261 }
1262
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001263 if (PyUnicode_READY(text) == -1)
1264 return NULL;
1265
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266 CHECK_CLOSED(self);
1267
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001268 if (self->encoder == NULL)
1269 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001270
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001271 Py_INCREF(text);
1272
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001273 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001274
1275 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001276 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001277 haslf = 1;
1278
1279 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001280 PyObject *newtext = _PyObject_CallMethodId(
1281 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001282 Py_DECREF(text);
1283 if (newtext == NULL)
1284 return NULL;
1285 text = newtext;
1286 }
1287
Antoine Pitroue96ec682011-07-23 21:46:35 +02001288 if (self->write_through)
1289 needflush = 1;
1290 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001291 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001292 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001293 needflush = 1;
1294
1295 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001296 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001297 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001298 self->encoding_start_of_stream = 0;
1299 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001300 else
1301 b = PyObject_CallMethodObjArgs(self->encoder,
1302 _PyIO_str_encode, text, NULL);
1303 Py_DECREF(text);
1304 if (b == NULL)
1305 return NULL;
1306
1307 if (self->pending_bytes == NULL) {
1308 self->pending_bytes = PyList_New(0);
1309 if (self->pending_bytes == NULL) {
1310 Py_DECREF(b);
1311 return NULL;
1312 }
1313 self->pending_bytes_count = 0;
1314 }
1315 if (PyList_Append(self->pending_bytes, b) < 0) {
1316 Py_DECREF(b);
1317 return NULL;
1318 }
1319 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1320 Py_DECREF(b);
1321 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001322 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 return NULL;
1324 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001325
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001326 if (needflush) {
1327 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1328 if (ret == NULL)
1329 return NULL;
1330 Py_DECREF(ret);
1331 }
1332
1333 Py_CLEAR(self->snapshot);
1334
1335 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001336 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001337 if (ret == NULL)
1338 return NULL;
1339 Py_DECREF(ret);
1340 }
1341
1342 return PyLong_FromSsize_t(textlen);
1343}
1344
1345/* Steal a reference to chars and store it in the decoded_char buffer;
1346 */
1347static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001348textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001349{
1350 Py_CLEAR(self->decoded_chars);
1351 self->decoded_chars = chars;
1352 self->decoded_chars_used = 0;
1353}
1354
1355static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001356textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001357{
1358 PyObject *chars;
1359 Py_ssize_t avail;
1360
1361 if (self->decoded_chars == NULL)
1362 return PyUnicode_FromStringAndSize(NULL, 0);
1363
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001364 /* decoded_chars is guaranteed to be "ready". */
1365 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001366 - self->decoded_chars_used);
1367
1368 assert(avail >= 0);
1369
1370 if (n < 0 || n > avail)
1371 n = avail;
1372
1373 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001374 chars = PyUnicode_Substring(self->decoded_chars,
1375 self->decoded_chars_used,
1376 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001377 if (chars == NULL)
1378 return NULL;
1379 }
1380 else {
1381 chars = self->decoded_chars;
1382 Py_INCREF(chars);
1383 }
1384
1385 self->decoded_chars_used += n;
1386 return chars;
1387}
1388
1389/* Read and decode the next chunk of data from the BufferedReader.
1390 */
1391static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001392textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001393{
1394 PyObject *dec_buffer = NULL;
1395 PyObject *dec_flags = NULL;
1396 PyObject *input_chunk = NULL;
1397 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001398 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001399 int eof;
1400
1401 /* The return value is True unless EOF was reached. The decoded string is
1402 * placed in self._decoded_chars (replacing its previous value). The
1403 * entire input chunk is sent to the decoder, though some of it may remain
1404 * buffered in the decoder, yet to be converted.
1405 */
1406
1407 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001408 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001409 return -1;
1410 }
1411
1412 if (self->telling) {
1413 /* To prepare for tell(), we need to snapshot a point in the file
1414 * where the decoder's input buffer is empty.
1415 */
1416
1417 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1418 _PyIO_str_getstate, NULL);
1419 if (state == NULL)
1420 return -1;
1421 /* Given this, we know there was a valid snapshot point
1422 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1423 */
1424 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1425 Py_DECREF(state);
1426 return -1;
1427 }
1428 Py_INCREF(dec_buffer);
1429 Py_INCREF(dec_flags);
1430 Py_DECREF(state);
1431 }
1432
1433 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1434 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1435 if (chunk_size == NULL)
1436 goto fail;
1437 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001438 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1439 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001440 Py_DECREF(chunk_size);
1441 if (input_chunk == NULL)
1442 goto fail;
1443 assert(PyBytes_Check(input_chunk));
1444
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001445 nbytes = PyBytes_Size(input_chunk);
1446 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001447
1448 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1449 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1450 self->decoder, input_chunk, eof);
1451 }
1452 else {
1453 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1454 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1455 }
1456
1457 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1458 if (decoded_chars == NULL)
1459 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001460 if (PyUnicode_READY(decoded_chars) == -1)
1461 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001462 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001463 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001464 if (nchars > 0)
1465 self->b2cratio = (double) nbytes / nchars;
1466 else
1467 self->b2cratio = 0.0;
1468 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001469 eof = 0;
1470
1471 if (self->telling) {
1472 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1473 * next input to be decoded is dec_buffer + input_chunk.
1474 */
1475 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1476 if (next_input == NULL)
1477 goto fail;
1478 assert (PyBytes_Check(next_input));
1479 Py_DECREF(dec_buffer);
1480 Py_CLEAR(self->snapshot);
1481 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1482 }
1483 Py_DECREF(input_chunk);
1484
1485 return (eof == 0);
1486
1487 fail:
1488 Py_XDECREF(dec_buffer);
1489 Py_XDECREF(dec_flags);
1490 Py_XDECREF(input_chunk);
1491 return -1;
1492}
1493
1494static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001495textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001496{
1497 Py_ssize_t n = -1;
1498 PyObject *result = NULL, *chunks = NULL;
1499
1500 CHECK_INITIALIZED(self);
1501
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001502 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001503 return NULL;
1504
1505 CHECK_CLOSED(self);
1506
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001507 if (self->decoder == NULL)
1508 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001509
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001510 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511 return NULL;
1512
1513 if (n < 0) {
1514 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001515 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001516 PyObject *decoded;
1517 if (bytes == NULL)
1518 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001519
1520 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1521 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1522 bytes, 1);
1523 else
1524 decoded = PyObject_CallMethodObjArgs(
1525 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001526 Py_DECREF(bytes);
1527 if (decoded == NULL)
1528 goto fail;
1529
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001530 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531
1532 if (result == NULL) {
1533 Py_DECREF(decoded);
1534 return NULL;
1535 }
1536
1537 PyUnicode_AppendAndDel(&result, decoded);
1538 if (result == NULL)
1539 goto fail;
1540
1541 Py_CLEAR(self->snapshot);
1542 return result;
1543 }
1544 else {
1545 int res = 1;
1546 Py_ssize_t remaining = n;
1547
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001548 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549 if (result == NULL)
1550 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001551 if (PyUnicode_READY(result) == -1)
1552 goto fail;
1553 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554
1555 /* Keep reading chunks until we have n characters to return */
1556 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001557 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001558 if (res < 0)
1559 goto fail;
1560 if (res == 0) /* EOF */
1561 break;
1562 if (chunks == NULL) {
1563 chunks = PyList_New(0);
1564 if (chunks == NULL)
1565 goto fail;
1566 }
1567 if (PyList_Append(chunks, result) < 0)
1568 goto fail;
1569 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001570 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001571 if (result == NULL)
1572 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001573 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574 }
1575 if (chunks != NULL) {
1576 if (result != NULL && PyList_Append(chunks, result) < 0)
1577 goto fail;
1578 Py_CLEAR(result);
1579 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1580 if (result == NULL)
1581 goto fail;
1582 Py_CLEAR(chunks);
1583 }
1584 return result;
1585 }
1586 fail:
1587 Py_XDECREF(result);
1588 Py_XDECREF(chunks);
1589 return NULL;
1590}
1591
1592
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001593/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001594 that is to the NUL character. Otherwise the function will produce
1595 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001596static char *
1597find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001598{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001599 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001600 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001601 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001602 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 return s;
1604 if (s == end)
1605 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001606 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001607 }
1608}
1609
1610Py_ssize_t
1611_PyIO_find_line_ending(
1612 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001613 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001615 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616
1617 if (translated) {
1618 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001619 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001621 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001622 else {
1623 *consumed = len;
1624 return -1;
1625 }
1626 }
1627 else if (universal) {
1628 /* Universal newline search. Find any of \r, \r\n, \n
1629 * The decoder ensures that \r\n are not split in two pieces
1630 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001631 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001633 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001634 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001635 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001636 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001637 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001638 if (s >= end) {
1639 *consumed = len;
1640 return -1;
1641 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001642 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001643 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001644 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001645 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001646 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001647 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001648 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001650 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001651 }
1652 }
1653 }
1654 else {
1655 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001656 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1657 char *nl = PyUnicode_DATA(readnl);
1658 /* Assume that readnl is an ASCII character. */
1659 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001660 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001661 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001662 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001663 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001664 *consumed = len;
1665 return -1;
1666 }
1667 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001669 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001670 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671 if (e < s)
1672 e = s;
1673 while (s < e) {
1674 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001675 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676 if (pos == NULL || pos >= e)
1677 break;
1678 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001679 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 break;
1681 }
1682 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001683 return (pos - start)/kind + readnl_len;
1684 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001685 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001686 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 if (pos == NULL)
1688 *consumed = len;
1689 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001690 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691 return -1;
1692 }
1693 }
1694}
1695
1696static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001697_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001698{
1699 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1700 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1701 int res;
1702
1703 CHECK_CLOSED(self);
1704
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001705 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001706 return NULL;
1707
1708 chunked = 0;
1709
1710 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001711 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001712 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001713 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 Py_ssize_t consumed = 0;
1715
1716 /* First, get some data if necessary */
1717 res = 1;
1718 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001719 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001720 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (res < 0)
1722 goto error;
1723 if (res == 0)
1724 break;
1725 }
1726 if (res == 0) {
1727 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001728 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001729 Py_CLEAR(self->snapshot);
1730 start = endpos = offset_to_buffer = 0;
1731 break;
1732 }
1733
1734 if (remaining == NULL) {
1735 line = self->decoded_chars;
1736 start = self->decoded_chars_used;
1737 offset_to_buffer = 0;
1738 Py_INCREF(line);
1739 }
1740 else {
1741 assert(self->decoded_chars_used == 0);
1742 line = PyUnicode_Concat(remaining, self->decoded_chars);
1743 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001744 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001745 Py_CLEAR(remaining);
1746 if (line == NULL)
1747 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001748 if (PyUnicode_READY(line) == -1)
1749 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001750 }
1751
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001752 ptr = PyUnicode_DATA(line);
1753 line_len = PyUnicode_GET_LENGTH(line);
1754 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001755
1756 endpos = _PyIO_find_line_ending(
1757 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001758 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001759 ptr + kind * start,
1760 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001761 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762 if (endpos >= 0) {
1763 endpos += start;
1764 if (limit >= 0 && (endpos - start) + chunked >= limit)
1765 endpos = start + limit - chunked;
1766 break;
1767 }
1768
1769 /* We can put aside up to `endpos` */
1770 endpos = consumed + start;
1771 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1772 /* Didn't find line ending, but reached length limit */
1773 endpos = start + limit - chunked;
1774 break;
1775 }
1776
1777 if (endpos > start) {
1778 /* No line ending seen yet - put aside current data */
1779 PyObject *s;
1780 if (chunks == NULL) {
1781 chunks = PyList_New(0);
1782 if (chunks == NULL)
1783 goto error;
1784 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001785 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001786 if (s == NULL)
1787 goto error;
1788 if (PyList_Append(chunks, s) < 0) {
1789 Py_DECREF(s);
1790 goto error;
1791 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001792 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001793 Py_DECREF(s);
1794 }
1795 /* There may be some remaining bytes we'll have to prepend to the
1796 next chunk of data */
1797 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001798 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001799 if (remaining == NULL)
1800 goto error;
1801 }
1802 Py_CLEAR(line);
1803 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001804 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805 }
1806
1807 if (line != NULL) {
1808 /* Our line ends in the current buffer */
1809 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001810 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1811 PyObject *s = PyUnicode_Substring(line, start, endpos);
1812 Py_CLEAR(line);
1813 if (s == NULL)
1814 goto error;
1815 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001816 }
1817 }
1818 if (remaining != NULL) {
1819 if (chunks == NULL) {
1820 chunks = PyList_New(0);
1821 if (chunks == NULL)
1822 goto error;
1823 }
1824 if (PyList_Append(chunks, remaining) < 0)
1825 goto error;
1826 Py_CLEAR(remaining);
1827 }
1828 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001829 if (line != NULL) {
1830 if (PyList_Append(chunks, line) < 0)
1831 goto error;
1832 Py_DECREF(line);
1833 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001834 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1835 if (line == NULL)
1836 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001837 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001838 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001839 if (line == NULL) {
1840 Py_INCREF(_PyIO_empty_str);
1841 line = _PyIO_empty_str;
1842 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001843
1844 return line;
1845
1846 error:
1847 Py_XDECREF(chunks);
1848 Py_XDECREF(remaining);
1849 Py_XDECREF(line);
1850 return NULL;
1851}
1852
1853static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001854textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001855{
1856 Py_ssize_t limit = -1;
1857
1858 CHECK_INITIALIZED(self);
1859 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1860 return NULL;
1861 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001862 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001863}
1864
1865/* Seek and Tell */
1866
1867typedef struct {
1868 Py_off_t start_pos;
1869 int dec_flags;
1870 int bytes_to_feed;
1871 int chars_to_skip;
1872 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001873} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001874
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the
   intermediary byte string the various cookie_type fields are stored.
   Each offset is expressed relative to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1912
1913static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001914textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001915{
1916 unsigned char buffer[COOKIE_BUF_LEN];
1917 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1918 if (cookieLong == NULL)
1919 return -1;
1920
1921 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1922 IS_LITTLE_ENDIAN, 0) < 0) {
1923 Py_DECREF(cookieLong);
1924 return -1;
1925 }
1926 Py_DECREF(cookieLong);
1927
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001928 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1929 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1930 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1931 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1932 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001933
1934 return 0;
1935}
1936
1937static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001938textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001939{
1940 unsigned char buffer[COOKIE_BUF_LEN];
1941
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001942 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1943 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1944 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1945 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1946 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001947
1948 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1949}
1950#undef IS_LITTLE_ENDIAN
1951
1952static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001953_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001954{
1955 PyObject *res;
1956 /* When seeking to the start of the stream, we call decoder.reset()
1957 rather than decoder.getstate().
1958 This is for a few decoders such as utf-16 for which the state value
1959 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1960 utf-16, that we are expecting a BOM).
1961 */
1962 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1963 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1964 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001965 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1966 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001967 if (res == NULL)
1968 return -1;
1969 Py_DECREF(res);
1970 return 0;
1971}
1972
Antoine Pitroue4501852009-05-14 18:55:55 +00001973static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001974_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001975{
1976 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001977 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001978 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1979 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1980 self->encoding_start_of_stream = 1;
1981 }
1982 else {
1983 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1984 _PyIO_zero, NULL);
1985 self->encoding_start_of_stream = 0;
1986 }
1987 if (res == NULL)
1988 return -1;
1989 Py_DECREF(res);
1990 return 0;
1991}
1992
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001993static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001994textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001995{
1996 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001997 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 PyObject *res;
2000 int cmp;
2001
2002 CHECK_INITIALIZED(self);
2003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2005 return NULL;
2006 CHECK_CLOSED(self);
2007
2008 Py_INCREF(cookieObj);
2009
2010 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002011 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002012 goto fail;
2013 }
2014
2015 if (whence == 1) {
2016 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002017 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002018 if (cmp < 0)
2019 goto fail;
2020
2021 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002022 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002023 goto fail;
2024 }
2025
2026 /* Seeking to the current position should attempt to
2027 * sync the underlying buffer with the current position.
2028 */
2029 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002030 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031 if (cookieObj == NULL)
2032 goto fail;
2033 }
2034 else if (whence == 2) {
2035 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002036 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002037 if (cmp < 0)
2038 goto fail;
2039
2040 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002041 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002042 goto fail;
2043 }
2044
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002045 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002046 if (res == NULL)
2047 goto fail;
2048 Py_DECREF(res);
2049
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002050 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002051 Py_CLEAR(self->snapshot);
2052 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002053 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054 if (res == NULL)
2055 goto fail;
2056 Py_DECREF(res);
2057 }
2058
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002059 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002060 Py_XDECREF(cookieObj);
2061 return res;
2062 }
2063 else if (whence != 0) {
2064 PyErr_Format(PyExc_ValueError,
2065 "invalid whence (%d, should be 0, 1 or 2)", whence);
2066 goto fail;
2067 }
2068
Antoine Pitroue4501852009-05-14 18:55:55 +00002069 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002070 if (cmp < 0)
2071 goto fail;
2072
2073 if (cmp == 1) {
2074 PyErr_Format(PyExc_ValueError,
2075 "negative seek position %R", cookieObj);
2076 goto fail;
2077 }
2078
2079 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2080 if (res == NULL)
2081 goto fail;
2082 Py_DECREF(res);
2083
2084 /* The strategy of seek() is to go back to the safe start point
2085 * and replay the effect of read(chars_to_skip) from there.
2086 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002087 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002088 goto fail;
2089
2090 /* Seek back to the safe start point. */
2091 posobj = PyLong_FromOff_t(cookie.start_pos);
2092 if (posobj == NULL)
2093 goto fail;
2094 res = PyObject_CallMethodObjArgs(self->buffer,
2095 _PyIO_str_seek, posobj, NULL);
2096 Py_DECREF(posobj);
2097 if (res == NULL)
2098 goto fail;
2099 Py_DECREF(res);
2100
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002101 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002102 Py_CLEAR(self->snapshot);
2103
2104 /* Restore the decoder to its state from the safe start point. */
2105 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002106 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002107 goto fail;
2108 }
2109
2110 if (cookie.chars_to_skip) {
2111 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002112 PyObject *input_chunk = _PyObject_CallMethodId(
2113 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 PyObject *decoded;
2115
2116 if (input_chunk == NULL)
2117 goto fail;
2118
2119 assert (PyBytes_Check(input_chunk));
2120
2121 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2122 if (self->snapshot == NULL) {
2123 Py_DECREF(input_chunk);
2124 goto fail;
2125 }
2126
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002127 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2128 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002129
2130 if (decoded == NULL)
2131 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002132 if (PyUnicode_READY(decoded) == -1) {
2133 Py_DECREF(decoded);
2134 goto fail;
2135 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002137 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002138
2139 /* Skip chars_to_skip of the decoded characters. */
2140 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2141 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2142 goto fail;
2143 }
2144 self->decoded_chars_used = cookie.chars_to_skip;
2145 }
2146 else {
2147 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2148 if (self->snapshot == NULL)
2149 goto fail;
2150 }
2151
Antoine Pitroue4501852009-05-14 18:55:55 +00002152 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2153 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002154 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002155 goto fail;
2156 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157 return cookieObj;
2158 fail:
2159 Py_XDECREF(cookieObj);
2160 return NULL;
2161
2162}
2163
2164static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002165textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002166{
2167 PyObject *res;
2168 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002169 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002170 PyObject *next_input;
2171 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002172 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173 PyObject *saved_state = NULL;
2174 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002175 char *dec_buffer;
2176 Py_ssize_t dec_buffer_len;
2177 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002178
2179 CHECK_INITIALIZED(self);
2180 CHECK_CLOSED(self);
2181
2182 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002183 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002184 goto fail;
2185 }
2186 if (!self->telling) {
2187 PyErr_SetString(PyExc_IOError,
2188 "telling position disabled by next() call");
2189 goto fail;
2190 }
2191
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002192 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002193 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002194 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002195 if (res == NULL)
2196 goto fail;
2197 Py_DECREF(res);
2198
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002199 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002200 if (posobj == NULL)
2201 goto fail;
2202
2203 if (self->decoder == NULL || self->snapshot == NULL) {
2204 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2205 return posobj;
2206 }
2207
2208#if defined(HAVE_LARGEFILE_SUPPORT)
2209 cookie.start_pos = PyLong_AsLongLong(posobj);
2210#else
2211 cookie.start_pos = PyLong_AsLong(posobj);
2212#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002213 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002214 if (PyErr_Occurred())
2215 goto fail;
2216
2217 /* Skip backward to the snapshot point (see _read_chunk). */
2218 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2219 goto fail;
2220
2221 assert (PyBytes_Check(next_input));
2222
2223 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2224
2225 /* How many decoded characters have been used up since the snapshot? */
2226 if (self->decoded_chars_used == 0) {
2227 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002228 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002229 }
2230
2231 chars_to_skip = self->decoded_chars_used;
2232
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002233 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002234 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2235 _PyIO_str_getstate, NULL);
2236 if (saved_state == NULL)
2237 goto fail;
2238
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002239#define DECODER_GETSTATE() do { \
2240 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2241 _PyIO_str_getstate, NULL); \
2242 if (_state == NULL) \
2243 goto fail; \
2244 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2245 Py_DECREF(_state); \
2246 goto fail; \
2247 } \
2248 Py_DECREF(_state); \
2249 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002250
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002251 /* TODO: replace assert with exception */
2252#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002253 PyObject *_decoded = _PyObject_CallMethodId( \
2254 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002255 if (_decoded == NULL) \
2256 goto fail; \
2257 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002258 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002259 Py_DECREF(_decoded); \
2260 } while (0)
2261
2262 /* Fast search for an acceptable start point, close to our
2263 current pos */
2264 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2265 skip_back = 1;
2266 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2267 input = PyBytes_AS_STRING(next_input);
2268 while (skip_bytes > 0) {
2269 /* Decode up to temptative start point */
2270 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2271 goto fail;
2272 DECODER_DECODE(input, skip_bytes, chars_decoded);
2273 if (chars_decoded <= chars_to_skip) {
2274 DECODER_GETSTATE();
2275 if (dec_buffer_len == 0) {
2276 /* Before pos and no bytes buffered in decoder => OK */
2277 cookie.dec_flags = dec_flags;
2278 chars_to_skip -= chars_decoded;
2279 break;
2280 }
2281 /* Skip back by buffered amount and reset heuristic */
2282 skip_bytes -= dec_buffer_len;
2283 skip_back = 1;
2284 }
2285 else {
2286 /* We're too far ahead, skip back a bit */
2287 skip_bytes -= skip_back;
2288 skip_back *= 2;
2289 }
2290 }
2291 if (skip_bytes <= 0) {
2292 skip_bytes = 0;
2293 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2294 goto fail;
2295 }
2296
2297 /* Note our initial start point. */
2298 cookie.start_pos += skip_bytes;
2299 cookie.chars_to_skip = chars_to_skip;
2300 if (chars_to_skip == 0)
2301 goto finally;
2302
2303 /* We should be close to the desired position. Now feed the decoder one
2304 * byte at a time until we reach the `chars_to_skip` target.
2305 * As we go, note the nearest "safe start point" before the current
2306 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002307 * can safely start from there and advance to this location).
2308 */
2309 chars_decoded = 0;
2310 input = PyBytes_AS_STRING(next_input);
2311 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002312 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002313 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002314 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002315
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002316 DECODER_DECODE(input, 1, n);
2317 /* We got n chars for 1 byte */
2318 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002319 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002320 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002321
2322 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2323 /* Decoder buffer is empty, so this is a safe start point. */
2324 cookie.start_pos += cookie.bytes_to_feed;
2325 chars_to_skip -= chars_decoded;
2326 cookie.dec_flags = dec_flags;
2327 cookie.bytes_to_feed = 0;
2328 chars_decoded = 0;
2329 }
2330 if (chars_decoded >= chars_to_skip)
2331 break;
2332 input++;
2333 }
2334 if (input == input_end) {
2335 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002336 PyObject *decoded = _PyObject_CallMethodId(
2337 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002338 if (decoded == NULL)
2339 goto fail;
2340 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002341 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002342 Py_DECREF(decoded);
2343 cookie.need_eof = 1;
2344
2345 if (chars_decoded < chars_to_skip) {
2346 PyErr_SetString(PyExc_IOError,
2347 "can't reconstruct logical file position");
2348 goto fail;
2349 }
2350 }
2351
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002352finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002353 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002354 Py_DECREF(saved_state);
2355 if (res == NULL)
2356 return NULL;
2357 Py_DECREF(res);
2358
2359 /* The returned cookie corresponds to the last safe start point. */
2360 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002361 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002362
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002363fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364 if (saved_state) {
2365 PyObject *type, *value, *traceback;
2366 PyErr_Fetch(&type, &value, &traceback);
2367
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002368 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002369 Py_DECREF(saved_state);
2370 if (res == NULL)
2371 return NULL;
2372 Py_DECREF(res);
2373
2374 PyErr_Restore(type, value, traceback);
2375 }
2376 return NULL;
2377}
2378
2379static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002380textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002381{
2382 PyObject *pos = Py_None;
2383 PyObject *res;
2384
2385 CHECK_INITIALIZED(self)
2386 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2387 return NULL;
2388 }
2389
2390 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2391 if (res == NULL)
2392 return NULL;
2393 Py_DECREF(res);
2394
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002395 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396}
2397
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002398static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002399textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002400{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002401 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002402
2403 CHECK_INITIALIZED(self);
2404
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002405 res = PyUnicode_FromString("<_io.TextIOWrapper");
2406 if (res == NULL)
2407 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002408 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002409 if (nameobj == NULL) {
2410 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2411 PyErr_Clear();
2412 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002413 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002414 }
2415 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002416 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002417 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002418 if (s == NULL)
2419 goto error;
2420 PyUnicode_AppendAndDel(&res, s);
2421 if (res == NULL)
2422 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002423 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002424 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002425 if (modeobj == NULL) {
2426 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2427 PyErr_Clear();
2428 else
2429 goto error;
2430 }
2431 else {
2432 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2433 Py_DECREF(modeobj);
2434 if (s == NULL)
2435 goto error;
2436 PyUnicode_AppendAndDel(&res, s);
2437 if (res == NULL)
2438 return NULL;
2439 }
2440 s = PyUnicode_FromFormat("%U encoding=%R>",
2441 res, self->encoding);
2442 Py_DECREF(res);
2443 return s;
2444error:
2445 Py_XDECREF(res);
2446 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002447}
2448
2449
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450/* Inquiries */
2451
2452static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002453textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002454{
2455 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002456 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002457}
2458
2459static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002460textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002461{
2462 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002463 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464}
2465
2466static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002467textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002468{
2469 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002470 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471}
2472
2473static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002474textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002475{
2476 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002477 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478}
2479
2480static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002481textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002482{
2483 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002484 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485}
2486
2487static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002488textiowrapper_getstate(textio *self, PyObject *args)
2489{
2490 PyErr_Format(PyExc_TypeError,
2491 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2492 return NULL;
2493}
2494
2495static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002496textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497{
2498 CHECK_INITIALIZED(self);
2499 CHECK_CLOSED(self);
2500 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002501 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002502 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002503 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504}
2505
2506static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002507textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002508{
2509 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002510 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002512
Antoine Pitrou6be88762010-05-03 16:48:20 +00002513 res = textiowrapper_closed_get(self, NULL);
2514 if (res == NULL)
2515 return NULL;
2516 r = PyObject_IsTrue(res);
2517 Py_DECREF(res);
2518 if (r < 0)
2519 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002520
Antoine Pitrou6be88762010-05-03 16:48:20 +00002521 if (r > 0) {
2522 Py_RETURN_NONE; /* stream already closed */
2523 }
2524 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002525 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002526 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002527 if (res)
2528 Py_DECREF(res);
2529 else
2530 PyErr_Clear();
2531 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002532 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002533 if (res == NULL) {
2534 return NULL;
2535 }
2536 else
2537 Py_DECREF(res);
2538
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002539 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002540 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541}
2542
2543static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002544textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002545{
2546 PyObject *line;
2547
2548 CHECK_INITIALIZED(self);
2549
2550 self->telling = 0;
2551 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2552 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002553 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554 }
2555 else {
2556 line = PyObject_CallMethodObjArgs((PyObject *)self,
2557 _PyIO_str_readline, NULL);
2558 if (line && !PyUnicode_Check(line)) {
2559 PyErr_Format(PyExc_IOError,
2560 "readline() should have returned an str object, "
2561 "not '%.200s'", Py_TYPE(line)->tp_name);
2562 Py_DECREF(line);
2563 return NULL;
2564 }
2565 }
2566
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002567 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002568 return NULL;
2569
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002570 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002571 /* Reached EOF or would have blocked */
2572 Py_DECREF(line);
2573 Py_CLEAR(self->snapshot);
2574 self->telling = self->seekable;
2575 return NULL;
2576 }
2577
2578 return line;
2579}
2580
2581static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002582textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002583{
2584 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002585 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002586}
2587
2588static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002589textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590{
2591 CHECK_INITIALIZED(self);
2592 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2593}
2594
2595static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002596textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002597{
2598 PyObject *res;
2599 CHECK_INITIALIZED(self);
2600 if (self->decoder == NULL)
2601 Py_RETURN_NONE;
2602 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2603 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002604 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2605 PyErr_Clear();
2606 Py_RETURN_NONE;
2607 }
2608 else {
2609 return NULL;
2610 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002611 }
2612 return res;
2613}
2614
2615static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002616textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002617{
2618 CHECK_INITIALIZED(self);
2619 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2620}
2621
2622static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002623textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002624{
2625 CHECK_INITIALIZED(self);
2626 return PyLong_FromSsize_t(self->chunk_size);
2627}
2628
2629static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002630textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002631{
2632 Py_ssize_t n;
2633 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002634 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002635 if (n == -1 && PyErr_Occurred())
2636 return -1;
2637 if (n <= 0) {
2638 PyErr_SetString(PyExc_ValueError,
2639 "a strictly positive integer is required");
2640 return -1;
2641 }
2642 self->chunk_size = n;
2643 return 0;
2644}
2645
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002646static PyMethodDef textiowrapper_methods[] = {
2647 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2648 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2649 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2650 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2651 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2652 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002653
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002654 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2655 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2656 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2657 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2658 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002659 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002661 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2662 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2663 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002664 {NULL, NULL}
2665};
2666
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002667static PyMemberDef textiowrapper_members[] = {
2668 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2669 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2670 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002671 {NULL}
2672};
2673
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002674static PyGetSetDef textiowrapper_getset[] = {
2675 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2676 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002677/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2678*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002679 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2680 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2681 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2682 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002683 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002684};
2685
2686PyTypeObject PyTextIOWrapper_Type = {
2687 PyVarObject_HEAD_INIT(NULL, 0)
2688 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002689 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002691 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002692 0, /*tp_print*/
2693 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002694 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002695 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002696 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002697 0, /*tp_as_number*/
2698 0, /*tp_as_sequence*/
2699 0, /*tp_as_mapping*/
2700 0, /*tp_hash */
2701 0, /*tp_call*/
2702 0, /*tp_str*/
2703 0, /*tp_getattro*/
2704 0, /*tp_setattro*/
2705 0, /*tp_as_buffer*/
2706 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2707 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002708 textiowrapper_doc, /* tp_doc */
2709 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2710 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002711 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002712 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002713 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002714 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2715 textiowrapper_methods, /* tp_methods */
2716 textiowrapper_members, /* tp_members */
2717 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002718 0, /* tp_base */
2719 0, /* tp_dict */
2720 0, /* tp_descr_get */
2721 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002722 offsetof(textio, dict), /*tp_dictoffset*/
2723 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002724 0, /* tp_alloc */
2725 PyType_GenericNew, /* tp_new */
2726};