blob: 590a9e662c6209d35fc9ed9ddc9314c8dd2ba37f [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifiers for attribute/method names looked up in this file.
   Each declaration provides a PyId_<name> object (for example PyId_setstate,
   which is passed to _PyObject_CallMethodId() further down). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(device_encoding);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020035
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000036/* TextIOBase */
37
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000038PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000039 "Base class for text I/O.\n"
40 "\n"
41 "This class provides a character and line based interface to stream\n"
42 "I/O. There is no readinto method because Python's character strings\n"
43 "are immutable. There is no public constructor.\n"
44 );
45
46static PyObject *
47_unsupported(const char *message)
48{
49 PyErr_SetString(IO_STATE->unsupported_operation, message);
50 return NULL;
51}
52
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000053PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000054 "Separate the underlying buffer from the TextIOBase and return it.\n"
55 "\n"
56 "After the underlying buffer has been detached, the TextIO is in an\n"
57 "unusable state.\n"
58 );
59
60static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000061textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000062{
63 return _unsupported("detach");
64}
65
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000066PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000067 "Read at most n characters from stream.\n"
68 "\n"
69 "Read from underlying buffer until we have n characters or we hit EOF.\n"
70 "If n is negative or omitted, read until EOF.\n"
71 );
72
73static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000074textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000075{
76 return _unsupported("read");
77}
78
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000079PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000080 "Read until newline or EOF.\n"
81 "\n"
82 "Returns an empty string if EOF is hit immediately.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("readline");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Write string to stream.\n"
93 "Returns the number of characters written (which is always equal to\n"
94 "the length of the string).\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("write");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Encoding of the text stream.\n"
105 "\n"
106 "Subclasses should override.\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 Py_RETURN_NONE;
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Line endings translated so far.\n"
117 "\n"
118 "Only line endings translated during reading are considered.\n"
119 "\n"
120 "Subclasses should override.\n"
121 );
122
123static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125{
126 Py_RETURN_NONE;
127}
128
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000129PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000130 "The error setting of the decoder or encoder.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000141
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000142static PyMethodDef textiobase_methods[] = {
143 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
144 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
145 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
146 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000147 {NULL, NULL}
148};
149
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000150static PyGetSetDef textiobase_getset[] = {
151 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
152 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
153 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000154 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000155};
156
157PyTypeObject PyTextIOBase_Type = {
158 PyVarObject_HEAD_INIT(NULL, 0)
159 "_io._TextIOBase", /*tp_name*/
160 0, /*tp_basicsize*/
161 0, /*tp_itemsize*/
162 0, /*tp_dealloc*/
163 0, /*tp_print*/
164 0, /*tp_getattr*/
165 0, /*tp_setattr*/
166 0, /*tp_compare */
167 0, /*tp_repr*/
168 0, /*tp_as_number*/
169 0, /*tp_as_sequence*/
170 0, /*tp_as_mapping*/
171 0, /*tp_hash */
172 0, /*tp_call*/
173 0, /*tp_str*/
174 0, /*tp_getattro*/
175 0, /*tp_setattro*/
176 0, /*tp_as_buffer*/
177 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000178 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000179 0, /* tp_traverse */
180 0, /* tp_clear */
181 0, /* tp_richcompare */
182 0, /* tp_weaklistoffset */
183 0, /* tp_iter */
184 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000185 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000186 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 &PyIOBase_Type, /* tp_base */
189 0, /* tp_dict */
190 0, /* tp_descr_get */
191 0, /* tp_descr_set */
192 0, /* tp_dictoffset */
193 0, /* tp_init */
194 0, /* tp_alloc */
195 0, /* tp_new */
196};
197
198
199/* IncrementalNewlineDecoder */
200
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000201PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000202 "Codec used when reading a file in universal newlines mode. It wraps\n"
203 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
204 "records the types of newlines encountered. When used with\n"
205 "translate=False, it ensures that the newline sequence is returned in\n"
206 "one piece. When used with decoder=None, it expects unicode strings as\n"
207 "decode input and translates newlines without first invoking an external\n"
208 "decoder.\n"
209 );
210
211typedef struct {
212 PyObject_HEAD
213 PyObject *decoder;
214 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000215 signed int pendingcr: 1;
216 signed int translate: 1;
217 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000218} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000219
220static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000221incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222 PyObject *args, PyObject *kwds)
223{
224 PyObject *decoder;
225 int translate;
226 PyObject *errors = NULL;
227 char *kwlist[] = {"decoder", "translate", "errors", NULL};
228
229 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
230 kwlist, &decoder, &translate, &errors))
231 return -1;
232
233 self->decoder = decoder;
234 Py_INCREF(decoder);
235
236 if (errors == NULL) {
237 self->errors = PyUnicode_FromString("strict");
238 if (self->errors == NULL)
239 return -1;
240 }
241 else {
242 Py_INCREF(errors);
243 self->errors = errors;
244 }
245
246 self->translate = translate;
247 self->seennl = 0;
248 self->pendingcr = 0;
249
250 return 0;
251}
252
253static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000254incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000255{
256 Py_CLEAR(self->decoder);
257 Py_CLEAR(self->errors);
258 Py_TYPE(self)->tp_free((PyObject *)self);
259}
260
/* Bit flags stored in nldecoder_object.seennl: which newline conventions
   have been encountered so far. */
#define SEEN_CR   0x1   /* lone "\r" */
#define SEEN_LF   0x2   /* lone "\n" */
#define SEEN_CRLF 0x4   /* "\r\n" pair */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
265
266PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000267_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 PyObject *input, int final)
269{
270 PyObject *output;
271 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273
274 if (self->decoder == NULL) {
275 PyErr_SetString(PyExc_ValueError,
276 "IncrementalNewlineDecoder.__init__ not called");
277 return NULL;
278 }
279
280 /* decode input (with the eventual \r from a previous pass) */
281 if (self->decoder != Py_None) {
282 output = PyObject_CallMethodObjArgs(self->decoder,
283 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
284 }
285 else {
286 output = input;
287 Py_INCREF(output);
288 }
289
290 if (output == NULL)
291 return NULL;
292
293 if (!PyUnicode_Check(output)) {
294 PyErr_SetString(PyExc_TypeError,
295 "decoder should return a string result");
296 goto error;
297 }
298
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200299 if (PyUnicode_READY(output) == -1)
300 goto error;
301
302 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000303 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200304 /* Prefix output with CR */
305 int kind;
306 PyObject *modified;
307 char *out;
308
309 modified = PyUnicode_New(output_len + 1,
310 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311 if (modified == NULL)
312 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200313 kind = PyUnicode_KIND(modified);
314 out = PyUnicode_DATA(modified);
315 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200316 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000317 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200318 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000319 self->pendingcr = 0;
320 output_len++;
321 }
322
323 /* retain last \r even when not translating data:
324 * then readline() is sure to get \r\n in one pass
325 */
326 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000327 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
329 {
330 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
331 if (modified == NULL)
332 goto error;
333 Py_DECREF(output);
334 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000335 self->pendingcr = 1;
336 }
337 }
338
339 /* Record which newlines are read and do newline translation if desired,
340 all in one pass. */
341 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000343 Py_ssize_t len;
344 int seennl = self->seennl;
345 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200346 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 in_str = PyUnicode_DATA(output);
349 len = PyUnicode_GET_LENGTH(output);
350 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000351
352 if (len == 0)
353 return output;
354
355 /* If, up to now, newlines are consistently \n, do a quick check
356 for the \r *byte* with the libc's optimized memchr.
357 */
358 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200359 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360 }
361
Antoine Pitrou66913e22009-03-06 23:40:56 +0000362 if (only_lf) {
363 /* If not already seen, quick scan for a possible "\n" character.
364 (there's nothing else to be done, even when in translation mode)
365 */
366 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200367 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100368 if (kind == PyUnicode_1BYTE_KIND)
369 seennl |= SEEN_LF;
370 else {
371 Py_ssize_t i = 0;
372 for (;;) {
373 Py_UCS4 c;
374 /* Fast loop for non-control characters */
375 while (PyUnicode_READ(kind, in_str, i) > '\n')
376 i++;
377 c = PyUnicode_READ(kind, in_str, i++);
378 if (c == '\n') {
379 seennl |= SEEN_LF;
380 break;
381 }
382 if (i >= len)
383 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000385 }
386 }
387 /* Finished: we have scanned for newlines, and none of them
388 need translating */
389 }
390 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200391 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000392 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000393 if (seennl == SEEN_ALL)
394 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000395 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200396 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200398 while (PyUnicode_READ(kind, in_str, i) > '\r')
399 i++;
400 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000401 if (c == '\n')
402 seennl |= SEEN_LF;
403 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200404 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000405 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200406 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000407 }
408 else
409 seennl |= SEEN_CR;
410 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200411 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000412 break;
413 if (seennl == SEEN_ALL)
414 break;
415 }
416 endscan:
417 ;
418 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000419 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 void *translated;
421 int kind = PyUnicode_KIND(output);
422 void *in_str = PyUnicode_DATA(output);
423 Py_ssize_t in, out;
424 /* XXX: Previous in-place translation here is disabled as
425 resizing is not possible anymore */
426 /* We could try to optimize this so that we only do a copy
427 when there is something to translate. On the other hand,
428 we already know there is a \r byte, so chances are high
429 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200430 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 if (translated == NULL) {
432 PyErr_NoMemory();
433 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200437 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000438 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200439 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
440 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000441 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000443 seennl |= SEEN_LF;
444 continue;
445 }
446 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000448 in++;
449 seennl |= SEEN_CRLF;
450 }
451 else
452 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000454 continue;
455 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000457 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200458 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000459 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200460 Py_DECREF(output);
461 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100462 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200463 if (!output)
464 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 }
466 self->seennl |= seennl;
467 }
468
469 return output;
470
471 error:
472 Py_DECREF(output);
473 return NULL;
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478 PyObject *args, PyObject *kwds)
479{
480 char *kwlist[] = {"input", "final", NULL};
481 PyObject *input;
482 int final = 0;
483
484 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
485 kwlist, &input, &final))
486 return NULL;
487 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
488}
489
490static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000491incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000492{
493 PyObject *buffer;
494 unsigned PY_LONG_LONG flag;
495
496 if (self->decoder != Py_None) {
497 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
498 _PyIO_str_getstate, NULL);
499 if (state == NULL)
500 return NULL;
501 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
502 Py_DECREF(state);
503 return NULL;
504 }
505 Py_INCREF(buffer);
506 Py_DECREF(state);
507 }
508 else {
509 buffer = PyBytes_FromString("");
510 flag = 0;
511 }
512 flag <<= 1;
513 if (self->pendingcr)
514 flag |= 1;
515 return Py_BuildValue("NK", buffer, flag);
516}
517
518static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000519incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000520{
521 PyObject *buffer;
522 unsigned PY_LONG_LONG flag;
523
524 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
525 return NULL;
526
527 self->pendingcr = (int) flag & 1;
528 flag >>= 1;
529
530 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200531 return _PyObject_CallMethodId(self->decoder,
532 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000533 else
534 Py_RETURN_NONE;
535}
536
537static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000538incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000539{
540 self->seennl = 0;
541 self->pendingcr = 0;
542 if (self->decoder != Py_None)
543 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
544 else
545 Py_RETURN_NONE;
546}
547
548static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000549incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000550{
551 switch (self->seennl) {
552 case SEEN_CR:
553 return PyUnicode_FromString("\r");
554 case SEEN_LF:
555 return PyUnicode_FromString("\n");
556 case SEEN_CRLF:
557 return PyUnicode_FromString("\r\n");
558 case SEEN_CR | SEEN_LF:
559 return Py_BuildValue("ss", "\r", "\n");
560 case SEEN_CR | SEEN_CRLF:
561 return Py_BuildValue("ss", "\r", "\r\n");
562 case SEEN_LF | SEEN_CRLF:
563 return Py_BuildValue("ss", "\n", "\r\n");
564 case SEEN_CR | SEEN_LF | SEEN_CRLF:
565 return Py_BuildValue("sss", "\r", "\n", "\r\n");
566 default:
567 Py_RETURN_NONE;
568 }
569
570}
571
572
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000573static PyMethodDef incrementalnewlinedecoder_methods[] = {
574 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
575 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
576 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
577 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000578 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000579};
580
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000581static PyGetSetDef incrementalnewlinedecoder_getset[] = {
582 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000583 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584};
585
586PyTypeObject PyIncrementalNewlineDecoder_Type = {
587 PyVarObject_HEAD_INIT(NULL, 0)
588 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000589 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000591 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 0, /*tp_print*/
593 0, /*tp_getattr*/
594 0, /*tp_setattr*/
595 0, /*tp_compare */
596 0, /*tp_repr*/
597 0, /*tp_as_number*/
598 0, /*tp_as_sequence*/
599 0, /*tp_as_mapping*/
600 0, /*tp_hash */
601 0, /*tp_call*/
602 0, /*tp_str*/
603 0, /*tp_getattro*/
604 0, /*tp_setattro*/
605 0, /*tp_as_buffer*/
606 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000607 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000608 0, /* tp_traverse */
609 0, /* tp_clear */
610 0, /* tp_richcompare */
611 0, /*tp_weaklistoffset*/
612 0, /* tp_iter */
613 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000614 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000615 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 0, /* tp_base */
618 0, /* tp_dict */
619 0, /* tp_descr_get */
620 0, /* tp_descr_set */
621 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000622 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000623 0, /* tp_alloc */
624 PyType_GenericNew, /* tp_new */
625};
626
627
628/* TextIOWrapper */
629
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000630PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000631 "Character and line based layer over a BufferedIOBase object, buffer.\n"
632 "\n"
633 "encoding gives the name of the encoding that the stream will be\n"
634 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
635 "\n"
636 "errors determines the strictness of encoding and decoding (see the\n"
637 "codecs.register) and defaults to \"strict\".\n"
638 "\n"
639 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
640 "handling of line endings. If it is None, universal newlines is\n"
641 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
642 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
643 "caller. Conversely, on output, '\\n' is translated to the system\n"
644 "default line seperator, os.linesep. If newline is any other of its\n"
645 "legal values, that newline becomes the newline when the file is read\n"
646 "and it is returned untranslated. On output, '\\n' is converted to the\n"
647 "newline.\n"
648 "\n"
649 "If line_buffering is True, a call to flush is implied when a call to\n"
650 "write contains a newline character."
651 );
652
653typedef PyObject *
654 (*encodefunc_t)(PyObject *, PyObject *);
655
656typedef struct
657{
658 PyObject_HEAD
659 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000660 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 Py_ssize_t chunk_size;
662 PyObject *buffer;
663 PyObject *encoding;
664 PyObject *encoder;
665 PyObject *decoder;
666 PyObject *readnl;
667 PyObject *errors;
668 const char *writenl; /* utf-8 encoded, NULL stands for \n */
669 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200670 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000671 char readuniversal;
672 char readtranslate;
673 char writetranslate;
674 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200675 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000676 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000677 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 /* Specialized encoding func (see below) */
679 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000680 /* Whether or not it's the start of the stream */
681 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682
683 /* Reads and writes are internally buffered in order to speed things up.
684 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000685
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000686 Please also note that text to be written is first encoded before being
687 buffered. This is necessary so that encoding errors are immediately
688 reported to the caller, but it unfortunately means that the
689 IncrementalEncoder (whose encode() method is always written in Python)
690 becomes a bottleneck for small writes.
691 */
692 PyObject *decoded_chars; /* buffer for text returned from decoder */
693 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
694 PyObject *pending_bytes; /* list of bytes objects waiting to be
695 written, or NULL */
696 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000697
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698 /* snapshot is either None, or a tuple (dec_flags, next_input) where
699 * dec_flags is the second (integer) item of the decoder state and
700 * next_input is the chunk of input bytes that comes next after the
701 * snapshot point. We use this to reconstruct decoder states in tell().
702 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000703 PyObject *snapshot;
704 /* Bytes-to-characters ratio for the current chunk. Serves as input for
705 the heuristic in tell(). */
706 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707
708 /* Cache raw object if it's a FileIO object */
709 PyObject *raw;
710
711 PyObject *weakreflist;
712 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714
715
716/* A couple of specialized cases in order to bypass the slow incremental
717 encoding methods for the most popular encodings. */
718
719static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000720ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200722 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723}
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
728 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
729 PyUnicode_GET_SIZE(text),
730 PyBytes_AS_STRING(self->errors), 1);
731}
732
733static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000734utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735{
736 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
737 PyUnicode_GET_SIZE(text),
738 PyBytes_AS_STRING(self->errors), -1);
739}
740
741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743{
Antoine Pitroue4501852009-05-14 18:55:55 +0000744 if (!self->encoding_start_of_stream) {
745 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000747 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000748#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000749 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000751 }
752 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
753 PyUnicode_GET_SIZE(text),
754 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000755}
756
Antoine Pitroue4501852009-05-14 18:55:55 +0000757static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000758utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000759{
760 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
761 PyUnicode_GET_SIZE(text),
762 PyBytes_AS_STRING(self->errors), 1);
763}
764
765static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000766utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000767{
768 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
769 PyUnicode_GET_SIZE(text),
770 PyBytes_AS_STRING(self->errors), -1);
771}
772
773static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000774utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000775{
776 if (!self->encoding_start_of_stream) {
777 /* Skip the BOM and use native byte ordering */
778#if defined(WORDS_BIGENDIAN)
779 return utf32be_encode(self, text);
780#else
781 return utf32le_encode(self, text);
782#endif
783 }
784 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
785 PyUnicode_GET_SIZE(text),
786 PyBytes_AS_STRING(self->errors), 0);
787}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793}
794
795static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000796latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200798 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799}
800
801/* Map normalized encoding names onto the specialized encoding funcs */
802
803typedef struct {
804 const char *name;
805 encodefunc_t encodefunc;
806} encodefuncentry;
807
Antoine Pitrou24f36292009-03-28 22:16:42 +0000808static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809 {"ascii", (encodefunc_t) ascii_encode},
810 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000811 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 {"utf-16-be", (encodefunc_t) utf16be_encode},
813 {"utf-16-le", (encodefunc_t) utf16le_encode},
814 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000815 {"utf-32-be", (encodefunc_t) utf32be_encode},
816 {"utf-32-le", (encodefunc_t) utf32le_encode},
817 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818 {NULL, NULL}
819};
820
821
822static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000823textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000824{
825 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200826 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000827 NULL};
828 PyObject *buffer, *raw;
829 char *encoding = NULL;
830 char *errors = NULL;
831 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200832 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 _PyIO_State *state = IO_STATE;
834
835 PyObject *res;
836 int r;
837
838 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000839 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200840 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000841 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200842 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000843 return -1;
844
845 if (newline && newline[0] != '\0'
846 && !(newline[0] == '\n' && newline[1] == '\0')
847 && !(newline[0] == '\r' && newline[1] == '\0')
848 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 PyErr_Format(PyExc_ValueError,
850 "illegal newline value: %s", newline);
851 return -1;
852 }
853
854 Py_CLEAR(self->buffer);
855 Py_CLEAR(self->encoding);
856 Py_CLEAR(self->encoder);
857 Py_CLEAR(self->decoder);
858 Py_CLEAR(self->readnl);
859 Py_CLEAR(self->decoded_chars);
860 Py_CLEAR(self->pending_bytes);
861 Py_CLEAR(self->snapshot);
862 Py_CLEAR(self->errors);
863 Py_CLEAR(self->raw);
864 self->decoded_chars_used = 0;
865 self->pending_bytes_count = 0;
866 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000867 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000868
869 if (encoding == NULL) {
870 /* Try os.device_encoding(fileno) */
871 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200872 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000873 /* Ignore only AttributeError and UnsupportedOperation */
874 if (fileno == NULL) {
875 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
876 PyErr_ExceptionMatches(state->unsupported_operation)) {
877 PyErr_Clear();
878 }
879 else {
880 goto error;
881 }
882 }
883 else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200884 self->encoding = _PyObject_CallMethodId(state->os_module,
885 &PyId_device_encoding,
886 "N", fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000887 if (self->encoding == NULL)
888 goto error;
889 else if (!PyUnicode_Check(self->encoding))
890 Py_CLEAR(self->encoding);
891 }
892 }
893 if (encoding == NULL && self->encoding == NULL) {
894 if (state->locale_module == NULL) {
895 state->locale_module = PyImport_ImportModule("locale");
896 if (state->locale_module == NULL)
897 goto catch_ImportError;
898 else
899 goto use_locale;
900 }
901 else {
902 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200903 self->encoding = _PyObject_CallMethodId(
904 state->locale_module, &PyId_getpreferredencoding, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000905 if (self->encoding == NULL) {
906 catch_ImportError:
907 /*
908 Importing locale can raise a ImportError because of
909 _functools, and locale.getpreferredencoding can raise a
910 ImportError if _locale is not available. These will happen
911 during module building.
912 */
913 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
914 PyErr_Clear();
915 self->encoding = PyUnicode_FromString("ascii");
916 }
917 else
918 goto error;
919 }
920 else if (!PyUnicode_Check(self->encoding))
921 Py_CLEAR(self->encoding);
922 }
923 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000924 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000925 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000926 if (encoding == NULL)
927 goto error;
928 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000929 else if (encoding != NULL) {
930 self->encoding = PyUnicode_FromString(encoding);
931 if (self->encoding == NULL)
932 goto error;
933 }
934 else {
935 PyErr_SetString(PyExc_IOError,
936 "could not determine default encoding");
937 }
938
939 if (errors == NULL)
940 errors = "strict";
941 self->errors = PyBytes_FromString(errors);
942 if (self->errors == NULL)
943 goto error;
944
945 self->chunk_size = 8192;
946 self->readuniversal = (newline == NULL || newline[0] == '\0');
947 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200948 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000949 self->readtranslate = (newline == NULL);
950 if (newline) {
951 self->readnl = PyUnicode_FromString(newline);
952 if (self->readnl == NULL)
953 return -1;
954 }
955 self->writetranslate = (newline == NULL || newline[0] != '\0');
956 if (!self->readuniversal && self->readnl) {
957 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000958 if (self->writenl == NULL)
959 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000960 if (!strcmp(self->writenl, "\n"))
961 self->writenl = NULL;
962 }
963#ifdef MS_WINDOWS
964 else
965 self->writenl = "\r\n";
966#endif
967
968 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200969 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000970 if (res == NULL)
971 goto error;
972 r = PyObject_IsTrue(res);
973 Py_DECREF(res);
974 if (r == -1)
975 goto error;
976 if (r == 1) {
977 self->decoder = PyCodec_IncrementalDecoder(
978 encoding, errors);
979 if (self->decoder == NULL)
980 goto error;
981
982 if (self->readuniversal) {
983 PyObject *incrementalDecoder = PyObject_CallFunction(
984 (PyObject *)&PyIncrementalNewlineDecoder_Type,
985 "Oi", self->decoder, (int)self->readtranslate);
986 if (incrementalDecoder == NULL)
987 goto error;
988 Py_CLEAR(self->decoder);
989 self->decoder = incrementalDecoder;
990 }
991 }
992
993 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200994 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000995 if (res == NULL)
996 goto error;
997 r = PyObject_IsTrue(res);
998 Py_DECREF(res);
999 if (r == -1)
1000 goto error;
1001 if (r == 1) {
1002 PyObject *ci;
1003 self->encoder = PyCodec_IncrementalEncoder(
1004 encoding, errors);
1005 if (self->encoder == NULL)
1006 goto error;
1007 /* Get the normalized named of the codec */
1008 ci = _PyCodec_Lookup(encoding);
1009 if (ci == NULL)
1010 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001011 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001012 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001013 if (res == NULL) {
1014 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1015 PyErr_Clear();
1016 else
1017 goto error;
1018 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001019 else if (PyUnicode_Check(res)) {
1020 encodefuncentry *e = encodefuncs;
1021 while (e->name != NULL) {
1022 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1023 self->encodefunc = e->encodefunc;
1024 break;
1025 }
1026 e++;
1027 }
1028 }
1029 Py_XDECREF(res);
1030 }
1031
1032 self->buffer = buffer;
1033 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001035 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1036 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1037 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001038 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001040 if (raw == NULL) {
1041 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1042 PyErr_Clear();
1043 else
1044 goto error;
1045 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001046 else if (Py_TYPE(raw) == &PyFileIO_Type)
1047 self->raw = raw;
1048 else
1049 Py_DECREF(raw);
1050 }
1051
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001052 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001053 if (res == NULL)
1054 goto error;
1055 self->seekable = self->telling = PyObject_IsTrue(res);
1056 Py_DECREF(res);
1057
Martin v. Löwis767046a2011-10-14 15:35:36 +02001058 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001059
Antoine Pitroue4501852009-05-14 18:55:55 +00001060 self->encoding_start_of_stream = 0;
1061 if (self->seekable && self->encoder) {
1062 PyObject *cookieObj;
1063 int cmp;
1064
1065 self->encoding_start_of_stream = 1;
1066
1067 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1068 if (cookieObj == NULL)
1069 goto error;
1070
1071 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1072 Py_DECREF(cookieObj);
1073 if (cmp < 0) {
1074 goto error;
1075 }
1076
1077 if (cmp == 0) {
1078 self->encoding_start_of_stream = 0;
1079 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1080 _PyIO_zero, NULL);
1081 if (res == NULL)
1082 goto error;
1083 Py_DECREF(res);
1084 }
1085 }
1086
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001087 self->ok = 1;
1088 return 0;
1089
1090 error:
1091 return -1;
1092}
1093
1094static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001095_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001096{
1097 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1098 return -1;
1099 self->ok = 0;
1100 Py_CLEAR(self->buffer);
1101 Py_CLEAR(self->encoding);
1102 Py_CLEAR(self->encoder);
1103 Py_CLEAR(self->decoder);
1104 Py_CLEAR(self->readnl);
1105 Py_CLEAR(self->decoded_chars);
1106 Py_CLEAR(self->pending_bytes);
1107 Py_CLEAR(self->snapshot);
1108 Py_CLEAR(self->errors);
1109 Py_CLEAR(self->raw);
1110 return 0;
1111}
1112
1113static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001114textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001115{
Antoine Pitroue033e062010-10-29 10:38:18 +00001116 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001117 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001118 return;
1119 _PyObject_GC_UNTRACK(self);
1120 if (self->weakreflist != NULL)
1121 PyObject_ClearWeakRefs((PyObject *)self);
1122 Py_CLEAR(self->dict);
1123 Py_TYPE(self)->tp_free((PyObject *)self);
1124}
1125
1126static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001127textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001128{
1129 Py_VISIT(self->buffer);
1130 Py_VISIT(self->encoding);
1131 Py_VISIT(self->encoder);
1132 Py_VISIT(self->decoder);
1133 Py_VISIT(self->readnl);
1134 Py_VISIT(self->decoded_chars);
1135 Py_VISIT(self->pending_bytes);
1136 Py_VISIT(self->snapshot);
1137 Py_VISIT(self->errors);
1138 Py_VISIT(self->raw);
1139
1140 Py_VISIT(self->dict);
1141 return 0;
1142}
1143
1144static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001145textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001146{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001147 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001148 return -1;
1149 Py_CLEAR(self->dict);
1150 return 0;
1151}
1152
1153static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001154textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001155
1156/* This macro takes some shortcuts to make the common case faster. */
1157#define CHECK_CLOSED(self) \
1158 do { \
1159 int r; \
1160 PyObject *_res; \
1161 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1162 if (self->raw != NULL) \
1163 r = _PyFileIO_closed(self->raw); \
1164 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001165 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001166 if (_res == NULL) \
1167 return NULL; \
1168 r = PyObject_IsTrue(_res); \
1169 Py_DECREF(_res); \
1170 if (r < 0) \
1171 return NULL; \
1172 } \
1173 if (r > 0) { \
1174 PyErr_SetString(PyExc_ValueError, \
1175 "I/O operation on closed file."); \
1176 return NULL; \
1177 } \
1178 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001179 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001180 return NULL; \
1181 } while (0)
1182
/* Make the enclosing function raise ValueError and return NULL when the
   object is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                        ? "underlying buffer has been detached" \
                        : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1194
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                        ? "underlying buffer has been detached" \
                        : "I/O operation on uninitialized object"); \
        return -1; \
    }
1206
1207
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001208static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001209textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001210{
1211 PyObject *buffer, *res;
1212 CHECK_INITIALIZED(self);
1213 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1214 if (res == NULL)
1215 return NULL;
1216 Py_DECREF(res);
1217 buffer = self->buffer;
1218 self->buffer = NULL;
1219 self->detached = 1;
1220 self->ok = 0;
1221 return buffer;
1222}
1223
Antoine Pitrou24f36292009-03-28 22:16:42 +00001224/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001225 underlying buffered object, though. */
1226static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001227_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001228{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001229 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001230
1231 if (self->pending_bytes == NULL)
1232 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001233
1234 pending = self->pending_bytes;
1235 Py_INCREF(pending);
1236 self->pending_bytes_count = 0;
1237 Py_CLEAR(self->pending_bytes);
1238
1239 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1240 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001241 if (b == NULL)
1242 return -1;
1243 ret = PyObject_CallMethodObjArgs(self->buffer,
1244 _PyIO_str_write, b, NULL);
1245 Py_DECREF(b);
1246 if (ret == NULL)
1247 return -1;
1248 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001249 return 0;
1250}
1251
1252static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001253textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001254{
1255 PyObject *ret;
1256 PyObject *text; /* owned reference */
1257 PyObject *b;
1258 Py_ssize_t textlen;
1259 int haslf = 0;
1260 int needflush = 0;
1261
1262 CHECK_INITIALIZED(self);
1263
1264 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1265 return NULL;
1266 }
1267
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001268 if (PyUnicode_READY(text) == -1)
1269 return NULL;
1270
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001271 CHECK_CLOSED(self);
1272
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001273 if (self->encoder == NULL)
1274 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001275
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001276 Py_INCREF(text);
1277
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279
1280 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001281 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001282 haslf = 1;
1283
1284 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001285 PyObject *newtext = _PyObject_CallMethodId(
1286 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001287 Py_DECREF(text);
1288 if (newtext == NULL)
1289 return NULL;
1290 text = newtext;
1291 }
1292
Antoine Pitroue96ec682011-07-23 21:46:35 +02001293 if (self->write_through)
1294 needflush = 1;
1295 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001297 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001298 needflush = 1;
1299
1300 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001301 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001303 self->encoding_start_of_stream = 0;
1304 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001305 else
1306 b = PyObject_CallMethodObjArgs(self->encoder,
1307 _PyIO_str_encode, text, NULL);
1308 Py_DECREF(text);
1309 if (b == NULL)
1310 return NULL;
1311
1312 if (self->pending_bytes == NULL) {
1313 self->pending_bytes = PyList_New(0);
1314 if (self->pending_bytes == NULL) {
1315 Py_DECREF(b);
1316 return NULL;
1317 }
1318 self->pending_bytes_count = 0;
1319 }
1320 if (PyList_Append(self->pending_bytes, b) < 0) {
1321 Py_DECREF(b);
1322 return NULL;
1323 }
1324 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1325 Py_DECREF(b);
1326 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001327 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001328 return NULL;
1329 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001330
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001331 if (needflush) {
1332 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1333 if (ret == NULL)
1334 return NULL;
1335 Py_DECREF(ret);
1336 }
1337
1338 Py_CLEAR(self->snapshot);
1339
1340 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001341 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001342 if (ret == NULL)
1343 return NULL;
1344 Py_DECREF(ret);
1345 }
1346
1347 return PyLong_FromSsize_t(textlen);
1348}
1349
1350/* Steal a reference to chars and store it in the decoded_char buffer;
1351 */
1352static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001353textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001354{
1355 Py_CLEAR(self->decoded_chars);
1356 self->decoded_chars = chars;
1357 self->decoded_chars_used = 0;
1358}
1359
1360static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001361textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001362{
1363 PyObject *chars;
1364 Py_ssize_t avail;
1365
1366 if (self->decoded_chars == NULL)
1367 return PyUnicode_FromStringAndSize(NULL, 0);
1368
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001369 /* decoded_chars is guaranteed to be "ready". */
1370 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001371 - self->decoded_chars_used);
1372
1373 assert(avail >= 0);
1374
1375 if (n < 0 || n > avail)
1376 n = avail;
1377
1378 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001379 chars = PyUnicode_Substring(self->decoded_chars,
1380 self->decoded_chars_used,
1381 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001382 if (chars == NULL)
1383 return NULL;
1384 }
1385 else {
1386 chars = self->decoded_chars;
1387 Py_INCREF(chars);
1388 }
1389
1390 self->decoded_chars_used += n;
1391 return chars;
1392}
1393
1394/* Read and decode the next chunk of data from the BufferedReader.
1395 */
1396static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001397textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001398{
1399 PyObject *dec_buffer = NULL;
1400 PyObject *dec_flags = NULL;
1401 PyObject *input_chunk = NULL;
1402 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001403 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001404 int eof;
1405
1406 /* The return value is True unless EOF was reached. The decoded string is
1407 * placed in self._decoded_chars (replacing its previous value). The
1408 * entire input chunk is sent to the decoder, though some of it may remain
1409 * buffered in the decoder, yet to be converted.
1410 */
1411
1412 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001413 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001414 return -1;
1415 }
1416
1417 if (self->telling) {
1418 /* To prepare for tell(), we need to snapshot a point in the file
1419 * where the decoder's input buffer is empty.
1420 */
1421
1422 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1423 _PyIO_str_getstate, NULL);
1424 if (state == NULL)
1425 return -1;
1426 /* Given this, we know there was a valid snapshot point
1427 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1428 */
1429 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1430 Py_DECREF(state);
1431 return -1;
1432 }
1433 Py_INCREF(dec_buffer);
1434 Py_INCREF(dec_flags);
1435 Py_DECREF(state);
1436 }
1437
1438 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1439 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1440 if (chunk_size == NULL)
1441 goto fail;
1442 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001443 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1444 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001445 Py_DECREF(chunk_size);
1446 if (input_chunk == NULL)
1447 goto fail;
1448 assert(PyBytes_Check(input_chunk));
1449
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001450 nbytes = PyBytes_Size(input_chunk);
1451 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001452
1453 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1454 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1455 self->decoder, input_chunk, eof);
1456 }
1457 else {
1458 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1459 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1460 }
1461
1462 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1463 if (decoded_chars == NULL)
1464 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001465 if (PyUnicode_READY(decoded_chars) == -1)
1466 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001467 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001468 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001469 if (nchars > 0)
1470 self->b2cratio = (double) nbytes / nchars;
1471 else
1472 self->b2cratio = 0.0;
1473 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001474 eof = 0;
1475
1476 if (self->telling) {
1477 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1478 * next input to be decoded is dec_buffer + input_chunk.
1479 */
1480 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1481 if (next_input == NULL)
1482 goto fail;
1483 assert (PyBytes_Check(next_input));
1484 Py_DECREF(dec_buffer);
1485 Py_CLEAR(self->snapshot);
1486 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1487 }
1488 Py_DECREF(input_chunk);
1489
1490 return (eof == 0);
1491
1492 fail:
1493 Py_XDECREF(dec_buffer);
1494 Py_XDECREF(dec_flags);
1495 Py_XDECREF(input_chunk);
1496 return -1;
1497}
1498
1499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001500textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501{
1502 Py_ssize_t n = -1;
1503 PyObject *result = NULL, *chunks = NULL;
1504
1505 CHECK_INITIALIZED(self);
1506
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001507 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001508 return NULL;
1509
1510 CHECK_CLOSED(self);
1511
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001512 if (self->decoder == NULL)
1513 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001514
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001515 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001516 return NULL;
1517
1518 if (n < 0) {
1519 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001520 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001521 PyObject *decoded;
1522 if (bytes == NULL)
1523 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001524
1525 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1526 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1527 bytes, 1);
1528 else
1529 decoded = PyObject_CallMethodObjArgs(
1530 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531 Py_DECREF(bytes);
1532 if (decoded == NULL)
1533 goto fail;
1534
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001535 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536
1537 if (result == NULL) {
1538 Py_DECREF(decoded);
1539 return NULL;
1540 }
1541
1542 PyUnicode_AppendAndDel(&result, decoded);
1543 if (result == NULL)
1544 goto fail;
1545
1546 Py_CLEAR(self->snapshot);
1547 return result;
1548 }
1549 else {
1550 int res = 1;
1551 Py_ssize_t remaining = n;
1552
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001553 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 if (result == NULL)
1555 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001556 if (PyUnicode_READY(result) == -1)
1557 goto fail;
1558 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559
1560 /* Keep reading chunks until we have n characters to return */
1561 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001562 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001563 if (res < 0)
1564 goto fail;
1565 if (res == 0) /* EOF */
1566 break;
1567 if (chunks == NULL) {
1568 chunks = PyList_New(0);
1569 if (chunks == NULL)
1570 goto fail;
1571 }
1572 if (PyList_Append(chunks, result) < 0)
1573 goto fail;
1574 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001575 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576 if (result == NULL)
1577 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001578 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001579 }
1580 if (chunks != NULL) {
1581 if (result != NULL && PyList_Append(chunks, result) < 0)
1582 goto fail;
1583 Py_CLEAR(result);
1584 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1585 if (result == NULL)
1586 goto fail;
1587 Py_CLEAR(chunks);
1588 }
1589 return result;
1590 }
1591 fail:
1592 Py_XDECREF(result);
1593 Py_XDECREF(chunks);
1594 return NULL;
1595}
1596
1597
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001598/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001599 that is to the NUL character. Otherwise the function will produce
1600 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001601static char *
1602find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001604 if (kind == PyUnicode_1BYTE_KIND) {
1605 assert(ch < 256);
1606 return (char *) memchr((void *) s, (char) ch, end - s);
1607 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001608 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001609 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001610 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001611 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001612 return s;
1613 if (s == end)
1614 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001615 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 }
1617}
1618
1619Py_ssize_t
1620_PyIO_find_line_ending(
1621 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001622 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001624 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001625
1626 if (translated) {
1627 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001628 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001630 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001631 else {
1632 *consumed = len;
1633 return -1;
1634 }
1635 }
1636 else if (universal) {
1637 /* Universal newline search. Find any of \r, \r\n, \n
1638 * The decoder ensures that \r\n are not split in two pieces
1639 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001640 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001642 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001644 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001645 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001646 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001647 if (s >= end) {
1648 *consumed = len;
1649 return -1;
1650 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001651 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001652 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001653 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001654 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001655 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001656 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001657 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001659 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001660 }
1661 }
1662 }
1663 else {
1664 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001665 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1666 char *nl = PyUnicode_DATA(readnl);
1667 /* Assume that readnl is an ASCII character. */
1668 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001670 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001672 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673 *consumed = len;
1674 return -1;
1675 }
1676 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001677 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001678 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001679 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 if (e < s)
1681 e = s;
1682 while (s < e) {
1683 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001684 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001685 if (pos == NULL || pos >= e)
1686 break;
1687 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001688 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001689 break;
1690 }
1691 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 return (pos - start)/kind + readnl_len;
1693 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001695 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001696 if (pos == NULL)
1697 *consumed = len;
1698 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001699 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001700 return -1;
1701 }
1702 }
1703}
1704
1705static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001706_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001707{
1708 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1709 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1710 int res;
1711
1712 CHECK_CLOSED(self);
1713
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001714 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001715 return NULL;
1716
1717 chunked = 0;
1718
1719 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001720 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001722 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001723 Py_ssize_t consumed = 0;
1724
1725 /* First, get some data if necessary */
1726 res = 1;
1727 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001728 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001729 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 if (res < 0)
1731 goto error;
1732 if (res == 0)
1733 break;
1734 }
1735 if (res == 0) {
1736 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001737 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001738 Py_CLEAR(self->snapshot);
1739 start = endpos = offset_to_buffer = 0;
1740 break;
1741 }
1742
1743 if (remaining == NULL) {
1744 line = self->decoded_chars;
1745 start = self->decoded_chars_used;
1746 offset_to_buffer = 0;
1747 Py_INCREF(line);
1748 }
1749 else {
1750 assert(self->decoded_chars_used == 0);
1751 line = PyUnicode_Concat(remaining, self->decoded_chars);
1752 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001753 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001754 Py_CLEAR(remaining);
1755 if (line == NULL)
1756 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001757 if (PyUnicode_READY(line) == -1)
1758 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001759 }
1760
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001761 ptr = PyUnicode_DATA(line);
1762 line_len = PyUnicode_GET_LENGTH(line);
1763 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764
1765 endpos = _PyIO_find_line_ending(
1766 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001767 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001768 ptr + kind * start,
1769 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001770 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001771 if (endpos >= 0) {
1772 endpos += start;
1773 if (limit >= 0 && (endpos - start) + chunked >= limit)
1774 endpos = start + limit - chunked;
1775 break;
1776 }
1777
1778 /* We can put aside up to `endpos` */
1779 endpos = consumed + start;
1780 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1781 /* Didn't find line ending, but reached length limit */
1782 endpos = start + limit - chunked;
1783 break;
1784 }
1785
1786 if (endpos > start) {
1787 /* No line ending seen yet - put aside current data */
1788 PyObject *s;
1789 if (chunks == NULL) {
1790 chunks = PyList_New(0);
1791 if (chunks == NULL)
1792 goto error;
1793 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001794 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001795 if (s == NULL)
1796 goto error;
1797 if (PyList_Append(chunks, s) < 0) {
1798 Py_DECREF(s);
1799 goto error;
1800 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001801 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001802 Py_DECREF(s);
1803 }
1804 /* There may be some remaining bytes we'll have to prepend to the
1805 next chunk of data */
1806 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001807 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001808 if (remaining == NULL)
1809 goto error;
1810 }
1811 Py_CLEAR(line);
1812 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001813 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001814 }
1815
1816 if (line != NULL) {
1817 /* Our line ends in the current buffer */
1818 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001819 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1820 PyObject *s = PyUnicode_Substring(line, start, endpos);
1821 Py_CLEAR(line);
1822 if (s == NULL)
1823 goto error;
1824 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001825 }
1826 }
1827 if (remaining != NULL) {
1828 if (chunks == NULL) {
1829 chunks = PyList_New(0);
1830 if (chunks == NULL)
1831 goto error;
1832 }
1833 if (PyList_Append(chunks, remaining) < 0)
1834 goto error;
1835 Py_CLEAR(remaining);
1836 }
1837 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001838 if (line != NULL) {
1839 if (PyList_Append(chunks, line) < 0)
1840 goto error;
1841 Py_DECREF(line);
1842 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001843 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1844 if (line == NULL)
1845 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001846 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001847 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001848 if (line == NULL) {
1849 Py_INCREF(_PyIO_empty_str);
1850 line = _PyIO_empty_str;
1851 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001852
1853 return line;
1854
1855 error:
1856 Py_XDECREF(chunks);
1857 Py_XDECREF(remaining);
1858 Py_XDECREF(line);
1859 return NULL;
1860}
1861
1862static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001863textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001864{
1865 Py_ssize_t limit = -1;
1866
1867 CHECK_INITIALIZED(self);
1868 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1869 return NULL;
1870 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001871 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872}
1873
1874/* Seek and Tell */
1875
1876typedef struct {
1877 Py_off_t start_pos;
1878 int dec_flags;
1879 int bytes_to_feed;
1880 int chars_to_skip;
1881 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001882} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001883
1884/*
1885 To speed up cookie packing/unpacking, we store the fields in a temporary
1886 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1887 The following macros define at which offsets in the intermediary byte
1888 string the various CookieStruct fields will be stored.
1889 */
1890
1891#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1892
1893#if defined(WORDS_BIGENDIAN)
1894
1895# define IS_LITTLE_ENDIAN 0
1896
1897/* We want the least significant byte of start_pos to also be the least
1898 significant byte of the cookie, which means that in big-endian mode we
1899 must copy the fields in reverse order. */
1900
1901# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1902# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1903# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1904# define OFF_CHARS_TO_SKIP (sizeof(char))
1905# define OFF_NEED_EOF 0
1906
1907#else
1908
1909# define IS_LITTLE_ENDIAN 1
1910
1911/* Little-endian mode: the least significant byte of start_pos will
1912 naturally end up the least significant byte of the cookie. */
1913
1914# define OFF_START_POS 0
1915# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1916# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1917# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1918# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1919
1920#endif
1921
1922static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001923textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001924{
1925 unsigned char buffer[COOKIE_BUF_LEN];
1926 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1927 if (cookieLong == NULL)
1928 return -1;
1929
1930 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1931 IS_LITTLE_ENDIAN, 0) < 0) {
1932 Py_DECREF(cookieLong);
1933 return -1;
1934 }
1935 Py_DECREF(cookieLong);
1936
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001937 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1938 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1939 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1940 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1941 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001942
1943 return 0;
1944}
1945
1946static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001947textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001948{
1949 unsigned char buffer[COOKIE_BUF_LEN];
1950
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001951 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1952 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1953 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1954 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1955 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001956
1957 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1958}
1959#undef IS_LITTLE_ENDIAN
1960
1961static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001962_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001963{
1964 PyObject *res;
1965 /* When seeking to the start of the stream, we call decoder.reset()
1966 rather than decoder.getstate().
1967 This is for a few decoders such as utf-16 for which the state value
1968 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1969 utf-16, that we are expecting a BOM).
1970 */
1971 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1972 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1973 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001974 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1975 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001976 if (res == NULL)
1977 return -1;
1978 Py_DECREF(res);
1979 return 0;
1980}
1981
Antoine Pitroue4501852009-05-14 18:55:55 +00001982static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001983_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001984{
1985 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001986 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001987 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1988 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1989 self->encoding_start_of_stream = 1;
1990 }
1991 else {
1992 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1993 _PyIO_zero, NULL);
1994 self->encoding_start_of_stream = 0;
1995 }
1996 if (res == NULL)
1997 return -1;
1998 Py_DECREF(res);
1999 return 0;
2000}
2001
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002003textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004{
2005 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002006 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002007 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008 PyObject *res;
2009 int cmp;
2010
2011 CHECK_INITIALIZED(self);
2012
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002013 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2014 return NULL;
2015 CHECK_CLOSED(self);
2016
2017 Py_INCREF(cookieObj);
2018
2019 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002020 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021 goto fail;
2022 }
2023
2024 if (whence == 1) {
2025 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002026 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027 if (cmp < 0)
2028 goto fail;
2029
2030 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002031 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002032 goto fail;
2033 }
2034
2035 /* Seeking to the current position should attempt to
2036 * sync the underlying buffer with the current position.
2037 */
2038 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002039 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002040 if (cookieObj == NULL)
2041 goto fail;
2042 }
2043 else if (whence == 2) {
2044 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002045 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002046 if (cmp < 0)
2047 goto fail;
2048
2049 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002050 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002051 goto fail;
2052 }
2053
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002054 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002055 if (res == NULL)
2056 goto fail;
2057 Py_DECREF(res);
2058
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002059 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002060 Py_CLEAR(self->snapshot);
2061 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002062 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 if (res == NULL)
2064 goto fail;
2065 Py_DECREF(res);
2066 }
2067
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002068 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002069 Py_XDECREF(cookieObj);
2070 return res;
2071 }
2072 else if (whence != 0) {
2073 PyErr_Format(PyExc_ValueError,
2074 "invalid whence (%d, should be 0, 1 or 2)", whence);
2075 goto fail;
2076 }
2077
Antoine Pitroue4501852009-05-14 18:55:55 +00002078 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 if (cmp < 0)
2080 goto fail;
2081
2082 if (cmp == 1) {
2083 PyErr_Format(PyExc_ValueError,
2084 "negative seek position %R", cookieObj);
2085 goto fail;
2086 }
2087
2088 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2089 if (res == NULL)
2090 goto fail;
2091 Py_DECREF(res);
2092
2093 /* The strategy of seek() is to go back to the safe start point
2094 * and replay the effect of read(chars_to_skip) from there.
2095 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002096 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002097 goto fail;
2098
2099 /* Seek back to the safe start point. */
2100 posobj = PyLong_FromOff_t(cookie.start_pos);
2101 if (posobj == NULL)
2102 goto fail;
2103 res = PyObject_CallMethodObjArgs(self->buffer,
2104 _PyIO_str_seek, posobj, NULL);
2105 Py_DECREF(posobj);
2106 if (res == NULL)
2107 goto fail;
2108 Py_DECREF(res);
2109
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002110 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002111 Py_CLEAR(self->snapshot);
2112
2113 /* Restore the decoder to its state from the safe start point. */
2114 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002115 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002116 goto fail;
2117 }
2118
2119 if (cookie.chars_to_skip) {
2120 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002121 PyObject *input_chunk = _PyObject_CallMethodId(
2122 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123 PyObject *decoded;
2124
2125 if (input_chunk == NULL)
2126 goto fail;
2127
2128 assert (PyBytes_Check(input_chunk));
2129
2130 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2131 if (self->snapshot == NULL) {
2132 Py_DECREF(input_chunk);
2133 goto fail;
2134 }
2135
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002136 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2137 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002138
2139 if (decoded == NULL)
2140 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002141 if (PyUnicode_READY(decoded) == -1) {
2142 Py_DECREF(decoded);
2143 goto fail;
2144 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002145
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002146 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002147
2148 /* Skip chars_to_skip of the decoded characters. */
2149 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2150 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2151 goto fail;
2152 }
2153 self->decoded_chars_used = cookie.chars_to_skip;
2154 }
2155 else {
2156 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2157 if (self->snapshot == NULL)
2158 goto fail;
2159 }
2160
Antoine Pitroue4501852009-05-14 18:55:55 +00002161 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2162 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002163 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002164 goto fail;
2165 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002166 return cookieObj;
2167 fail:
2168 Py_XDECREF(cookieObj);
2169 return NULL;
2170
2171}
2172
2173static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002174textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002175{
2176 PyObject *res;
2177 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002178 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002179 PyObject *next_input;
2180 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002181 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002182 PyObject *saved_state = NULL;
2183 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002184 char *dec_buffer;
2185 Py_ssize_t dec_buffer_len;
2186 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002187
2188 CHECK_INITIALIZED(self);
2189 CHECK_CLOSED(self);
2190
2191 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002192 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002193 goto fail;
2194 }
2195 if (!self->telling) {
2196 PyErr_SetString(PyExc_IOError,
2197 "telling position disabled by next() call");
2198 goto fail;
2199 }
2200
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002201 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002202 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002203 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002204 if (res == NULL)
2205 goto fail;
2206 Py_DECREF(res);
2207
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002208 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002209 if (posobj == NULL)
2210 goto fail;
2211
2212 if (self->decoder == NULL || self->snapshot == NULL) {
2213 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2214 return posobj;
2215 }
2216
2217#if defined(HAVE_LARGEFILE_SUPPORT)
2218 cookie.start_pos = PyLong_AsLongLong(posobj);
2219#else
2220 cookie.start_pos = PyLong_AsLong(posobj);
2221#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002222 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002223 if (PyErr_Occurred())
2224 goto fail;
2225
2226 /* Skip backward to the snapshot point (see _read_chunk). */
2227 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2228 goto fail;
2229
2230 assert (PyBytes_Check(next_input));
2231
2232 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2233
2234 /* How many decoded characters have been used up since the snapshot? */
2235 if (self->decoded_chars_used == 0) {
2236 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002237 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002238 }
2239
2240 chars_to_skip = self->decoded_chars_used;
2241
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002242 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002243 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2244 _PyIO_str_getstate, NULL);
2245 if (saved_state == NULL)
2246 goto fail;
2247
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002248#define DECODER_GETSTATE() do { \
2249 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2250 _PyIO_str_getstate, NULL); \
2251 if (_state == NULL) \
2252 goto fail; \
2253 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2254 Py_DECREF(_state); \
2255 goto fail; \
2256 } \
2257 Py_DECREF(_state); \
2258 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002259
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002260 /* TODO: replace assert with exception */
2261#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002262 PyObject *_decoded = _PyObject_CallMethodId( \
2263 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002264 if (_decoded == NULL) \
2265 goto fail; \
2266 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002267 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002268 Py_DECREF(_decoded); \
2269 } while (0)
2270
2271 /* Fast search for an acceptable start point, close to our
2272 current pos */
2273 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2274 skip_back = 1;
2275 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2276 input = PyBytes_AS_STRING(next_input);
2277 while (skip_bytes > 0) {
2278 /* Decode up to temptative start point */
2279 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2280 goto fail;
2281 DECODER_DECODE(input, skip_bytes, chars_decoded);
2282 if (chars_decoded <= chars_to_skip) {
2283 DECODER_GETSTATE();
2284 if (dec_buffer_len == 0) {
2285 /* Before pos and no bytes buffered in decoder => OK */
2286 cookie.dec_flags = dec_flags;
2287 chars_to_skip -= chars_decoded;
2288 break;
2289 }
2290 /* Skip back by buffered amount and reset heuristic */
2291 skip_bytes -= dec_buffer_len;
2292 skip_back = 1;
2293 }
2294 else {
2295 /* We're too far ahead, skip back a bit */
2296 skip_bytes -= skip_back;
2297 skip_back *= 2;
2298 }
2299 }
2300 if (skip_bytes <= 0) {
2301 skip_bytes = 0;
2302 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2303 goto fail;
2304 }
2305
2306 /* Note our initial start point. */
2307 cookie.start_pos += skip_bytes;
2308 cookie.chars_to_skip = chars_to_skip;
2309 if (chars_to_skip == 0)
2310 goto finally;
2311
2312 /* We should be close to the desired position. Now feed the decoder one
2313 * byte at a time until we reach the `chars_to_skip` target.
2314 * As we go, note the nearest "safe start point" before the current
2315 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002316 * can safely start from there and advance to this location).
2317 */
2318 chars_decoded = 0;
2319 input = PyBytes_AS_STRING(next_input);
2320 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002321 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002322 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002323 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002324
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002325 DECODER_DECODE(input, 1, n);
2326 /* We got n chars for 1 byte */
2327 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002328 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002329 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002330
2331 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2332 /* Decoder buffer is empty, so this is a safe start point. */
2333 cookie.start_pos += cookie.bytes_to_feed;
2334 chars_to_skip -= chars_decoded;
2335 cookie.dec_flags = dec_flags;
2336 cookie.bytes_to_feed = 0;
2337 chars_decoded = 0;
2338 }
2339 if (chars_decoded >= chars_to_skip)
2340 break;
2341 input++;
2342 }
2343 if (input == input_end) {
2344 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002345 PyObject *decoded = _PyObject_CallMethodId(
2346 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347 if (decoded == NULL)
2348 goto fail;
2349 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002350 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002351 Py_DECREF(decoded);
2352 cookie.need_eof = 1;
2353
2354 if (chars_decoded < chars_to_skip) {
2355 PyErr_SetString(PyExc_IOError,
2356 "can't reconstruct logical file position");
2357 goto fail;
2358 }
2359 }
2360
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002361finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002362 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002363 Py_DECREF(saved_state);
2364 if (res == NULL)
2365 return NULL;
2366 Py_DECREF(res);
2367
2368 /* The returned cookie corresponds to the last safe start point. */
2369 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002370 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002372fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002373 if (saved_state) {
2374 PyObject *type, *value, *traceback;
2375 PyErr_Fetch(&type, &value, &traceback);
2376
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002377 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002378 Py_DECREF(saved_state);
2379 if (res == NULL)
2380 return NULL;
2381 Py_DECREF(res);
2382
2383 PyErr_Restore(type, value, traceback);
2384 }
2385 return NULL;
2386}
2387
2388static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002389textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002390{
2391 PyObject *pos = Py_None;
2392 PyObject *res;
2393
2394 CHECK_INITIALIZED(self)
2395 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2396 return NULL;
2397 }
2398
2399 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2400 if (res == NULL)
2401 return NULL;
2402 Py_DECREF(res);
2403
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002404 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002405}
2406
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002407static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002408textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002409{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002410 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002411
2412 CHECK_INITIALIZED(self);
2413
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002414 res = PyUnicode_FromString("<_io.TextIOWrapper");
2415 if (res == NULL)
2416 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002417 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002418 if (nameobj == NULL) {
2419 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2420 PyErr_Clear();
2421 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002422 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002423 }
2424 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002425 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002426 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002427 if (s == NULL)
2428 goto error;
2429 PyUnicode_AppendAndDel(&res, s);
2430 if (res == NULL)
2431 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002432 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002433 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002434 if (modeobj == NULL) {
2435 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2436 PyErr_Clear();
2437 else
2438 goto error;
2439 }
2440 else {
2441 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2442 Py_DECREF(modeobj);
2443 if (s == NULL)
2444 goto error;
2445 PyUnicode_AppendAndDel(&res, s);
2446 if (res == NULL)
2447 return NULL;
2448 }
2449 s = PyUnicode_FromFormat("%U encoding=%R>",
2450 res, self->encoding);
2451 Py_DECREF(res);
2452 return s;
2453error:
2454 Py_XDECREF(res);
2455 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002456}
2457
2458
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002459/* Inquiries */
2460
2461static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002462textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002463{
2464 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002465 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002466}
2467
2468static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002469textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002470{
2471 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002472 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002473}
2474
2475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002476textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002477{
2478 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002479 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480}
2481
2482static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002483textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002484{
2485 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002486 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002487}
2488
2489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002490textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002491{
2492 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002493 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494}
2495
2496static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002497textiowrapper_getstate(textio *self, PyObject *args)
2498{
2499 PyErr_Format(PyExc_TypeError,
2500 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2501 return NULL;
2502}
2503
2504static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002505textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002506{
2507 CHECK_INITIALIZED(self);
2508 CHECK_CLOSED(self);
2509 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002510 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002512 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513}
2514
2515static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002516textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002517{
2518 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002519 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002521
Antoine Pitrou6be88762010-05-03 16:48:20 +00002522 res = textiowrapper_closed_get(self, NULL);
2523 if (res == NULL)
2524 return NULL;
2525 r = PyObject_IsTrue(res);
2526 Py_DECREF(res);
2527 if (r < 0)
2528 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002529
Antoine Pitrou6be88762010-05-03 16:48:20 +00002530 if (r > 0) {
2531 Py_RETURN_NONE; /* stream already closed */
2532 }
2533 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002534 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002535 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002536 if (res)
2537 Py_DECREF(res);
2538 else
2539 PyErr_Clear();
2540 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002541 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002542 if (res == NULL) {
2543 return NULL;
2544 }
2545 else
2546 Py_DECREF(res);
2547
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002548 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002549 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002550}
2551
2552static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002553textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554{
2555 PyObject *line;
2556
2557 CHECK_INITIALIZED(self);
2558
2559 self->telling = 0;
2560 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2561 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002562 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563 }
2564 else {
2565 line = PyObject_CallMethodObjArgs((PyObject *)self,
2566 _PyIO_str_readline, NULL);
2567 if (line && !PyUnicode_Check(line)) {
2568 PyErr_Format(PyExc_IOError,
2569 "readline() should have returned an str object, "
2570 "not '%.200s'", Py_TYPE(line)->tp_name);
2571 Py_DECREF(line);
2572 return NULL;
2573 }
2574 }
2575
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002576 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002577 return NULL;
2578
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002579 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002580 /* Reached EOF or would have blocked */
2581 Py_DECREF(line);
2582 Py_CLEAR(self->snapshot);
2583 self->telling = self->seekable;
2584 return NULL;
2585 }
2586
2587 return line;
2588}
2589
2590static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002591textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002592{
2593 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002594 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002595}
2596
2597static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002598textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002599{
2600 CHECK_INITIALIZED(self);
2601 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2602}
2603
2604static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002605textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606{
2607 PyObject *res;
2608 CHECK_INITIALIZED(self);
2609 if (self->decoder == NULL)
2610 Py_RETURN_NONE;
2611 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2612 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002613 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2614 PyErr_Clear();
2615 Py_RETURN_NONE;
2616 }
2617 else {
2618 return NULL;
2619 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002620 }
2621 return res;
2622}
2623
2624static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002625textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002626{
2627 CHECK_INITIALIZED(self);
2628 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2629}
2630
2631static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002632textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002633{
2634 CHECK_INITIALIZED(self);
2635 return PyLong_FromSsize_t(self->chunk_size);
2636}
2637
2638static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002639textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002640{
2641 Py_ssize_t n;
2642 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002643 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002644 if (n == -1 && PyErr_Occurred())
2645 return -1;
2646 if (n <= 0) {
2647 PyErr_SetString(PyExc_ValueError,
2648 "a strictly positive integer is required");
2649 return -1;
2650 }
2651 self->chunk_size = n;
2652 return 0;
2653}
2654
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002655static PyMethodDef textiowrapper_methods[] = {
2656 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2657 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2658 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2659 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2660 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2661 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002662
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002663 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2664 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2665 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2666 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2667 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002668 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002669
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002670 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2671 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2672 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002673 {NULL, NULL}
2674};
2675
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002676static PyMemberDef textiowrapper_members[] = {
2677 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2678 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2679 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680 {NULL}
2681};
2682
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002683static PyGetSetDef textiowrapper_getset[] = {
2684 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2685 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2687*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002688 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2689 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2690 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2691 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002692 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693};
2694
2695PyTypeObject PyTextIOWrapper_Type = {
2696 PyVarObject_HEAD_INIT(NULL, 0)
2697 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002698 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002699 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002700 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002701 0, /*tp_print*/
2702 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002703 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002704 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002705 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706 0, /*tp_as_number*/
2707 0, /*tp_as_sequence*/
2708 0, /*tp_as_mapping*/
2709 0, /*tp_hash */
2710 0, /*tp_call*/
2711 0, /*tp_str*/
2712 0, /*tp_getattro*/
2713 0, /*tp_setattro*/
2714 0, /*tp_as_buffer*/
2715 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2716 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002717 textiowrapper_doc, /* tp_doc */
2718 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2719 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002720 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002721 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002722 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002723 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2724 textiowrapper_methods, /* tp_methods */
2725 textiowrapper_members, /* tp_members */
2726 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002727 0, /* tp_base */
2728 0, /* tp_dict */
2729 0, /* tp_descr_get */
2730 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002731 offsetof(textio, dict), /*tp_dictoffset*/
2732 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002733 0, /* tp_alloc */
2734 PyType_GenericNew, /* tp_new */
2735};