blob: a3566553e00b125818f8eae5a92159b9bf8f2c35 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Attribute and method names declared through _Py_IDENTIFIER.  Code in this
   file refers to them as PyId_<name> (for instance &PyId_setstate). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(device_encoding);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020035
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000036/* TextIOBase */
37
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000038PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000039 "Base class for text I/O.\n"
40 "\n"
41 "This class provides a character and line based interface to stream\n"
42 "I/O. There is no readinto method because Python's character strings\n"
43 "are immutable. There is no public constructor.\n"
44 );
45
46static PyObject *
47_unsupported(const char *message)
48{
49 PyErr_SetString(IO_STATE->unsupported_operation, message);
50 return NULL;
51}
52
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000053PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000054 "Separate the underlying buffer from the TextIOBase and return it.\n"
55 "\n"
56 "After the underlying buffer has been detached, the TextIO is in an\n"
57 "unusable state.\n"
58 );
59
60static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000061textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000062{
63 return _unsupported("detach");
64}
65
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000066PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000067 "Read at most n characters from stream.\n"
68 "\n"
69 "Read from underlying buffer until we have n characters or we hit EOF.\n"
70 "If n is negative or omitted, read until EOF.\n"
71 );
72
73static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000074textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000075{
76 return _unsupported("read");
77}
78
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000079PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000080 "Read until newline or EOF.\n"
81 "\n"
82 "Returns an empty string if EOF is hit immediately.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("readline");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Write string to stream.\n"
93 "Returns the number of characters written (which is always equal to\n"
94 "the length of the string).\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("write");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Encoding of the text stream.\n"
105 "\n"
106 "Subclasses should override.\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 Py_RETURN_NONE;
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Line endings translated so far.\n"
117 "\n"
118 "Only line endings translated during reading are considered.\n"
119 "\n"
120 "Subclasses should override.\n"
121 );
122
123static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125{
126 Py_RETURN_NONE;
127}
128
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000129PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000130 "The error setting of the decoder or encoder.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000141
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000142static PyMethodDef textiobase_methods[] = {
143 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
144 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
145 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
146 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000147 {NULL, NULL}
148};
149
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000150static PyGetSetDef textiobase_getset[] = {
151 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
152 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
153 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000154 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000155};
156
157PyTypeObject PyTextIOBase_Type = {
158 PyVarObject_HEAD_INIT(NULL, 0)
159 "_io._TextIOBase", /*tp_name*/
160 0, /*tp_basicsize*/
161 0, /*tp_itemsize*/
162 0, /*tp_dealloc*/
163 0, /*tp_print*/
164 0, /*tp_getattr*/
165 0, /*tp_setattr*/
166 0, /*tp_compare */
167 0, /*tp_repr*/
168 0, /*tp_as_number*/
169 0, /*tp_as_sequence*/
170 0, /*tp_as_mapping*/
171 0, /*tp_hash */
172 0, /*tp_call*/
173 0, /*tp_str*/
174 0, /*tp_getattro*/
175 0, /*tp_setattro*/
176 0, /*tp_as_buffer*/
177 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000178 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000179 0, /* tp_traverse */
180 0, /* tp_clear */
181 0, /* tp_richcompare */
182 0, /* tp_weaklistoffset */
183 0, /* tp_iter */
184 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000185 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000186 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 &PyIOBase_Type, /* tp_base */
189 0, /* tp_dict */
190 0, /* tp_descr_get */
191 0, /* tp_descr_set */
192 0, /* tp_dictoffset */
193 0, /* tp_init */
194 0, /* tp_alloc */
195 0, /* tp_new */
196};
197
198
199/* IncrementalNewlineDecoder */
200
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000201PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000202 "Codec used when reading a file in universal newlines mode. It wraps\n"
203 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
204 "records the types of newlines encountered. When used with\n"
205 "translate=False, it ensures that the newline sequence is returned in\n"
206 "one piece. When used with decoder=None, it expects unicode strings as\n"
207 "decode input and translates newlines without first invoking an external\n"
208 "decoder.\n"
209 );
210
211typedef struct {
212 PyObject_HEAD
213 PyObject *decoder;
214 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000215 signed int pendingcr: 1;
216 signed int translate: 1;
217 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000218} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000219
220static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000221incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222 PyObject *args, PyObject *kwds)
223{
224 PyObject *decoder;
225 int translate;
226 PyObject *errors = NULL;
227 char *kwlist[] = {"decoder", "translate", "errors", NULL};
228
229 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
230 kwlist, &decoder, &translate, &errors))
231 return -1;
232
233 self->decoder = decoder;
234 Py_INCREF(decoder);
235
236 if (errors == NULL) {
237 self->errors = PyUnicode_FromString("strict");
238 if (self->errors == NULL)
239 return -1;
240 }
241 else {
242 Py_INCREF(errors);
243 self->errors = errors;
244 }
245
246 self->translate = translate;
247 self->seennl = 0;
248 self->pendingcr = 0;
249
250 return 0;
251}
252
253static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000254incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000255{
256 Py_CLEAR(self->decoder);
257 Py_CLEAR(self->errors);
258 Py_TYPE(self)->tp_free((PyObject *)self);
259}
260
/* Bit flags accumulated in nldecoder_object.seennl. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
265
266PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000267_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 PyObject *input, int final)
269{
270 PyObject *output;
271 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273
274 if (self->decoder == NULL) {
275 PyErr_SetString(PyExc_ValueError,
276 "IncrementalNewlineDecoder.__init__ not called");
277 return NULL;
278 }
279
280 /* decode input (with the eventual \r from a previous pass) */
281 if (self->decoder != Py_None) {
282 output = PyObject_CallMethodObjArgs(self->decoder,
283 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
284 }
285 else {
286 output = input;
287 Py_INCREF(output);
288 }
289
290 if (output == NULL)
291 return NULL;
292
293 if (!PyUnicode_Check(output)) {
294 PyErr_SetString(PyExc_TypeError,
295 "decoder should return a string result");
296 goto error;
297 }
298
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200299 if (PyUnicode_READY(output) == -1)
300 goto error;
301
302 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000303 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200304 /* Prefix output with CR */
305 int kind;
306 PyObject *modified;
307 char *out;
308
309 modified = PyUnicode_New(output_len + 1,
310 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311 if (modified == NULL)
312 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200313 kind = PyUnicode_KIND(modified);
314 out = PyUnicode_DATA(modified);
315 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200316 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000317 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200318 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000319 self->pendingcr = 0;
320 output_len++;
321 }
322
323 /* retain last \r even when not translating data:
324 * then readline() is sure to get \r\n in one pass
325 */
326 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000327 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
329 {
330 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
331 if (modified == NULL)
332 goto error;
333 Py_DECREF(output);
334 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000335 self->pendingcr = 1;
336 }
337 }
338
339 /* Record which newlines are read and do newline translation if desired,
340 all in one pass. */
341 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000343 Py_ssize_t len;
344 int seennl = self->seennl;
345 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200346 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 in_str = PyUnicode_DATA(output);
349 len = PyUnicode_GET_LENGTH(output);
350 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000351
352 if (len == 0)
353 return output;
354
355 /* If, up to now, newlines are consistently \n, do a quick check
356 for the \r *byte* with the libc's optimized memchr.
357 */
358 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200359 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360 }
361
Antoine Pitrou66913e22009-03-06 23:40:56 +0000362 if (only_lf) {
363 /* If not already seen, quick scan for a possible "\n" character.
364 (there's nothing else to be done, even when in translation mode)
365 */
366 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200367 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100368 if (kind == PyUnicode_1BYTE_KIND)
369 seennl |= SEEN_LF;
370 else {
371 Py_ssize_t i = 0;
372 for (;;) {
373 Py_UCS4 c;
374 /* Fast loop for non-control characters */
375 while (PyUnicode_READ(kind, in_str, i) > '\n')
376 i++;
377 c = PyUnicode_READ(kind, in_str, i++);
378 if (c == '\n') {
379 seennl |= SEEN_LF;
380 break;
381 }
382 if (i >= len)
383 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000385 }
386 }
387 /* Finished: we have scanned for newlines, and none of them
388 need translating */
389 }
390 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200391 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000392 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000393 if (seennl == SEEN_ALL)
394 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000395 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200396 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200398 while (PyUnicode_READ(kind, in_str, i) > '\r')
399 i++;
400 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000401 if (c == '\n')
402 seennl |= SEEN_LF;
403 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200404 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000405 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200406 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000407 }
408 else
409 seennl |= SEEN_CR;
410 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200411 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000412 break;
413 if (seennl == SEEN_ALL)
414 break;
415 }
416 endscan:
417 ;
418 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000419 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 void *translated;
421 int kind = PyUnicode_KIND(output);
422 void *in_str = PyUnicode_DATA(output);
423 Py_ssize_t in, out;
424 /* XXX: Previous in-place translation here is disabled as
425 resizing is not possible anymore */
426 /* We could try to optimize this so that we only do a copy
427 when there is something to translate. On the other hand,
428 we already know there is a \r byte, so chances are high
429 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200430 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 if (translated == NULL) {
432 PyErr_NoMemory();
433 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200437 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000438 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200439 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
440 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000441 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000443 seennl |= SEEN_LF;
444 continue;
445 }
446 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000448 in++;
449 seennl |= SEEN_CRLF;
450 }
451 else
452 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000454 continue;
455 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000457 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200458 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000459 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200460 Py_DECREF(output);
461 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100462 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200463 if (!output)
464 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 }
466 self->seennl |= seennl;
467 }
468
469 return output;
470
471 error:
472 Py_DECREF(output);
473 return NULL;
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478 PyObject *args, PyObject *kwds)
479{
480 char *kwlist[] = {"input", "final", NULL};
481 PyObject *input;
482 int final = 0;
483
484 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
485 kwlist, &input, &final))
486 return NULL;
487 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
488}
489
490static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000491incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000492{
493 PyObject *buffer;
494 unsigned PY_LONG_LONG flag;
495
496 if (self->decoder != Py_None) {
497 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
498 _PyIO_str_getstate, NULL);
499 if (state == NULL)
500 return NULL;
501 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
502 Py_DECREF(state);
503 return NULL;
504 }
505 Py_INCREF(buffer);
506 Py_DECREF(state);
507 }
508 else {
509 buffer = PyBytes_FromString("");
510 flag = 0;
511 }
512 flag <<= 1;
513 if (self->pendingcr)
514 flag |= 1;
515 return Py_BuildValue("NK", buffer, flag);
516}
517
518static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000519incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000520{
521 PyObject *buffer;
522 unsigned PY_LONG_LONG flag;
523
524 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
525 return NULL;
526
527 self->pendingcr = (int) flag & 1;
528 flag >>= 1;
529
530 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200531 return _PyObject_CallMethodId(self->decoder,
532 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000533 else
534 Py_RETURN_NONE;
535}
536
537static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000538incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000539{
540 self->seennl = 0;
541 self->pendingcr = 0;
542 if (self->decoder != Py_None)
543 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
544 else
545 Py_RETURN_NONE;
546}
547
548static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000549incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000550{
551 switch (self->seennl) {
552 case SEEN_CR:
553 return PyUnicode_FromString("\r");
554 case SEEN_LF:
555 return PyUnicode_FromString("\n");
556 case SEEN_CRLF:
557 return PyUnicode_FromString("\r\n");
558 case SEEN_CR | SEEN_LF:
559 return Py_BuildValue("ss", "\r", "\n");
560 case SEEN_CR | SEEN_CRLF:
561 return Py_BuildValue("ss", "\r", "\r\n");
562 case SEEN_LF | SEEN_CRLF:
563 return Py_BuildValue("ss", "\n", "\r\n");
564 case SEEN_CR | SEEN_LF | SEEN_CRLF:
565 return Py_BuildValue("sss", "\r", "\n", "\r\n");
566 default:
567 Py_RETURN_NONE;
568 }
569
570}
571
572
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000573static PyMethodDef incrementalnewlinedecoder_methods[] = {
574 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
575 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
576 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
577 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000578 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000579};
580
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000581static PyGetSetDef incrementalnewlinedecoder_getset[] = {
582 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000583 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584};
585
586PyTypeObject PyIncrementalNewlineDecoder_Type = {
587 PyVarObject_HEAD_INIT(NULL, 0)
588 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000589 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000591 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 0, /*tp_print*/
593 0, /*tp_getattr*/
594 0, /*tp_setattr*/
595 0, /*tp_compare */
596 0, /*tp_repr*/
597 0, /*tp_as_number*/
598 0, /*tp_as_sequence*/
599 0, /*tp_as_mapping*/
600 0, /*tp_hash */
601 0, /*tp_call*/
602 0, /*tp_str*/
603 0, /*tp_getattro*/
604 0, /*tp_setattro*/
605 0, /*tp_as_buffer*/
606 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000607 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000608 0, /* tp_traverse */
609 0, /* tp_clear */
610 0, /* tp_richcompare */
611 0, /*tp_weaklistoffset*/
612 0, /* tp_iter */
613 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000614 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000615 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 0, /* tp_base */
618 0, /* tp_dict */
619 0, /* tp_descr_get */
620 0, /* tp_descr_set */
621 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000622 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000623 0, /* tp_alloc */
624 PyType_GenericNew, /* tp_new */
625};
626
627
628/* TextIOWrapper */
629
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000630PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000631 "Character and line based layer over a BufferedIOBase object, buffer.\n"
632 "\n"
633 "encoding gives the name of the encoding that the stream will be\n"
634 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
635 "\n"
636 "errors determines the strictness of encoding and decoding (see the\n"
637 "codecs.register) and defaults to \"strict\".\n"
638 "\n"
639 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
640 "handling of line endings. If it is None, universal newlines is\n"
641 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
642 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
643 "caller. Conversely, on output, '\\n' is translated to the system\n"
644 "default line seperator, os.linesep. If newline is any other of its\n"
645 "legal values, that newline becomes the newline when the file is read\n"
646 "and it is returned untranslated. On output, '\\n' is converted to the\n"
647 "newline.\n"
648 "\n"
649 "If line_buffering is True, a call to flush is implied when a call to\n"
650 "write contains a newline character."
651 );
652
653typedef PyObject *
654 (*encodefunc_t)(PyObject *, PyObject *);
655
656typedef struct
657{
658 PyObject_HEAD
659 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000660 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 Py_ssize_t chunk_size;
662 PyObject *buffer;
663 PyObject *encoding;
664 PyObject *encoder;
665 PyObject *decoder;
666 PyObject *readnl;
667 PyObject *errors;
668 const char *writenl; /* utf-8 encoded, NULL stands for \n */
669 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200670 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000671 char readuniversal;
672 char readtranslate;
673 char writetranslate;
674 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200675 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000676 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000677 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 /* Specialized encoding func (see below) */
679 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000680 /* Whether or not it's the start of the stream */
681 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682
683 /* Reads and writes are internally buffered in order to speed things up.
684 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000685
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000686 Please also note that text to be written is first encoded before being
687 buffered. This is necessary so that encoding errors are immediately
688 reported to the caller, but it unfortunately means that the
689 IncrementalEncoder (whose encode() method is always written in Python)
690 becomes a bottleneck for small writes.
691 */
692 PyObject *decoded_chars; /* buffer for text returned from decoder */
693 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
694 PyObject *pending_bytes; /* list of bytes objects waiting to be
695 written, or NULL */
696 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000697
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698 /* snapshot is either None, or a tuple (dec_flags, next_input) where
699 * dec_flags is the second (integer) item of the decoder state and
700 * next_input is the chunk of input bytes that comes next after the
701 * snapshot point. We use this to reconstruct decoder states in tell().
702 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000703 PyObject *snapshot;
704 /* Bytes-to-characters ratio for the current chunk. Serves as input for
705 the heuristic in tell(). */
706 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707
708 /* Cache raw object if it's a FileIO object */
709 PyObject *raw;
710
711 PyObject *weakreflist;
712 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714
715
716/* A couple of specialized cases in order to bypass the slow incremental
717 encoding methods for the most popular encodings. */
718
719static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000720ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200722 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723}
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100728 return _PyUnicode_EncodeUTF16(text,
729 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730}
731
732static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000733utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000734{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100735 return _PyUnicode_EncodeUTF16(text,
736 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737}
738
739static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000740utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000741{
Antoine Pitroue4501852009-05-14 18:55:55 +0000742 if (!self->encoding_start_of_stream) {
743 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000745 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000747 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000748#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000749 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100750 return _PyUnicode_EncodeUTF16(text,
751 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000752}
753
Antoine Pitroue4501852009-05-14 18:55:55 +0000754static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000755utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000756{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100757 return _PyUnicode_EncodeUTF32(text,
758 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000759}
760
761static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000762utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000763{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100764 return _PyUnicode_EncodeUTF32(text,
765 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000766}
767
768static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000769utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000770{
771 if (!self->encoding_start_of_stream) {
772 /* Skip the BOM and use native byte ordering */
773#if defined(WORDS_BIGENDIAN)
774 return utf32be_encode(self, text);
775#else
776 return utf32le_encode(self, text);
777#endif
778 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100779 return _PyUnicode_EncodeUTF32(text,
780 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000781}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782
783static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000784utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000785{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200786 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000787}
788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793}
794
795/* Map normalized encoding names onto the specialized encoding funcs */
796
797typedef struct {
798 const char *name;
799 encodefunc_t encodefunc;
800} encodefuncentry;
801
Antoine Pitrou24f36292009-03-28 22:16:42 +0000802static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803 {"ascii", (encodefunc_t) ascii_encode},
804 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000805 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000806 {"utf-16-be", (encodefunc_t) utf16be_encode},
807 {"utf-16-le", (encodefunc_t) utf16le_encode},
808 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000809 {"utf-32-be", (encodefunc_t) utf32be_encode},
810 {"utf-32-le", (encodefunc_t) utf32le_encode},
811 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 {NULL, NULL}
813};
814
815
816static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000817textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818{
819 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200820 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000821 NULL};
822 PyObject *buffer, *raw;
823 char *encoding = NULL;
824 char *errors = NULL;
825 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200826 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000827 _PyIO_State *state = IO_STATE;
828
829 PyObject *res;
830 int r;
831
832 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000833 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200834 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000835 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200836 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 return -1;
838
839 if (newline && newline[0] != '\0'
840 && !(newline[0] == '\n' && newline[1] == '\0')
841 && !(newline[0] == '\r' && newline[1] == '\0')
842 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
843 PyErr_Format(PyExc_ValueError,
844 "illegal newline value: %s", newline);
845 return -1;
846 }
847
848 Py_CLEAR(self->buffer);
849 Py_CLEAR(self->encoding);
850 Py_CLEAR(self->encoder);
851 Py_CLEAR(self->decoder);
852 Py_CLEAR(self->readnl);
853 Py_CLEAR(self->decoded_chars);
854 Py_CLEAR(self->pending_bytes);
855 Py_CLEAR(self->snapshot);
856 Py_CLEAR(self->errors);
857 Py_CLEAR(self->raw);
858 self->decoded_chars_used = 0;
859 self->pending_bytes_count = 0;
860 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000861 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000862
863 if (encoding == NULL) {
864 /* Try os.device_encoding(fileno) */
865 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200866 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000867 /* Ignore only AttributeError and UnsupportedOperation */
868 if (fileno == NULL) {
869 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
870 PyErr_ExceptionMatches(state->unsupported_operation)) {
871 PyErr_Clear();
872 }
873 else {
874 goto error;
875 }
876 }
877 else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200878 self->encoding = _PyObject_CallMethodId(state->os_module,
879 &PyId_device_encoding,
880 "N", fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000881 if (self->encoding == NULL)
882 goto error;
883 else if (!PyUnicode_Check(self->encoding))
884 Py_CLEAR(self->encoding);
885 }
886 }
887 if (encoding == NULL && self->encoding == NULL) {
888 if (state->locale_module == NULL) {
889 state->locale_module = PyImport_ImportModule("locale");
890 if (state->locale_module == NULL)
891 goto catch_ImportError;
892 else
893 goto use_locale;
894 }
895 else {
896 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200897 self->encoding = _PyObject_CallMethodId(
898 state->locale_module, &PyId_getpreferredencoding, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000899 if (self->encoding == NULL) {
900 catch_ImportError:
901 /*
902 Importing locale can raise a ImportError because of
903 _functools, and locale.getpreferredencoding can raise a
904 ImportError if _locale is not available. These will happen
905 during module building.
906 */
907 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
908 PyErr_Clear();
909 self->encoding = PyUnicode_FromString("ascii");
910 }
911 else
912 goto error;
913 }
914 else if (!PyUnicode_Check(self->encoding))
915 Py_CLEAR(self->encoding);
916 }
917 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000918 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000919 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000920 if (encoding == NULL)
921 goto error;
922 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000923 else if (encoding != NULL) {
924 self->encoding = PyUnicode_FromString(encoding);
925 if (self->encoding == NULL)
926 goto error;
927 }
928 else {
929 PyErr_SetString(PyExc_IOError,
930 "could not determine default encoding");
931 }
932
933 if (errors == NULL)
934 errors = "strict";
935 self->errors = PyBytes_FromString(errors);
936 if (self->errors == NULL)
937 goto error;
938
939 self->chunk_size = 8192;
940 self->readuniversal = (newline == NULL || newline[0] == '\0');
941 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200942 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000943 self->readtranslate = (newline == NULL);
944 if (newline) {
945 self->readnl = PyUnicode_FromString(newline);
946 if (self->readnl == NULL)
947 return -1;
948 }
949 self->writetranslate = (newline == NULL || newline[0] != '\0');
950 if (!self->readuniversal && self->readnl) {
951 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000952 if (self->writenl == NULL)
953 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000954 if (!strcmp(self->writenl, "\n"))
955 self->writenl = NULL;
956 }
957#ifdef MS_WINDOWS
958 else
959 self->writenl = "\r\n";
960#endif
961
962 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200963 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 if (res == NULL)
965 goto error;
966 r = PyObject_IsTrue(res);
967 Py_DECREF(res);
968 if (r == -1)
969 goto error;
970 if (r == 1) {
971 self->decoder = PyCodec_IncrementalDecoder(
972 encoding, errors);
973 if (self->decoder == NULL)
974 goto error;
975
976 if (self->readuniversal) {
977 PyObject *incrementalDecoder = PyObject_CallFunction(
978 (PyObject *)&PyIncrementalNewlineDecoder_Type,
979 "Oi", self->decoder, (int)self->readtranslate);
980 if (incrementalDecoder == NULL)
981 goto error;
982 Py_CLEAR(self->decoder);
983 self->decoder = incrementalDecoder;
984 }
985 }
986
987 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200988 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000989 if (res == NULL)
990 goto error;
991 r = PyObject_IsTrue(res);
992 Py_DECREF(res);
993 if (r == -1)
994 goto error;
995 if (r == 1) {
996 PyObject *ci;
997 self->encoder = PyCodec_IncrementalEncoder(
998 encoding, errors);
999 if (self->encoder == NULL)
1000 goto error;
1001 /* Get the normalized named of the codec */
1002 ci = _PyCodec_Lookup(encoding);
1003 if (ci == NULL)
1004 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001005 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001006 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001007 if (res == NULL) {
1008 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1009 PyErr_Clear();
1010 else
1011 goto error;
1012 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001013 else if (PyUnicode_Check(res)) {
1014 encodefuncentry *e = encodefuncs;
1015 while (e->name != NULL) {
1016 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1017 self->encodefunc = e->encodefunc;
1018 break;
1019 }
1020 e++;
1021 }
1022 }
1023 Py_XDECREF(res);
1024 }
1025
1026 self->buffer = buffer;
1027 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001028
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001029 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1030 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1031 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001032 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001033 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001034 if (raw == NULL) {
1035 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1036 PyErr_Clear();
1037 else
1038 goto error;
1039 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 else if (Py_TYPE(raw) == &PyFileIO_Type)
1041 self->raw = raw;
1042 else
1043 Py_DECREF(raw);
1044 }
1045
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001046 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001047 if (res == NULL)
1048 goto error;
1049 self->seekable = self->telling = PyObject_IsTrue(res);
1050 Py_DECREF(res);
1051
Martin v. Löwis767046a2011-10-14 15:35:36 +02001052 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001053
Antoine Pitroue4501852009-05-14 18:55:55 +00001054 self->encoding_start_of_stream = 0;
1055 if (self->seekable && self->encoder) {
1056 PyObject *cookieObj;
1057 int cmp;
1058
1059 self->encoding_start_of_stream = 1;
1060
1061 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1062 if (cookieObj == NULL)
1063 goto error;
1064
1065 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1066 Py_DECREF(cookieObj);
1067 if (cmp < 0) {
1068 goto error;
1069 }
1070
1071 if (cmp == 0) {
1072 self->encoding_start_of_stream = 0;
1073 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1074 _PyIO_zero, NULL);
1075 if (res == NULL)
1076 goto error;
1077 Py_DECREF(res);
1078 }
1079 }
1080
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001081 self->ok = 1;
1082 return 0;
1083
1084 error:
1085 return -1;
1086}
1087
1088static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001089_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001090{
1091 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1092 return -1;
1093 self->ok = 0;
1094 Py_CLEAR(self->buffer);
1095 Py_CLEAR(self->encoding);
1096 Py_CLEAR(self->encoder);
1097 Py_CLEAR(self->decoder);
1098 Py_CLEAR(self->readnl);
1099 Py_CLEAR(self->decoded_chars);
1100 Py_CLEAR(self->pending_bytes);
1101 Py_CLEAR(self->snapshot);
1102 Py_CLEAR(self->errors);
1103 Py_CLEAR(self->raw);
1104 return 0;
1105}
1106
1107static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001108textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001109{
Antoine Pitroue033e062010-10-29 10:38:18 +00001110 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001111 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001112 return;
1113 _PyObject_GC_UNTRACK(self);
1114 if (self->weakreflist != NULL)
1115 PyObject_ClearWeakRefs((PyObject *)self);
1116 Py_CLEAR(self->dict);
1117 Py_TYPE(self)->tp_free((PyObject *)self);
1118}
1119
1120static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001121textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122{
1123 Py_VISIT(self->buffer);
1124 Py_VISIT(self->encoding);
1125 Py_VISIT(self->encoder);
1126 Py_VISIT(self->decoder);
1127 Py_VISIT(self->readnl);
1128 Py_VISIT(self->decoded_chars);
1129 Py_VISIT(self->pending_bytes);
1130 Py_VISIT(self->snapshot);
1131 Py_VISIT(self->errors);
1132 Py_VISIT(self->raw);
1133
1134 Py_VISIT(self->dict);
1135 return 0;
1136}
1137
1138static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001139textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001140{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001141 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001142 return -1;
1143 Py_CLEAR(self->dict);
1144 return 0;
1145}
1146
1147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001148textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001149
1150/* This macro takes some shortcuts to make the common case faster. */
1151#define CHECK_CLOSED(self) \
1152 do { \
1153 int r; \
1154 PyObject *_res; \
1155 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1156 if (self->raw != NULL) \
1157 r = _PyFileIO_closed(self->raw); \
1158 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001159 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001160 if (_res == NULL) \
1161 return NULL; \
1162 r = PyObject_IsTrue(_res); \
1163 Py_DECREF(_res); \
1164 if (r < 0) \
1165 return NULL; \
1166 } \
1167 if (r > 0) { \
1168 PyErr_SetString(PyExc_ValueError, \
1169 "I/O operation on closed file."); \
1170 return NULL; \
1171 } \
1172 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001173 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001174 return NULL; \
1175 } while (0)
1176
/* Bail out of the enclosing function with ValueError when the object is
   not usable (self->ok <= 0).  The message depends on whether the buffer
   was detached or the object was never initialized.
   CHECK_INITIALIZED returns NULL, CHECK_INITIALIZED_INT returns -1. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }

#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1200
1201
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001202static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001203textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001204{
1205 PyObject *buffer, *res;
1206 CHECK_INITIALIZED(self);
1207 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1208 if (res == NULL)
1209 return NULL;
1210 Py_DECREF(res);
1211 buffer = self->buffer;
1212 self->buffer = NULL;
1213 self->detached = 1;
1214 self->ok = 0;
1215 return buffer;
1216}
1217
Antoine Pitrou24f36292009-03-28 22:16:42 +00001218/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001219 underlying buffered object, though. */
1220static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001221_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001223 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001224
1225 if (self->pending_bytes == NULL)
1226 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001227
1228 pending = self->pending_bytes;
1229 Py_INCREF(pending);
1230 self->pending_bytes_count = 0;
1231 Py_CLEAR(self->pending_bytes);
1232
1233 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1234 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001235 if (b == NULL)
1236 return -1;
1237 ret = PyObject_CallMethodObjArgs(self->buffer,
1238 _PyIO_str_write, b, NULL);
1239 Py_DECREF(b);
1240 if (ret == NULL)
1241 return -1;
1242 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001243 return 0;
1244}
1245
1246static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001247textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248{
1249 PyObject *ret;
1250 PyObject *text; /* owned reference */
1251 PyObject *b;
1252 Py_ssize_t textlen;
1253 int haslf = 0;
1254 int needflush = 0;
1255
1256 CHECK_INITIALIZED(self);
1257
1258 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1259 return NULL;
1260 }
1261
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 if (PyUnicode_READY(text) == -1)
1263 return NULL;
1264
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265 CHECK_CLOSED(self);
1266
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001267 if (self->encoder == NULL)
1268 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001269
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270 Py_INCREF(text);
1271
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001272 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001273
1274 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001276 haslf = 1;
1277
1278 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001279 PyObject *newtext = _PyObject_CallMethodId(
1280 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001281 Py_DECREF(text);
1282 if (newtext == NULL)
1283 return NULL;
1284 text = newtext;
1285 }
1286
Antoine Pitroue96ec682011-07-23 21:46:35 +02001287 if (self->write_through)
1288 needflush = 1;
1289 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001290 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001292 needflush = 1;
1293
1294 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001295 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001297 self->encoding_start_of_stream = 0;
1298 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001299 else
1300 b = PyObject_CallMethodObjArgs(self->encoder,
1301 _PyIO_str_encode, text, NULL);
1302 Py_DECREF(text);
1303 if (b == NULL)
1304 return NULL;
1305
1306 if (self->pending_bytes == NULL) {
1307 self->pending_bytes = PyList_New(0);
1308 if (self->pending_bytes == NULL) {
1309 Py_DECREF(b);
1310 return NULL;
1311 }
1312 self->pending_bytes_count = 0;
1313 }
1314 if (PyList_Append(self->pending_bytes, b) < 0) {
1315 Py_DECREF(b);
1316 return NULL;
1317 }
1318 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1319 Py_DECREF(b);
1320 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001321 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 return NULL;
1323 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001324
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001325 if (needflush) {
1326 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1327 if (ret == NULL)
1328 return NULL;
1329 Py_DECREF(ret);
1330 }
1331
1332 Py_CLEAR(self->snapshot);
1333
1334 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001335 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001336 if (ret == NULL)
1337 return NULL;
1338 Py_DECREF(ret);
1339 }
1340
1341 return PyLong_FromSsize_t(textlen);
1342}
1343
1344/* Steal a reference to chars and store it in the decoded_char buffer;
1345 */
1346static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001347textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001348{
1349 Py_CLEAR(self->decoded_chars);
1350 self->decoded_chars = chars;
1351 self->decoded_chars_used = 0;
1352}
1353
1354static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001355textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001356{
1357 PyObject *chars;
1358 Py_ssize_t avail;
1359
1360 if (self->decoded_chars == NULL)
1361 return PyUnicode_FromStringAndSize(NULL, 0);
1362
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001363 /* decoded_chars is guaranteed to be "ready". */
1364 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001365 - self->decoded_chars_used);
1366
1367 assert(avail >= 0);
1368
1369 if (n < 0 || n > avail)
1370 n = avail;
1371
1372 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001373 chars = PyUnicode_Substring(self->decoded_chars,
1374 self->decoded_chars_used,
1375 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001376 if (chars == NULL)
1377 return NULL;
1378 }
1379 else {
1380 chars = self->decoded_chars;
1381 Py_INCREF(chars);
1382 }
1383
1384 self->decoded_chars_used += n;
1385 return chars;
1386}
1387
1388/* Read and decode the next chunk of data from the BufferedReader.
1389 */
1390static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001391textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001392{
1393 PyObject *dec_buffer = NULL;
1394 PyObject *dec_flags = NULL;
1395 PyObject *input_chunk = NULL;
1396 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001397 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001398 int eof;
1399
1400 /* The return value is True unless EOF was reached. The decoded string is
1401 * placed in self._decoded_chars (replacing its previous value). The
1402 * entire input chunk is sent to the decoder, though some of it may remain
1403 * buffered in the decoder, yet to be converted.
1404 */
1405
1406 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001407 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001408 return -1;
1409 }
1410
1411 if (self->telling) {
1412 /* To prepare for tell(), we need to snapshot a point in the file
1413 * where the decoder's input buffer is empty.
1414 */
1415
1416 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1417 _PyIO_str_getstate, NULL);
1418 if (state == NULL)
1419 return -1;
1420 /* Given this, we know there was a valid snapshot point
1421 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1422 */
1423 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1424 Py_DECREF(state);
1425 return -1;
1426 }
1427 Py_INCREF(dec_buffer);
1428 Py_INCREF(dec_flags);
1429 Py_DECREF(state);
1430 }
1431
1432 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1433 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1434 if (chunk_size == NULL)
1435 goto fail;
1436 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001437 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1438 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001439 Py_DECREF(chunk_size);
1440 if (input_chunk == NULL)
1441 goto fail;
1442 assert(PyBytes_Check(input_chunk));
1443
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001444 nbytes = PyBytes_Size(input_chunk);
1445 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001446
1447 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1448 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1449 self->decoder, input_chunk, eof);
1450 }
1451 else {
1452 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1453 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1454 }
1455
1456 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1457 if (decoded_chars == NULL)
1458 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001459 if (PyUnicode_READY(decoded_chars) == -1)
1460 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001461 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001462 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001463 if (nchars > 0)
1464 self->b2cratio = (double) nbytes / nchars;
1465 else
1466 self->b2cratio = 0.0;
1467 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001468 eof = 0;
1469
1470 if (self->telling) {
1471 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1472 * next input to be decoded is dec_buffer + input_chunk.
1473 */
1474 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1475 if (next_input == NULL)
1476 goto fail;
1477 assert (PyBytes_Check(next_input));
1478 Py_DECREF(dec_buffer);
1479 Py_CLEAR(self->snapshot);
1480 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1481 }
1482 Py_DECREF(input_chunk);
1483
1484 return (eof == 0);
1485
1486 fail:
1487 Py_XDECREF(dec_buffer);
1488 Py_XDECREF(dec_flags);
1489 Py_XDECREF(input_chunk);
1490 return -1;
1491}
1492
1493static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001494textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001495{
1496 Py_ssize_t n = -1;
1497 PyObject *result = NULL, *chunks = NULL;
1498
1499 CHECK_INITIALIZED(self);
1500
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001501 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001502 return NULL;
1503
1504 CHECK_CLOSED(self);
1505
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001506 if (self->decoder == NULL)
1507 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001508
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001509 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001510 return NULL;
1511
1512 if (n < 0) {
1513 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001514 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001515 PyObject *decoded;
1516 if (bytes == NULL)
1517 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001518
1519 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1520 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1521 bytes, 1);
1522 else
1523 decoded = PyObject_CallMethodObjArgs(
1524 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001525 Py_DECREF(bytes);
1526 if (decoded == NULL)
1527 goto fail;
1528
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001529 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001530
1531 if (result == NULL) {
1532 Py_DECREF(decoded);
1533 return NULL;
1534 }
1535
1536 PyUnicode_AppendAndDel(&result, decoded);
1537 if (result == NULL)
1538 goto fail;
1539
1540 Py_CLEAR(self->snapshot);
1541 return result;
1542 }
1543 else {
1544 int res = 1;
1545 Py_ssize_t remaining = n;
1546
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001547 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001548 if (result == NULL)
1549 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001550 if (PyUnicode_READY(result) == -1)
1551 goto fail;
1552 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001553
1554 /* Keep reading chunks until we have n characters to return */
1555 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001556 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001557 if (res < 0)
1558 goto fail;
1559 if (res == 0) /* EOF */
1560 break;
1561 if (chunks == NULL) {
1562 chunks = PyList_New(0);
1563 if (chunks == NULL)
1564 goto fail;
1565 }
1566 if (PyList_Append(chunks, result) < 0)
1567 goto fail;
1568 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001569 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001570 if (result == NULL)
1571 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001572 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001573 }
1574 if (chunks != NULL) {
1575 if (result != NULL && PyList_Append(chunks, result) < 0)
1576 goto fail;
1577 Py_CLEAR(result);
1578 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1579 if (result == NULL)
1580 goto fail;
1581 Py_CLEAR(chunks);
1582 }
1583 return result;
1584 }
1585 fail:
1586 Py_XDECREF(result);
1587 Py_XDECREF(chunks);
1588 return NULL;
1589}
1590
1591
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001592/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593 that is to the NUL character. Otherwise the function will produce
1594 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001595static char *
1596find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001597{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001598 if (kind == PyUnicode_1BYTE_KIND) {
1599 assert(ch < 256);
1600 return (char *) memchr((void *) s, (char) ch, end - s);
1601 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001603 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001604 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001605 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001606 return s;
1607 if (s == end)
1608 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001609 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001610 }
1611}
1612
1613Py_ssize_t
1614_PyIO_find_line_ending(
1615 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001616 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001617{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001618 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001619
1620 if (translated) {
1621 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001622 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001624 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001625 else {
1626 *consumed = len;
1627 return -1;
1628 }
1629 }
1630 else if (universal) {
1631 /* Universal newline search. Find any of \r, \r\n, \n
1632 * The decoder ensures that \r\n are not split in two pieces
1633 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001634 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001635 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001636 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001637 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001638 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001639 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001640 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 if (s >= end) {
1642 *consumed = len;
1643 return -1;
1644 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001645 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001646 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001647 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001648 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001650 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001651 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001652 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001653 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001654 }
1655 }
1656 }
1657 else {
1658 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001659 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1660 char *nl = PyUnicode_DATA(readnl);
1661 /* Assume that readnl is an ASCII character. */
1662 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001664 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001665 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001666 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 *consumed = len;
1668 return -1;
1669 }
1670 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001671 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001672 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001673 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674 if (e < s)
1675 e = s;
1676 while (s < e) {
1677 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001678 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001679 if (pos == NULL || pos >= e)
1680 break;
1681 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001682 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001683 break;
1684 }
1685 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001686 return (pos - start)/kind + readnl_len;
1687 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001689 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001690 if (pos == NULL)
1691 *consumed = len;
1692 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001693 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 return -1;
1695 }
1696 }
1697}
1698
1699static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001700_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001701{
1702 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1703 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1704 int res;
1705
1706 CHECK_CLOSED(self);
1707
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001708 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 return NULL;
1710
1711 chunked = 0;
1712
1713 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001714 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001715 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001716 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 Py_ssize_t consumed = 0;
1718
1719 /* First, get some data if necessary */
1720 res = 1;
1721 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001722 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001723 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001724 if (res < 0)
1725 goto error;
1726 if (res == 0)
1727 break;
1728 }
1729 if (res == 0) {
1730 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001731 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001732 Py_CLEAR(self->snapshot);
1733 start = endpos = offset_to_buffer = 0;
1734 break;
1735 }
1736
1737 if (remaining == NULL) {
1738 line = self->decoded_chars;
1739 start = self->decoded_chars_used;
1740 offset_to_buffer = 0;
1741 Py_INCREF(line);
1742 }
1743 else {
1744 assert(self->decoded_chars_used == 0);
1745 line = PyUnicode_Concat(remaining, self->decoded_chars);
1746 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001747 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001748 Py_CLEAR(remaining);
1749 if (line == NULL)
1750 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001751 if (PyUnicode_READY(line) == -1)
1752 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753 }
1754
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001755 ptr = PyUnicode_DATA(line);
1756 line_len = PyUnicode_GET_LENGTH(line);
1757 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001758
1759 endpos = _PyIO_find_line_ending(
1760 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001761 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001762 ptr + kind * start,
1763 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001764 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001765 if (endpos >= 0) {
1766 endpos += start;
1767 if (limit >= 0 && (endpos - start) + chunked >= limit)
1768 endpos = start + limit - chunked;
1769 break;
1770 }
1771
1772 /* We can put aside up to `endpos` */
1773 endpos = consumed + start;
1774 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1775 /* Didn't find line ending, but reached length limit */
1776 endpos = start + limit - chunked;
1777 break;
1778 }
1779
1780 if (endpos > start) {
1781 /* No line ending seen yet - put aside current data */
1782 PyObject *s;
1783 if (chunks == NULL) {
1784 chunks = PyList_New(0);
1785 if (chunks == NULL)
1786 goto error;
1787 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001788 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001789 if (s == NULL)
1790 goto error;
1791 if (PyList_Append(chunks, s) < 0) {
1792 Py_DECREF(s);
1793 goto error;
1794 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001795 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001796 Py_DECREF(s);
1797 }
1798 /* There may be some remaining bytes we'll have to prepend to the
1799 next chunk of data */
1800 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001801 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001802 if (remaining == NULL)
1803 goto error;
1804 }
1805 Py_CLEAR(line);
1806 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001807 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001808 }
1809
1810 if (line != NULL) {
1811 /* Our line ends in the current buffer */
1812 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001813 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1814 PyObject *s = PyUnicode_Substring(line, start, endpos);
1815 Py_CLEAR(line);
1816 if (s == NULL)
1817 goto error;
1818 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001819 }
1820 }
1821 if (remaining != NULL) {
1822 if (chunks == NULL) {
1823 chunks = PyList_New(0);
1824 if (chunks == NULL)
1825 goto error;
1826 }
1827 if (PyList_Append(chunks, remaining) < 0)
1828 goto error;
1829 Py_CLEAR(remaining);
1830 }
1831 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001832 if (line != NULL) {
1833 if (PyList_Append(chunks, line) < 0)
1834 goto error;
1835 Py_DECREF(line);
1836 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001837 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1838 if (line == NULL)
1839 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001840 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001841 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001842 if (line == NULL) {
1843 Py_INCREF(_PyIO_empty_str);
1844 line = _PyIO_empty_str;
1845 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001846
1847 return line;
1848
1849 error:
1850 Py_XDECREF(chunks);
1851 Py_XDECREF(remaining);
1852 Py_XDECREF(line);
1853 return NULL;
1854}
1855
1856static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001857textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001858{
1859 Py_ssize_t limit = -1;
1860
1861 CHECK_INITIALIZED(self);
1862 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1863 return NULL;
1864 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001865 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001866}
1867
1868/* Seek and Tell */
1869
/* Unpacked form of the integer "cookie" produced by tell() and consumed
   by seek(). */
typedef struct {
    Py_off_t start_pos;     /* position in the underlying buffer that seek()
                               goes back to (the "safe start point") */
    int dec_flags;          /* flags passed to decoder.setstate() there */
    int bytes_to_feed;      /* number of bytes seek() reads from the buffer
                               and feeds to the decoder */
    int chars_to_skip;      /* number of decoded characters to skip after
                               feeding those bytes */
    char need_eof;          /* passed as the `final` argument of
                               decoder.decode() when replaying */
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001877
/*
   To speed up cookie packing/unpacking, the fields are stored in a
   temporary byte string which is converted with _PyLong_FromByteArray()
   (packing) or _PyLong_AsByteArray() (unpacking).
   The following macros define at which offsets in that intermediary byte
   string the various cookie_type fields are stored.
 */

/* Total size of the intermediary byte string: one slot per field. */
#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN 0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP (sizeof(char))
# define OFF_NEED_EOF 0

#else

# define IS_LITTLE_ENDIAN 1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS 0
# define OFF_DEC_FLAGS (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1915
1916static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001917textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001918{
1919 unsigned char buffer[COOKIE_BUF_LEN];
1920 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1921 if (cookieLong == NULL)
1922 return -1;
1923
1924 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1925 IS_LITTLE_ENDIAN, 0) < 0) {
1926 Py_DECREF(cookieLong);
1927 return -1;
1928 }
1929 Py_DECREF(cookieLong);
1930
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001931 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1932 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1933 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1934 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1935 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001936
1937 return 0;
1938}
1939
1940static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001941textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001942{
1943 unsigned char buffer[COOKIE_BUF_LEN];
1944
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001945 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1946 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1947 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1948 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1949 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001950
1951 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1952}
1953#undef IS_LITTLE_ENDIAN
1954
1955static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001956_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001957{
1958 PyObject *res;
1959 /* When seeking to the start of the stream, we call decoder.reset()
1960 rather than decoder.getstate().
1961 This is for a few decoders such as utf-16 for which the state value
1962 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1963 utf-16, that we are expecting a BOM).
1964 */
1965 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1966 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1967 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001968 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1969 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001970 if (res == NULL)
1971 return -1;
1972 Py_DECREF(res);
1973 return 0;
1974}
1975
Antoine Pitroue4501852009-05-14 18:55:55 +00001976static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001977_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001978{
1979 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001980 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001981 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1982 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1983 self->encoding_start_of_stream = 1;
1984 }
1985 else {
1986 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1987 _PyIO_zero, NULL);
1988 self->encoding_start_of_stream = 0;
1989 }
1990 if (res == NULL)
1991 return -1;
1992 Py_DECREF(res);
1993 return 0;
1994}
1995
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001997textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001998{
1999 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002000 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002001 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002 PyObject *res;
2003 int cmp;
2004
2005 CHECK_INITIALIZED(self);
2006
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002007 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2008 return NULL;
2009 CHECK_CLOSED(self);
2010
2011 Py_INCREF(cookieObj);
2012
2013 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002014 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002015 goto fail;
2016 }
2017
2018 if (whence == 1) {
2019 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002020 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021 if (cmp < 0)
2022 goto fail;
2023
2024 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002025 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002026 goto fail;
2027 }
2028
2029 /* Seeking to the current position should attempt to
2030 * sync the underlying buffer with the current position.
2031 */
2032 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002033 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002034 if (cookieObj == NULL)
2035 goto fail;
2036 }
2037 else if (whence == 2) {
2038 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002039 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002040 if (cmp < 0)
2041 goto fail;
2042
2043 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002044 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002045 goto fail;
2046 }
2047
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002048 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049 if (res == NULL)
2050 goto fail;
2051 Py_DECREF(res);
2052
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002053 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054 Py_CLEAR(self->snapshot);
2055 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002056 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002057 if (res == NULL)
2058 goto fail;
2059 Py_DECREF(res);
2060 }
2061
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002062 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 Py_XDECREF(cookieObj);
2064 return res;
2065 }
2066 else if (whence != 0) {
2067 PyErr_Format(PyExc_ValueError,
2068 "invalid whence (%d, should be 0, 1 or 2)", whence);
2069 goto fail;
2070 }
2071
Antoine Pitroue4501852009-05-14 18:55:55 +00002072 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002073 if (cmp < 0)
2074 goto fail;
2075
2076 if (cmp == 1) {
2077 PyErr_Format(PyExc_ValueError,
2078 "negative seek position %R", cookieObj);
2079 goto fail;
2080 }
2081
2082 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2083 if (res == NULL)
2084 goto fail;
2085 Py_DECREF(res);
2086
2087 /* The strategy of seek() is to go back to the safe start point
2088 * and replay the effect of read(chars_to_skip) from there.
2089 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002090 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002091 goto fail;
2092
2093 /* Seek back to the safe start point. */
2094 posobj = PyLong_FromOff_t(cookie.start_pos);
2095 if (posobj == NULL)
2096 goto fail;
2097 res = PyObject_CallMethodObjArgs(self->buffer,
2098 _PyIO_str_seek, posobj, NULL);
2099 Py_DECREF(posobj);
2100 if (res == NULL)
2101 goto fail;
2102 Py_DECREF(res);
2103
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002104 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 Py_CLEAR(self->snapshot);
2106
2107 /* Restore the decoder to its state from the safe start point. */
2108 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002109 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 goto fail;
2111 }
2112
2113 if (cookie.chars_to_skip) {
2114 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002115 PyObject *input_chunk = _PyObject_CallMethodId(
2116 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117 PyObject *decoded;
2118
2119 if (input_chunk == NULL)
2120 goto fail;
2121
2122 assert (PyBytes_Check(input_chunk));
2123
2124 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2125 if (self->snapshot == NULL) {
2126 Py_DECREF(input_chunk);
2127 goto fail;
2128 }
2129
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002130 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2131 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002132
2133 if (decoded == NULL)
2134 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002135 if (PyUnicode_READY(decoded) == -1) {
2136 Py_DECREF(decoded);
2137 goto fail;
2138 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002139
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002140 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002141
2142 /* Skip chars_to_skip of the decoded characters. */
2143 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2144 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2145 goto fail;
2146 }
2147 self->decoded_chars_used = cookie.chars_to_skip;
2148 }
2149 else {
2150 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2151 if (self->snapshot == NULL)
2152 goto fail;
2153 }
2154
Antoine Pitroue4501852009-05-14 18:55:55 +00002155 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2156 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002157 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002158 goto fail;
2159 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002160 return cookieObj;
2161 fail:
2162 Py_XDECREF(cookieObj);
2163 return NULL;
2164
2165}
2166
2167static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002168textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002169{
2170 PyObject *res;
2171 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002172 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173 PyObject *next_input;
2174 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002175 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002176 PyObject *saved_state = NULL;
2177 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002178 char *dec_buffer;
2179 Py_ssize_t dec_buffer_len;
2180 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002181
2182 CHECK_INITIALIZED(self);
2183 CHECK_CLOSED(self);
2184
2185 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002186 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002187 goto fail;
2188 }
2189 if (!self->telling) {
2190 PyErr_SetString(PyExc_IOError,
2191 "telling position disabled by next() call");
2192 goto fail;
2193 }
2194
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002195 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002197 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002198 if (res == NULL)
2199 goto fail;
2200 Py_DECREF(res);
2201
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002202 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002203 if (posobj == NULL)
2204 goto fail;
2205
2206 if (self->decoder == NULL || self->snapshot == NULL) {
2207 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2208 return posobj;
2209 }
2210
2211#if defined(HAVE_LARGEFILE_SUPPORT)
2212 cookie.start_pos = PyLong_AsLongLong(posobj);
2213#else
2214 cookie.start_pos = PyLong_AsLong(posobj);
2215#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002216 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002217 if (PyErr_Occurred())
2218 goto fail;
2219
2220 /* Skip backward to the snapshot point (see _read_chunk). */
2221 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2222 goto fail;
2223
2224 assert (PyBytes_Check(next_input));
2225
2226 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2227
2228 /* How many decoded characters have been used up since the snapshot? */
2229 if (self->decoded_chars_used == 0) {
2230 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002231 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 }
2233
2234 chars_to_skip = self->decoded_chars_used;
2235
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002236 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002237 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2238 _PyIO_str_getstate, NULL);
2239 if (saved_state == NULL)
2240 goto fail;
2241
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002242#define DECODER_GETSTATE() do { \
2243 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2244 _PyIO_str_getstate, NULL); \
2245 if (_state == NULL) \
2246 goto fail; \
2247 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2248 Py_DECREF(_state); \
2249 goto fail; \
2250 } \
2251 Py_DECREF(_state); \
2252 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002253
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002254 /* TODO: replace assert with exception */
2255#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002256 PyObject *_decoded = _PyObject_CallMethodId( \
2257 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002258 if (_decoded == NULL) \
2259 goto fail; \
2260 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002261 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002262 Py_DECREF(_decoded); \
2263 } while (0)
2264
2265 /* Fast search for an acceptable start point, close to our
2266 current pos */
2267 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2268 skip_back = 1;
2269 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2270 input = PyBytes_AS_STRING(next_input);
2271 while (skip_bytes > 0) {
2272 /* Decode up to temptative start point */
2273 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2274 goto fail;
2275 DECODER_DECODE(input, skip_bytes, chars_decoded);
2276 if (chars_decoded <= chars_to_skip) {
2277 DECODER_GETSTATE();
2278 if (dec_buffer_len == 0) {
2279 /* Before pos and no bytes buffered in decoder => OK */
2280 cookie.dec_flags = dec_flags;
2281 chars_to_skip -= chars_decoded;
2282 break;
2283 }
2284 /* Skip back by buffered amount and reset heuristic */
2285 skip_bytes -= dec_buffer_len;
2286 skip_back = 1;
2287 }
2288 else {
2289 /* We're too far ahead, skip back a bit */
2290 skip_bytes -= skip_back;
2291 skip_back *= 2;
2292 }
2293 }
2294 if (skip_bytes <= 0) {
2295 skip_bytes = 0;
2296 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2297 goto fail;
2298 }
2299
2300 /* Note our initial start point. */
2301 cookie.start_pos += skip_bytes;
2302 cookie.chars_to_skip = chars_to_skip;
2303 if (chars_to_skip == 0)
2304 goto finally;
2305
2306 /* We should be close to the desired position. Now feed the decoder one
2307 * byte at a time until we reach the `chars_to_skip` target.
2308 * As we go, note the nearest "safe start point" before the current
2309 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002310 * can safely start from there and advance to this location).
2311 */
2312 chars_decoded = 0;
2313 input = PyBytes_AS_STRING(next_input);
2314 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002315 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002316 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002317 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002318
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002319 DECODER_DECODE(input, 1, n);
2320 /* We got n chars for 1 byte */
2321 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002322 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002323 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002324
2325 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2326 /* Decoder buffer is empty, so this is a safe start point. */
2327 cookie.start_pos += cookie.bytes_to_feed;
2328 chars_to_skip -= chars_decoded;
2329 cookie.dec_flags = dec_flags;
2330 cookie.bytes_to_feed = 0;
2331 chars_decoded = 0;
2332 }
2333 if (chars_decoded >= chars_to_skip)
2334 break;
2335 input++;
2336 }
2337 if (input == input_end) {
2338 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002339 PyObject *decoded = _PyObject_CallMethodId(
2340 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002341 if (decoded == NULL)
2342 goto fail;
2343 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002344 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345 Py_DECREF(decoded);
2346 cookie.need_eof = 1;
2347
2348 if (chars_decoded < chars_to_skip) {
2349 PyErr_SetString(PyExc_IOError,
2350 "can't reconstruct logical file position");
2351 goto fail;
2352 }
2353 }
2354
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002355finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002356 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002357 Py_DECREF(saved_state);
2358 if (res == NULL)
2359 return NULL;
2360 Py_DECREF(res);
2361
2362 /* The returned cookie corresponds to the last safe start point. */
2363 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002364 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002365
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002366fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002367 if (saved_state) {
2368 PyObject *type, *value, *traceback;
2369 PyErr_Fetch(&type, &value, &traceback);
2370
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002371 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002372 Py_DECREF(saved_state);
2373 if (res == NULL)
2374 return NULL;
2375 Py_DECREF(res);
2376
2377 PyErr_Restore(type, value, traceback);
2378 }
2379 return NULL;
2380}
2381
2382static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002383textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002384{
2385 PyObject *pos = Py_None;
2386 PyObject *res;
2387
2388 CHECK_INITIALIZED(self)
2389 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2390 return NULL;
2391 }
2392
2393 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2394 if (res == NULL)
2395 return NULL;
2396 Py_DECREF(res);
2397
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002398 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399}
2400
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002401static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002402textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002403{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002404 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002405
2406 CHECK_INITIALIZED(self);
2407
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002408 res = PyUnicode_FromString("<_io.TextIOWrapper");
2409 if (res == NULL)
2410 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002411 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002412 if (nameobj == NULL) {
2413 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2414 PyErr_Clear();
2415 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002416 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002417 }
2418 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002419 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002420 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002421 if (s == NULL)
2422 goto error;
2423 PyUnicode_AppendAndDel(&res, s);
2424 if (res == NULL)
2425 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002426 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002427 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002428 if (modeobj == NULL) {
2429 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2430 PyErr_Clear();
2431 else
2432 goto error;
2433 }
2434 else {
2435 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2436 Py_DECREF(modeobj);
2437 if (s == NULL)
2438 goto error;
2439 PyUnicode_AppendAndDel(&res, s);
2440 if (res == NULL)
2441 return NULL;
2442 }
2443 s = PyUnicode_FromFormat("%U encoding=%R>",
2444 res, self->encoding);
2445 Py_DECREF(res);
2446 return s;
2447error:
2448 Py_XDECREF(res);
2449 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002450}
2451
2452
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002453/* Inquiries */
2454
2455static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002456textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002457{
2458 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002459 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002460}
2461
2462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002463textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464{
2465 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002466 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002467}
2468
2469static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002470textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471{
2472 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002473 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002474}
2475
2476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002477textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478{
2479 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002480 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002481}
2482
2483static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002484textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485{
2486 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002487 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002488}
2489
2490static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002491textiowrapper_getstate(textio *self, PyObject *args)
2492{
2493 PyErr_Format(PyExc_TypeError,
2494 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2495 return NULL;
2496}
2497
2498static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002499textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500{
2501 CHECK_INITIALIZED(self);
2502 CHECK_CLOSED(self);
2503 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002504 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002505 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002506 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507}
2508
2509static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002510textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511{
2512 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002513 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002514 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002515
Antoine Pitrou6be88762010-05-03 16:48:20 +00002516 res = textiowrapper_closed_get(self, NULL);
2517 if (res == NULL)
2518 return NULL;
2519 r = PyObject_IsTrue(res);
2520 Py_DECREF(res);
2521 if (r < 0)
2522 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002523
Antoine Pitrou6be88762010-05-03 16:48:20 +00002524 if (r > 0) {
2525 Py_RETURN_NONE; /* stream already closed */
2526 }
2527 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002528 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002529 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002530 if (res)
2531 Py_DECREF(res);
2532 else
2533 PyErr_Clear();
2534 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002535 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002536 if (res == NULL) {
2537 return NULL;
2538 }
2539 else
2540 Py_DECREF(res);
2541
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002542 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002543 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002544}
2545
2546static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002547textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002548{
2549 PyObject *line;
2550
2551 CHECK_INITIALIZED(self);
2552
2553 self->telling = 0;
2554 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2555 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002556 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002557 }
2558 else {
2559 line = PyObject_CallMethodObjArgs((PyObject *)self,
2560 _PyIO_str_readline, NULL);
2561 if (line && !PyUnicode_Check(line)) {
2562 PyErr_Format(PyExc_IOError,
2563 "readline() should have returned an str object, "
2564 "not '%.200s'", Py_TYPE(line)->tp_name);
2565 Py_DECREF(line);
2566 return NULL;
2567 }
2568 }
2569
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002570 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002571 return NULL;
2572
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002573 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002574 /* Reached EOF or would have blocked */
2575 Py_DECREF(line);
2576 Py_CLEAR(self->snapshot);
2577 self->telling = self->seekable;
2578 return NULL;
2579 }
2580
2581 return line;
2582}
2583
2584static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002585textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002586{
2587 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002588 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002589}
2590
2591static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002592textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593{
2594 CHECK_INITIALIZED(self);
2595 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2596}
2597
2598static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002599textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002600{
2601 PyObject *res;
2602 CHECK_INITIALIZED(self);
2603 if (self->decoder == NULL)
2604 Py_RETURN_NONE;
2605 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2606 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002607 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2608 PyErr_Clear();
2609 Py_RETURN_NONE;
2610 }
2611 else {
2612 return NULL;
2613 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002614 }
2615 return res;
2616}
2617
2618static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002619textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002620{
2621 CHECK_INITIALIZED(self);
2622 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2623}
2624
2625static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002626textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002627{
2628 CHECK_INITIALIZED(self);
2629 return PyLong_FromSsize_t(self->chunk_size);
2630}
2631
2632static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002633textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002634{
2635 Py_ssize_t n;
2636 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002637 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002638 if (n == -1 && PyErr_Occurred())
2639 return -1;
2640 if (n <= 0) {
2641 PyErr_SetString(PyExc_ValueError,
2642 "a strictly positive integer is required");
2643 return -1;
2644 }
2645 self->chunk_size = n;
2646 return 0;
2647}
2648
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002649static PyMethodDef textiowrapper_methods[] = {
2650 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2651 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2652 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2653 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2654 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2655 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002656
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002657 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2658 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2659 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2660 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2661 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002662 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002663
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002664 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2665 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2666 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002667 {NULL, NULL}
2668};
2669
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002670static PyMemberDef textiowrapper_members[] = {
2671 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2672 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2673 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002674 {NULL}
2675};
2676
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002677static PyGetSetDef textiowrapper_getset[] = {
2678 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2679 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002680/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2681*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002682 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2683 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2684 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2685 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002686 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002687};
2688
2689PyTypeObject PyTextIOWrapper_Type = {
2690 PyVarObject_HEAD_INIT(NULL, 0)
2691 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002692 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002694 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002695 0, /*tp_print*/
2696 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002697 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002698 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002699 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002700 0, /*tp_as_number*/
2701 0, /*tp_as_sequence*/
2702 0, /*tp_as_mapping*/
2703 0, /*tp_hash */
2704 0, /*tp_call*/
2705 0, /*tp_str*/
2706 0, /*tp_getattro*/
2707 0, /*tp_setattro*/
2708 0, /*tp_as_buffer*/
2709 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2710 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002711 textiowrapper_doc, /* tp_doc */
2712 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2713 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002714 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002715 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002716 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002717 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2718 textiowrapper_methods, /* tp_methods */
2719 textiowrapper_members, /* tp_members */
2720 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002721 0, /* tp_base */
2722 0, /* tp_dict */
2723 0, /* tp_descr_get */
2724 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002725 offsetof(textio, dict), /*tp_dictoffset*/
2726 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002727 0, /* tp_alloc */
2728 PyType_GenericNew, /* tp_new */
2729};