blob: 79c64baeefffa77395b82473941f3c9e648f482a [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifiers for the method/attribute names this file refers to by
   name (e.g. PyId_setstate is passed to _PyObject_CallMethodId below). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(device_encoding);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020035
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000036/* TextIOBase */
37
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000038PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000039 "Base class for text I/O.\n"
40 "\n"
41 "This class provides a character and line based interface to stream\n"
42 "I/O. There is no readinto method because Python's character strings\n"
43 "are immutable. There is no public constructor.\n"
44 );
45
46static PyObject *
47_unsupported(const char *message)
48{
49 PyErr_SetString(IO_STATE->unsupported_operation, message);
50 return NULL;
51}
52
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000053PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000054 "Separate the underlying buffer from the TextIOBase and return it.\n"
55 "\n"
56 "After the underlying buffer has been detached, the TextIO is in an\n"
57 "unusable state.\n"
58 );
59
60static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000061textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000062{
63 return _unsupported("detach");
64}
65
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000066PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000067 "Read at most n characters from stream.\n"
68 "\n"
69 "Read from underlying buffer until we have n characters or we hit EOF.\n"
70 "If n is negative or omitted, read until EOF.\n"
71 );
72
73static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000074textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000075{
76 return _unsupported("read");
77}
78
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000079PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000080 "Read until newline or EOF.\n"
81 "\n"
82 "Returns an empty string if EOF is hit immediately.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("readline");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Write string to stream.\n"
93 "Returns the number of characters written (which is always equal to\n"
94 "the length of the string).\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("write");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Encoding of the text stream.\n"
105 "\n"
106 "Subclasses should override.\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 Py_RETURN_NONE;
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Line endings translated so far.\n"
117 "\n"
118 "Only line endings translated during reading are considered.\n"
119 "\n"
120 "Subclasses should override.\n"
121 );
122
123static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125{
126 Py_RETURN_NONE;
127}
128
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000129PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000130 "The error setting of the decoder or encoder.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000141
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000142static PyMethodDef textiobase_methods[] = {
143 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
144 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
145 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
146 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000147 {NULL, NULL}
148};
149
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000150static PyGetSetDef textiobase_getset[] = {
151 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
152 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
153 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000154 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000155};
156
157PyTypeObject PyTextIOBase_Type = {
158 PyVarObject_HEAD_INIT(NULL, 0)
159 "_io._TextIOBase", /*tp_name*/
160 0, /*tp_basicsize*/
161 0, /*tp_itemsize*/
162 0, /*tp_dealloc*/
163 0, /*tp_print*/
164 0, /*tp_getattr*/
165 0, /*tp_setattr*/
166 0, /*tp_compare */
167 0, /*tp_repr*/
168 0, /*tp_as_number*/
169 0, /*tp_as_sequence*/
170 0, /*tp_as_mapping*/
171 0, /*tp_hash */
172 0, /*tp_call*/
173 0, /*tp_str*/
174 0, /*tp_getattro*/
175 0, /*tp_setattro*/
176 0, /*tp_as_buffer*/
177 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000178 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000179 0, /* tp_traverse */
180 0, /* tp_clear */
181 0, /* tp_richcompare */
182 0, /* tp_weaklistoffset */
183 0, /* tp_iter */
184 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000185 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000186 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 &PyIOBase_Type, /* tp_base */
189 0, /* tp_dict */
190 0, /* tp_descr_get */
191 0, /* tp_descr_set */
192 0, /* tp_dictoffset */
193 0, /* tp_init */
194 0, /* tp_alloc */
195 0, /* tp_new */
196};
197
198
199/* IncrementalNewlineDecoder */
200
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000201PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000202 "Codec used when reading a file in universal newlines mode. It wraps\n"
203 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
204 "records the types of newlines encountered. When used with\n"
205 "translate=False, it ensures that the newline sequence is returned in\n"
206 "one piece. When used with decoder=None, it expects unicode strings as\n"
207 "decode input and translates newlines without first invoking an external\n"
208 "decoder.\n"
209 );
210
211typedef struct {
212 PyObject_HEAD
213 PyObject *decoder;
214 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000215 signed int pendingcr: 1;
216 signed int translate: 1;
217 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000218} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000219
220static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000221incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222 PyObject *args, PyObject *kwds)
223{
224 PyObject *decoder;
225 int translate;
226 PyObject *errors = NULL;
227 char *kwlist[] = {"decoder", "translate", "errors", NULL};
228
229 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
230 kwlist, &decoder, &translate, &errors))
231 return -1;
232
233 self->decoder = decoder;
234 Py_INCREF(decoder);
235
236 if (errors == NULL) {
237 self->errors = PyUnicode_FromString("strict");
238 if (self->errors == NULL)
239 return -1;
240 }
241 else {
242 Py_INCREF(errors);
243 self->errors = errors;
244 }
245
246 self->translate = translate;
247 self->seennl = 0;
248 self->pendingcr = 0;
249
250 return 0;
251}
252
253static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000254incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000255{
256 Py_CLEAR(self->decoder);
257 Py_CLEAR(self->errors);
258 Py_TYPE(self)->tp_free((PyObject *)self);
259}
260
/* Bit flags stored in nldecoder_object.seennl. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
265
266PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000267_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 PyObject *input, int final)
269{
270 PyObject *output;
271 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273
274 if (self->decoder == NULL) {
275 PyErr_SetString(PyExc_ValueError,
276 "IncrementalNewlineDecoder.__init__ not called");
277 return NULL;
278 }
279
280 /* decode input (with the eventual \r from a previous pass) */
281 if (self->decoder != Py_None) {
282 output = PyObject_CallMethodObjArgs(self->decoder,
283 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
284 }
285 else {
286 output = input;
287 Py_INCREF(output);
288 }
289
290 if (output == NULL)
291 return NULL;
292
293 if (!PyUnicode_Check(output)) {
294 PyErr_SetString(PyExc_TypeError,
295 "decoder should return a string result");
296 goto error;
297 }
298
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200299 if (PyUnicode_READY(output) == -1)
300 goto error;
301
302 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000303 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200304 /* Prefix output with CR */
305 int kind;
306 PyObject *modified;
307 char *out;
308
309 modified = PyUnicode_New(output_len + 1,
310 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311 if (modified == NULL)
312 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200313 kind = PyUnicode_KIND(modified);
314 out = PyUnicode_DATA(modified);
315 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200316 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000317 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200318 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000319 self->pendingcr = 0;
320 output_len++;
321 }
322
323 /* retain last \r even when not translating data:
324 * then readline() is sure to get \r\n in one pass
325 */
326 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000327 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
329 {
330 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
331 if (modified == NULL)
332 goto error;
333 Py_DECREF(output);
334 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000335 self->pendingcr = 1;
336 }
337 }
338
339 /* Record which newlines are read and do newline translation if desired,
340 all in one pass. */
341 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000343 Py_ssize_t len;
344 int seennl = self->seennl;
345 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200346 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 in_str = PyUnicode_DATA(output);
349 len = PyUnicode_GET_LENGTH(output);
350 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000351
352 if (len == 0)
353 return output;
354
355 /* If, up to now, newlines are consistently \n, do a quick check
356 for the \r *byte* with the libc's optimized memchr.
357 */
358 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200359 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360 }
361
Antoine Pitrou66913e22009-03-06 23:40:56 +0000362 if (only_lf) {
363 /* If not already seen, quick scan for a possible "\n" character.
364 (there's nothing else to be done, even when in translation mode)
365 */
366 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200367 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100368 if (kind == PyUnicode_1BYTE_KIND)
369 seennl |= SEEN_LF;
370 else {
371 Py_ssize_t i = 0;
372 for (;;) {
373 Py_UCS4 c;
374 /* Fast loop for non-control characters */
375 while (PyUnicode_READ(kind, in_str, i) > '\n')
376 i++;
377 c = PyUnicode_READ(kind, in_str, i++);
378 if (c == '\n') {
379 seennl |= SEEN_LF;
380 break;
381 }
382 if (i >= len)
383 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000385 }
386 }
387 /* Finished: we have scanned for newlines, and none of them
388 need translating */
389 }
390 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200391 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000392 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000393 if (seennl == SEEN_ALL)
394 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000395 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200396 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200398 while (PyUnicode_READ(kind, in_str, i) > '\r')
399 i++;
400 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000401 if (c == '\n')
402 seennl |= SEEN_LF;
403 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200404 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000405 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200406 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000407 }
408 else
409 seennl |= SEEN_CR;
410 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200411 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000412 break;
413 if (seennl == SEEN_ALL)
414 break;
415 }
416 endscan:
417 ;
418 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000419 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 void *translated;
421 int kind = PyUnicode_KIND(output);
422 void *in_str = PyUnicode_DATA(output);
423 Py_ssize_t in, out;
424 /* XXX: Previous in-place translation here is disabled as
425 resizing is not possible anymore */
426 /* We could try to optimize this so that we only do a copy
427 when there is something to translate. On the other hand,
428 we already know there is a \r byte, so chances are high
429 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200430 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 if (translated == NULL) {
432 PyErr_NoMemory();
433 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000436 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200437 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000438 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200439 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
440 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000441 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200442 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000443 seennl |= SEEN_LF;
444 continue;
445 }
446 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200447 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000448 in++;
449 seennl |= SEEN_CRLF;
450 }
451 else
452 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200453 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000454 continue;
455 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000457 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200458 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000459 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200460 Py_DECREF(output);
461 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100462 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200463 if (!output)
464 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000465 }
466 self->seennl |= seennl;
467 }
468
469 return output;
470
471 error:
472 Py_DECREF(output);
473 return NULL;
474}
475
476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000477incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000478 PyObject *args, PyObject *kwds)
479{
480 char *kwlist[] = {"input", "final", NULL};
481 PyObject *input;
482 int final = 0;
483
484 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
485 kwlist, &input, &final))
486 return NULL;
487 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
488}
489
490static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000491incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000492{
493 PyObject *buffer;
494 unsigned PY_LONG_LONG flag;
495
496 if (self->decoder != Py_None) {
497 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
498 _PyIO_str_getstate, NULL);
499 if (state == NULL)
500 return NULL;
501 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
502 Py_DECREF(state);
503 return NULL;
504 }
505 Py_INCREF(buffer);
506 Py_DECREF(state);
507 }
508 else {
509 buffer = PyBytes_FromString("");
510 flag = 0;
511 }
512 flag <<= 1;
513 if (self->pendingcr)
514 flag |= 1;
515 return Py_BuildValue("NK", buffer, flag);
516}
517
518static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000519incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000520{
521 PyObject *buffer;
522 unsigned PY_LONG_LONG flag;
523
524 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
525 return NULL;
526
527 self->pendingcr = (int) flag & 1;
528 flag >>= 1;
529
530 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200531 return _PyObject_CallMethodId(self->decoder,
532 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000533 else
534 Py_RETURN_NONE;
535}
536
537static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000538incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000539{
540 self->seennl = 0;
541 self->pendingcr = 0;
542 if (self->decoder != Py_None)
543 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
544 else
545 Py_RETURN_NONE;
546}
547
548static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000549incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000550{
551 switch (self->seennl) {
552 case SEEN_CR:
553 return PyUnicode_FromString("\r");
554 case SEEN_LF:
555 return PyUnicode_FromString("\n");
556 case SEEN_CRLF:
557 return PyUnicode_FromString("\r\n");
558 case SEEN_CR | SEEN_LF:
559 return Py_BuildValue("ss", "\r", "\n");
560 case SEEN_CR | SEEN_CRLF:
561 return Py_BuildValue("ss", "\r", "\r\n");
562 case SEEN_LF | SEEN_CRLF:
563 return Py_BuildValue("ss", "\n", "\r\n");
564 case SEEN_CR | SEEN_LF | SEEN_CRLF:
565 return Py_BuildValue("sss", "\r", "\n", "\r\n");
566 default:
567 Py_RETURN_NONE;
568 }
569
570}
571
572
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000573static PyMethodDef incrementalnewlinedecoder_methods[] = {
574 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
575 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
576 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
577 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000578 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000579};
580
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000581static PyGetSetDef incrementalnewlinedecoder_getset[] = {
582 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000583 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000584};
585
586PyTypeObject PyIncrementalNewlineDecoder_Type = {
587 PyVarObject_HEAD_INIT(NULL, 0)
588 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000589 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000590 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000591 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000592 0, /*tp_print*/
593 0, /*tp_getattr*/
594 0, /*tp_setattr*/
595 0, /*tp_compare */
596 0, /*tp_repr*/
597 0, /*tp_as_number*/
598 0, /*tp_as_sequence*/
599 0, /*tp_as_mapping*/
600 0, /*tp_hash */
601 0, /*tp_call*/
602 0, /*tp_str*/
603 0, /*tp_getattro*/
604 0, /*tp_setattro*/
605 0, /*tp_as_buffer*/
606 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000607 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000608 0, /* tp_traverse */
609 0, /* tp_clear */
610 0, /* tp_richcompare */
611 0, /*tp_weaklistoffset*/
612 0, /* tp_iter */
613 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000614 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000615 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 0, /* tp_base */
618 0, /* tp_dict */
619 0, /* tp_descr_get */
620 0, /* tp_descr_set */
621 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000622 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000623 0, /* tp_alloc */
624 PyType_GenericNew, /* tp_new */
625};
626
627
628/* TextIOWrapper */
629
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000630PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000631 "Character and line based layer over a BufferedIOBase object, buffer.\n"
632 "\n"
633 "encoding gives the name of the encoding that the stream will be\n"
634 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
635 "\n"
636 "errors determines the strictness of encoding and decoding (see the\n"
637 "codecs.register) and defaults to \"strict\".\n"
638 "\n"
639 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
640 "handling of line endings. If it is None, universal newlines is\n"
641 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
642 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
643 "caller. Conversely, on output, '\\n' is translated to the system\n"
Éric Araujofab97662012-02-26 02:14:08 +0100644 "default line separator, os.linesep. If newline is any other of its\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000645 "legal values, that newline becomes the newline when the file is read\n"
646 "and it is returned untranslated. On output, '\\n' is converted to the\n"
647 "newline.\n"
648 "\n"
649 "If line_buffering is True, a call to flush is implied when a call to\n"
650 "write contains a newline character."
651 );
652
653typedef PyObject *
654 (*encodefunc_t)(PyObject *, PyObject *);
655
656typedef struct
657{
658 PyObject_HEAD
659 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000660 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000661 Py_ssize_t chunk_size;
662 PyObject *buffer;
663 PyObject *encoding;
664 PyObject *encoder;
665 PyObject *decoder;
666 PyObject *readnl;
667 PyObject *errors;
668 const char *writenl; /* utf-8 encoded, NULL stands for \n */
669 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200670 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000671 char readuniversal;
672 char readtranslate;
673 char writetranslate;
674 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200675 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000676 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000677 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 /* Specialized encoding func (see below) */
679 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000680 /* Whether or not it's the start of the stream */
681 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682
683 /* Reads and writes are internally buffered in order to speed things up.
684 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000685
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000686 Please also note that text to be written is first encoded before being
687 buffered. This is necessary so that encoding errors are immediately
688 reported to the caller, but it unfortunately means that the
689 IncrementalEncoder (whose encode() method is always written in Python)
690 becomes a bottleneck for small writes.
691 */
692 PyObject *decoded_chars; /* buffer for text returned from decoder */
693 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
694 PyObject *pending_bytes; /* list of bytes objects waiting to be
695 written, or NULL */
696 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000697
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698 /* snapshot is either None, or a tuple (dec_flags, next_input) where
699 * dec_flags is the second (integer) item of the decoder state and
700 * next_input is the chunk of input bytes that comes next after the
701 * snapshot point. We use this to reconstruct decoder states in tell().
702 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000703 PyObject *snapshot;
704 /* Bytes-to-characters ratio for the current chunk. Serves as input for
705 the heuristic in tell(). */
706 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000707
708 /* Cache raw object if it's a FileIO object */
709 PyObject *raw;
710
711 PyObject *weakreflist;
712 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000713} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714
715
/* A couple of specialized cases in order to bypass the slow incremental
   encoding methods for the most popular encodings. */
718
719static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000720ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000721{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200722 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723}
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100728 return _PyUnicode_EncodeUTF16(text,
729 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730}
731
732static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000733utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000734{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100735 return _PyUnicode_EncodeUTF16(text,
736 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737}
738
739static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000740utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000741{
Antoine Pitroue4501852009-05-14 18:55:55 +0000742 if (!self->encoding_start_of_stream) {
743 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000745 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000747 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000748#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000749 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100750 return _PyUnicode_EncodeUTF16(text,
751 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000752}
753
Antoine Pitroue4501852009-05-14 18:55:55 +0000754static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000755utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000756{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100757 return _PyUnicode_EncodeUTF32(text,
758 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000759}
760
761static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000762utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000763{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100764 return _PyUnicode_EncodeUTF32(text,
765 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000766}
767
768static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000769utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000770{
771 if (!self->encoding_start_of_stream) {
772 /* Skip the BOM and use native byte ordering */
773#if defined(WORDS_BIGENDIAN)
774 return utf32be_encode(self, text);
775#else
776 return utf32le_encode(self, text);
777#endif
778 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100779 return _PyUnicode_EncodeUTF32(text,
780 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000781}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000782
783static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000784utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000785{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200786 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000787}
788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793}
794
795/* Map normalized encoding names onto the specialized encoding funcs */
796
797typedef struct {
798 const char *name;
799 encodefunc_t encodefunc;
800} encodefuncentry;
801
Antoine Pitrou24f36292009-03-28 22:16:42 +0000802static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803 {"ascii", (encodefunc_t) ascii_encode},
804 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000805 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000806 {"utf-16-be", (encodefunc_t) utf16be_encode},
807 {"utf-16-le", (encodefunc_t) utf16le_encode},
808 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000809 {"utf-32-be", (encodefunc_t) utf32be_encode},
810 {"utf-32-le", (encodefunc_t) utf32le_encode},
811 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 {NULL, NULL}
813};
814
815
816static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000817textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818{
819 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200820 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000821 NULL};
822 PyObject *buffer, *raw;
823 char *encoding = NULL;
824 char *errors = NULL;
825 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200826 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000827 _PyIO_State *state = IO_STATE;
828
829 PyObject *res;
830 int r;
831
832 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000833 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200834 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000835 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200836 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 return -1;
838
839 if (newline && newline[0] != '\0'
840 && !(newline[0] == '\n' && newline[1] == '\0')
841 && !(newline[0] == '\r' && newline[1] == '\0')
842 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
843 PyErr_Format(PyExc_ValueError,
844 "illegal newline value: %s", newline);
845 return -1;
846 }
847
848 Py_CLEAR(self->buffer);
849 Py_CLEAR(self->encoding);
850 Py_CLEAR(self->encoder);
851 Py_CLEAR(self->decoder);
852 Py_CLEAR(self->readnl);
853 Py_CLEAR(self->decoded_chars);
854 Py_CLEAR(self->pending_bytes);
855 Py_CLEAR(self->snapshot);
856 Py_CLEAR(self->errors);
857 Py_CLEAR(self->raw);
858 self->decoded_chars_used = 0;
859 self->pending_bytes_count = 0;
860 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000861 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000862
863 if (encoding == NULL) {
864 /* Try os.device_encoding(fileno) */
865 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200866 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000867 /* Ignore only AttributeError and UnsupportedOperation */
868 if (fileno == NULL) {
869 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
870 PyErr_ExceptionMatches(state->unsupported_operation)) {
871 PyErr_Clear();
872 }
873 else {
874 goto error;
875 }
876 }
877 else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200878 self->encoding = _PyObject_CallMethodId(state->os_module,
879 &PyId_device_encoding,
880 "N", fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000881 if (self->encoding == NULL)
882 goto error;
883 else if (!PyUnicode_Check(self->encoding))
884 Py_CLEAR(self->encoding);
885 }
886 }
887 if (encoding == NULL && self->encoding == NULL) {
888 if (state->locale_module == NULL) {
889 state->locale_module = PyImport_ImportModule("locale");
890 if (state->locale_module == NULL)
891 goto catch_ImportError;
892 else
893 goto use_locale;
894 }
895 else {
896 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200897 self->encoding = _PyObject_CallMethodId(
898 state->locale_module, &PyId_getpreferredencoding, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000899 if (self->encoding == NULL) {
900 catch_ImportError:
901 /*
902 Importing locale can raise a ImportError because of
903 _functools, and locale.getpreferredencoding can raise a
904 ImportError if _locale is not available. These will happen
905 during module building.
906 */
907 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
908 PyErr_Clear();
909 self->encoding = PyUnicode_FromString("ascii");
910 }
911 else
912 goto error;
913 }
914 else if (!PyUnicode_Check(self->encoding))
915 Py_CLEAR(self->encoding);
916 }
917 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000918 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000919 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000920 if (encoding == NULL)
921 goto error;
922 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000923 else if (encoding != NULL) {
924 self->encoding = PyUnicode_FromString(encoding);
925 if (self->encoding == NULL)
926 goto error;
927 }
928 else {
929 PyErr_SetString(PyExc_IOError,
930 "could not determine default encoding");
931 }
932
933 if (errors == NULL)
934 errors = "strict";
935 self->errors = PyBytes_FromString(errors);
936 if (self->errors == NULL)
937 goto error;
938
939 self->chunk_size = 8192;
940 self->readuniversal = (newline == NULL || newline[0] == '\0');
941 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200942 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000943 self->readtranslate = (newline == NULL);
944 if (newline) {
945 self->readnl = PyUnicode_FromString(newline);
946 if (self->readnl == NULL)
947 return -1;
948 }
949 self->writetranslate = (newline == NULL || newline[0] != '\0');
950 if (!self->readuniversal && self->readnl) {
951 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000952 if (self->writenl == NULL)
953 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000954 if (!strcmp(self->writenl, "\n"))
955 self->writenl = NULL;
956 }
957#ifdef MS_WINDOWS
958 else
959 self->writenl = "\r\n";
960#endif
961
962 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200963 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 if (res == NULL)
965 goto error;
966 r = PyObject_IsTrue(res);
967 Py_DECREF(res);
968 if (r == -1)
969 goto error;
970 if (r == 1) {
971 self->decoder = PyCodec_IncrementalDecoder(
972 encoding, errors);
973 if (self->decoder == NULL)
974 goto error;
975
976 if (self->readuniversal) {
977 PyObject *incrementalDecoder = PyObject_CallFunction(
978 (PyObject *)&PyIncrementalNewlineDecoder_Type,
979 "Oi", self->decoder, (int)self->readtranslate);
980 if (incrementalDecoder == NULL)
981 goto error;
982 Py_CLEAR(self->decoder);
983 self->decoder = incrementalDecoder;
984 }
985 }
986
987 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200988 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000989 if (res == NULL)
990 goto error;
991 r = PyObject_IsTrue(res);
992 Py_DECREF(res);
993 if (r == -1)
994 goto error;
995 if (r == 1) {
996 PyObject *ci;
997 self->encoder = PyCodec_IncrementalEncoder(
998 encoding, errors);
999 if (self->encoder == NULL)
1000 goto error;
1001 /* Get the normalized named of the codec */
1002 ci = _PyCodec_Lookup(encoding);
1003 if (ci == NULL)
1004 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001005 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001006 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001007 if (res == NULL) {
1008 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1009 PyErr_Clear();
1010 else
1011 goto error;
1012 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001013 else if (PyUnicode_Check(res)) {
1014 encodefuncentry *e = encodefuncs;
1015 while (e->name != NULL) {
1016 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1017 self->encodefunc = e->encodefunc;
1018 break;
1019 }
1020 e++;
1021 }
1022 }
1023 Py_XDECREF(res);
1024 }
1025
1026 self->buffer = buffer;
1027 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001028
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001029 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1030 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1031 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001032 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001033 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001034 if (raw == NULL) {
1035 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1036 PyErr_Clear();
1037 else
1038 goto error;
1039 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001040 else if (Py_TYPE(raw) == &PyFileIO_Type)
1041 self->raw = raw;
1042 else
1043 Py_DECREF(raw);
1044 }
1045
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001046 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001047 if (res == NULL)
1048 goto error;
1049 self->seekable = self->telling = PyObject_IsTrue(res);
1050 Py_DECREF(res);
1051
Martin v. Löwis767046a2011-10-14 15:35:36 +02001052 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001053
Antoine Pitroue4501852009-05-14 18:55:55 +00001054 self->encoding_start_of_stream = 0;
1055 if (self->seekable && self->encoder) {
1056 PyObject *cookieObj;
1057 int cmp;
1058
1059 self->encoding_start_of_stream = 1;
1060
1061 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1062 if (cookieObj == NULL)
1063 goto error;
1064
1065 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1066 Py_DECREF(cookieObj);
1067 if (cmp < 0) {
1068 goto error;
1069 }
1070
1071 if (cmp == 0) {
1072 self->encoding_start_of_stream = 0;
1073 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1074 _PyIO_zero, NULL);
1075 if (res == NULL)
1076 goto error;
1077 Py_DECREF(res);
1078 }
1079 }
1080
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001081 self->ok = 1;
1082 return 0;
1083
1084 error:
1085 return -1;
1086}
1087
1088static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001089_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001090{
1091 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1092 return -1;
1093 self->ok = 0;
1094 Py_CLEAR(self->buffer);
1095 Py_CLEAR(self->encoding);
1096 Py_CLEAR(self->encoder);
1097 Py_CLEAR(self->decoder);
1098 Py_CLEAR(self->readnl);
1099 Py_CLEAR(self->decoded_chars);
1100 Py_CLEAR(self->pending_bytes);
1101 Py_CLEAR(self->snapshot);
1102 Py_CLEAR(self->errors);
1103 Py_CLEAR(self->raw);
1104 return 0;
1105}
1106
1107static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001108textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001109{
Antoine Pitroue033e062010-10-29 10:38:18 +00001110 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001111 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001112 return;
1113 _PyObject_GC_UNTRACK(self);
1114 if (self->weakreflist != NULL)
1115 PyObject_ClearWeakRefs((PyObject *)self);
1116 Py_CLEAR(self->dict);
1117 Py_TYPE(self)->tp_free((PyObject *)self);
1118}
1119
1120static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001121textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122{
1123 Py_VISIT(self->buffer);
1124 Py_VISIT(self->encoding);
1125 Py_VISIT(self->encoder);
1126 Py_VISIT(self->decoder);
1127 Py_VISIT(self->readnl);
1128 Py_VISIT(self->decoded_chars);
1129 Py_VISIT(self->pending_bytes);
1130 Py_VISIT(self->snapshot);
1131 Py_VISIT(self->errors);
1132 Py_VISIT(self->raw);
1133
1134 Py_VISIT(self->dict);
1135 return 0;
1136}
1137
1138static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001139textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001140{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001141 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001142 return -1;
1143 Py_CLEAR(self->dict);
1144 return 0;
1145}
1146
1147static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001148textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001149
/* Make the calling function return NULL when the stream is closed.
 *
 * This macro takes some shortcuts to make the common case faster: for an
 * exact TextIOWrapper instance it asks the cached raw FileIO object
 * directly when there is one, and otherwise evaluates the `closed` getter;
 * a closed stream raises ValueError.  For any other type it defers to
 * _PyIOBase_check_closed().
 */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1176
/* Make the calling function return NULL with ValueError set when the
   object is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1188
/* Variant of CHECK_INITIALIZED for functions returning int: makes the
   calling function return -1 with ValueError set when self->ok <= 0. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1200
1201
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001202static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001203textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001204{
1205 PyObject *buffer, *res;
1206 CHECK_INITIALIZED(self);
1207 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1208 if (res == NULL)
1209 return NULL;
1210 Py_DECREF(res);
1211 buffer = self->buffer;
1212 self->buffer = NULL;
1213 self->detached = 1;
1214 self->ok = 0;
1215 return buffer;
1216}
1217
Antoine Pitrou24f36292009-03-28 22:16:42 +00001218/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001219 underlying buffered object, though. */
1220static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001221_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001223 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001224
1225 if (self->pending_bytes == NULL)
1226 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001227
1228 pending = self->pending_bytes;
1229 Py_INCREF(pending);
1230 self->pending_bytes_count = 0;
1231 Py_CLEAR(self->pending_bytes);
1232
1233 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1234 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001235 if (b == NULL)
1236 return -1;
1237 ret = PyObject_CallMethodObjArgs(self->buffer,
1238 _PyIO_str_write, b, NULL);
1239 Py_DECREF(b);
1240 if (ret == NULL)
1241 return -1;
1242 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001243 return 0;
1244}
1245
1246static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001247textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248{
1249 PyObject *ret;
1250 PyObject *text; /* owned reference */
1251 PyObject *b;
1252 Py_ssize_t textlen;
1253 int haslf = 0;
1254 int needflush = 0;
1255
1256 CHECK_INITIALIZED(self);
1257
1258 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1259 return NULL;
1260 }
1261
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001262 if (PyUnicode_READY(text) == -1)
1263 return NULL;
1264
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265 CHECK_CLOSED(self);
1266
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001267 if (self->encoder == NULL)
1268 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001269
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270 Py_INCREF(text);
1271
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001272 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001273
1274 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001276 haslf = 1;
1277
1278 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001279 PyObject *newtext = _PyObject_CallMethodId(
1280 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001281 Py_DECREF(text);
1282 if (newtext == NULL)
1283 return NULL;
1284 text = newtext;
1285 }
1286
Antoine Pitroue96ec682011-07-23 21:46:35 +02001287 if (self->write_through)
1288 needflush = 1;
1289 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001290 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001292 needflush = 1;
1293
1294 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001295 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001297 self->encoding_start_of_stream = 0;
1298 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001299 else
1300 b = PyObject_CallMethodObjArgs(self->encoder,
1301 _PyIO_str_encode, text, NULL);
1302 Py_DECREF(text);
1303 if (b == NULL)
1304 return NULL;
1305
1306 if (self->pending_bytes == NULL) {
1307 self->pending_bytes = PyList_New(0);
1308 if (self->pending_bytes == NULL) {
1309 Py_DECREF(b);
1310 return NULL;
1311 }
1312 self->pending_bytes_count = 0;
1313 }
1314 if (PyList_Append(self->pending_bytes, b) < 0) {
1315 Py_DECREF(b);
1316 return NULL;
1317 }
1318 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1319 Py_DECREF(b);
1320 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001321 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 return NULL;
1323 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001324
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001325 if (needflush) {
1326 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1327 if (ret == NULL)
1328 return NULL;
1329 Py_DECREF(ret);
1330 }
1331
1332 Py_CLEAR(self->snapshot);
1333
1334 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001335 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001336 if (ret == NULL)
1337 return NULL;
1338 Py_DECREF(ret);
1339 }
1340
1341 return PyLong_FromSsize_t(textlen);
1342}
1343
1344/* Steal a reference to chars and store it in the decoded_char buffer;
1345 */
1346static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001347textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001348{
1349 Py_CLEAR(self->decoded_chars);
1350 self->decoded_chars = chars;
1351 self->decoded_chars_used = 0;
1352}
1353
1354static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001355textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001356{
1357 PyObject *chars;
1358 Py_ssize_t avail;
1359
1360 if (self->decoded_chars == NULL)
1361 return PyUnicode_FromStringAndSize(NULL, 0);
1362
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001363 /* decoded_chars is guaranteed to be "ready". */
1364 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001365 - self->decoded_chars_used);
1366
1367 assert(avail >= 0);
1368
1369 if (n < 0 || n > avail)
1370 n = avail;
1371
1372 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001373 chars = PyUnicode_Substring(self->decoded_chars,
1374 self->decoded_chars_used,
1375 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001376 if (chars == NULL)
1377 return NULL;
1378 }
1379 else {
1380 chars = self->decoded_chars;
1381 Py_INCREF(chars);
1382 }
1383
1384 self->decoded_chars_used += n;
1385 return chars;
1386}
1387
1388/* Read and decode the next chunk of data from the BufferedReader.
1389 */
1390static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001391textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001392{
1393 PyObject *dec_buffer = NULL;
1394 PyObject *dec_flags = NULL;
1395 PyObject *input_chunk = NULL;
1396 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001397 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001398 int eof;
1399
1400 /* The return value is True unless EOF was reached. The decoded string is
1401 * placed in self._decoded_chars (replacing its previous value). The
1402 * entire input chunk is sent to the decoder, though some of it may remain
1403 * buffered in the decoder, yet to be converted.
1404 */
1405
1406 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001407 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001408 return -1;
1409 }
1410
1411 if (self->telling) {
1412 /* To prepare for tell(), we need to snapshot a point in the file
1413 * where the decoder's input buffer is empty.
1414 */
1415
1416 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1417 _PyIO_str_getstate, NULL);
1418 if (state == NULL)
1419 return -1;
1420 /* Given this, we know there was a valid snapshot point
1421 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1422 */
1423 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1424 Py_DECREF(state);
1425 return -1;
1426 }
1427 Py_INCREF(dec_buffer);
1428 Py_INCREF(dec_flags);
1429 Py_DECREF(state);
1430 }
1431
1432 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001433 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001434 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001435 }
1436 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001437 if (chunk_size == NULL)
1438 goto fail;
1439 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001440 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1441 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001442 Py_DECREF(chunk_size);
1443 if (input_chunk == NULL)
1444 goto fail;
1445 assert(PyBytes_Check(input_chunk));
1446
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001447 nbytes = PyBytes_Size(input_chunk);
1448 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001449
1450 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1451 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1452 self->decoder, input_chunk, eof);
1453 }
1454 else {
1455 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1456 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1457 }
1458
1459 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1460 if (decoded_chars == NULL)
1461 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001462 if (PyUnicode_READY(decoded_chars) == -1)
1463 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001464 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001465 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001466 if (nchars > 0)
1467 self->b2cratio = (double) nbytes / nchars;
1468 else
1469 self->b2cratio = 0.0;
1470 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001471 eof = 0;
1472
1473 if (self->telling) {
1474 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1475 * next input to be decoded is dec_buffer + input_chunk.
1476 */
1477 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1478 if (next_input == NULL)
1479 goto fail;
1480 assert (PyBytes_Check(next_input));
1481 Py_DECREF(dec_buffer);
1482 Py_CLEAR(self->snapshot);
1483 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1484 }
1485 Py_DECREF(input_chunk);
1486
1487 return (eof == 0);
1488
1489 fail:
1490 Py_XDECREF(dec_buffer);
1491 Py_XDECREF(dec_flags);
1492 Py_XDECREF(input_chunk);
1493 return -1;
1494}
1495
1496static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001497textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001498{
1499 Py_ssize_t n = -1;
1500 PyObject *result = NULL, *chunks = NULL;
1501
1502 CHECK_INITIALIZED(self);
1503
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001504 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001505 return NULL;
1506
1507 CHECK_CLOSED(self);
1508
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001509 if (self->decoder == NULL)
1510 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001511
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001512 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001513 return NULL;
1514
1515 if (n < 0) {
1516 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001517 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001518 PyObject *decoded;
1519 if (bytes == NULL)
1520 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001521
1522 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1523 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1524 bytes, 1);
1525 else
1526 decoded = PyObject_CallMethodObjArgs(
1527 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001528 Py_DECREF(bytes);
1529 if (decoded == NULL)
1530 goto fail;
1531
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001532 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001533
1534 if (result == NULL) {
1535 Py_DECREF(decoded);
1536 return NULL;
1537 }
1538
1539 PyUnicode_AppendAndDel(&result, decoded);
1540 if (result == NULL)
1541 goto fail;
1542
1543 Py_CLEAR(self->snapshot);
1544 return result;
1545 }
1546 else {
1547 int res = 1;
1548 Py_ssize_t remaining = n;
1549
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001550 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001551 if (result == NULL)
1552 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001553 if (PyUnicode_READY(result) == -1)
1554 goto fail;
1555 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001556
1557 /* Keep reading chunks until we have n characters to return */
1558 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001559 res = textiowrapper_read_chunk(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001560 if (res < 0)
1561 goto fail;
1562 if (res == 0) /* EOF */
1563 break;
1564 if (chunks == NULL) {
1565 chunks = PyList_New(0);
1566 if (chunks == NULL)
1567 goto fail;
1568 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001569 if (PyUnicode_GET_LENGTH(result) > 0 &&
1570 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001571 goto fail;
1572 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001573 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574 if (result == NULL)
1575 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001576 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577 }
1578 if (chunks != NULL) {
1579 if (result != NULL && PyList_Append(chunks, result) < 0)
1580 goto fail;
1581 Py_CLEAR(result);
1582 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1583 if (result == NULL)
1584 goto fail;
1585 Py_CLEAR(chunks);
1586 }
1587 return result;
1588 }
1589 fail:
1590 Py_XDECREF(result);
1591 Py_XDECREF(chunks);
1592 return NULL;
1593}
1594
1595
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001596/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001597 that is to the NUL character. Otherwise the function will produce
1598 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001599static char *
1600find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001601{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001602 if (kind == PyUnicode_1BYTE_KIND) {
1603 assert(ch < 256);
1604 return (char *) memchr((void *) s, (char) ch, end - s);
1605 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001606 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001607 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001608 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001609 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001610 return s;
1611 if (s == end)
1612 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001613 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 }
1615}
1616
1617Py_ssize_t
1618_PyIO_find_line_ending(
1619 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001620 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001621{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001622 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623
1624 if (translated) {
1625 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001626 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001627 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001628 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 else {
1630 *consumed = len;
1631 return -1;
1632 }
1633 }
1634 else if (universal) {
1635 /* Universal newline search. Find any of \r, \r\n, \n
1636 * The decoder ensures that \r\n are not split in two pieces
1637 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001638 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001639 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001640 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001641 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001642 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001643 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001644 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645 if (s >= end) {
1646 *consumed = len;
1647 return -1;
1648 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001649 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001650 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001651 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001652 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001653 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001654 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001655 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001657 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 }
1659 }
1660 }
1661 else {
1662 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001663 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1664 char *nl = PyUnicode_DATA(readnl);
1665 /* Assume that readnl is an ASCII character. */
1666 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001670 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671 *consumed = len;
1672 return -1;
1673 }
1674 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001675 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001676 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001677 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001678 if (e < s)
1679 e = s;
1680 while (s < e) {
1681 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001682 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001683 if (pos == NULL || pos >= e)
1684 break;
1685 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001686 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 break;
1688 }
1689 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001690 return (pos - start)/kind + readnl_len;
1691 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001693 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 if (pos == NULL)
1695 *consumed = len;
1696 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001697 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001698 return -1;
1699 }
1700 }
1701}
1702
1703static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001704_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705{
1706 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1707 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1708 int res;
1709
1710 CHECK_CLOSED(self);
1711
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001712 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 return NULL;
1714
1715 chunked = 0;
1716
1717 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001718 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001719 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001720 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 Py_ssize_t consumed = 0;
1722
1723 /* First, get some data if necessary */
1724 res = 1;
1725 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001726 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001727 res = textiowrapper_read_chunk(self, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001728 if (res < 0)
1729 goto error;
1730 if (res == 0)
1731 break;
1732 }
1733 if (res == 0) {
1734 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001735 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001736 Py_CLEAR(self->snapshot);
1737 start = endpos = offset_to_buffer = 0;
1738 break;
1739 }
1740
1741 if (remaining == NULL) {
1742 line = self->decoded_chars;
1743 start = self->decoded_chars_used;
1744 offset_to_buffer = 0;
1745 Py_INCREF(line);
1746 }
1747 else {
1748 assert(self->decoded_chars_used == 0);
1749 line = PyUnicode_Concat(remaining, self->decoded_chars);
1750 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001751 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752 Py_CLEAR(remaining);
1753 if (line == NULL)
1754 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001755 if (PyUnicode_READY(line) == -1)
1756 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001757 }
1758
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001759 ptr = PyUnicode_DATA(line);
1760 line_len = PyUnicode_GET_LENGTH(line);
1761 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762
1763 endpos = _PyIO_find_line_ending(
1764 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001765 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001766 ptr + kind * start,
1767 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001768 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001769 if (endpos >= 0) {
1770 endpos += start;
1771 if (limit >= 0 && (endpos - start) + chunked >= limit)
1772 endpos = start + limit - chunked;
1773 break;
1774 }
1775
1776 /* We can put aside up to `endpos` */
1777 endpos = consumed + start;
1778 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1779 /* Didn't find line ending, but reached length limit */
1780 endpos = start + limit - chunked;
1781 break;
1782 }
1783
1784 if (endpos > start) {
1785 /* No line ending seen yet - put aside current data */
1786 PyObject *s;
1787 if (chunks == NULL) {
1788 chunks = PyList_New(0);
1789 if (chunks == NULL)
1790 goto error;
1791 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001792 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001793 if (s == NULL)
1794 goto error;
1795 if (PyList_Append(chunks, s) < 0) {
1796 Py_DECREF(s);
1797 goto error;
1798 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001799 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001800 Py_DECREF(s);
1801 }
1802 /* There may be some remaining bytes we'll have to prepend to the
1803 next chunk of data */
1804 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001805 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 if (remaining == NULL)
1807 goto error;
1808 }
1809 Py_CLEAR(line);
1810 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001811 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001812 }
1813
1814 if (line != NULL) {
1815 /* Our line ends in the current buffer */
1816 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001817 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1818 PyObject *s = PyUnicode_Substring(line, start, endpos);
1819 Py_CLEAR(line);
1820 if (s == NULL)
1821 goto error;
1822 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001823 }
1824 }
1825 if (remaining != NULL) {
1826 if (chunks == NULL) {
1827 chunks = PyList_New(0);
1828 if (chunks == NULL)
1829 goto error;
1830 }
1831 if (PyList_Append(chunks, remaining) < 0)
1832 goto error;
1833 Py_CLEAR(remaining);
1834 }
1835 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001836 if (line != NULL) {
1837 if (PyList_Append(chunks, line) < 0)
1838 goto error;
1839 Py_DECREF(line);
1840 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001841 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1842 if (line == NULL)
1843 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001844 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001845 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001846 if (line == NULL) {
1847 Py_INCREF(_PyIO_empty_str);
1848 line = _PyIO_empty_str;
1849 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001850
1851 return line;
1852
1853 error:
1854 Py_XDECREF(chunks);
1855 Py_XDECREF(remaining);
1856 Py_XDECREF(line);
1857 return NULL;
1858}
1859
1860static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001861textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001862{
1863 Py_ssize_t limit = -1;
1864
1865 CHECK_INITIALIZED(self);
1866 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1867 return NULL;
1868 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001869 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870}
1871
1872/* Seek and Tell */
1873
1874typedef struct {
1875 Py_off_t start_pos;
1876 int dec_flags;
1877 int bytes_to_feed;
1878 int chars_to_skip;
1879 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001880} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001881
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in the intermediary
   byte string the various cookie_type fields will be stored.  Each offset
   is expressed relative to the field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1919
1920static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001921textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001922{
1923 unsigned char buffer[COOKIE_BUF_LEN];
1924 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1925 if (cookieLong == NULL)
1926 return -1;
1927
1928 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1929 IS_LITTLE_ENDIAN, 0) < 0) {
1930 Py_DECREF(cookieLong);
1931 return -1;
1932 }
1933 Py_DECREF(cookieLong);
1934
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001935 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1936 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1937 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1938 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1939 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001940
1941 return 0;
1942}
1943
1944static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001945textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001946{
1947 unsigned char buffer[COOKIE_BUF_LEN];
1948
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001949 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1950 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1951 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1952 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1953 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001954
1955 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1956}
1957#undef IS_LITTLE_ENDIAN
1958
1959static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001960_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001961{
1962 PyObject *res;
1963 /* When seeking to the start of the stream, we call decoder.reset()
1964 rather than decoder.getstate().
1965 This is for a few decoders such as utf-16 for which the state value
1966 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1967 utf-16, that we are expecting a BOM).
1968 */
1969 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1970 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1971 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001972 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1973 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974 if (res == NULL)
1975 return -1;
1976 Py_DECREF(res);
1977 return 0;
1978}
1979
Antoine Pitroue4501852009-05-14 18:55:55 +00001980static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001981_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001982{
1983 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001984 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001985 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1986 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1987 self->encoding_start_of_stream = 1;
1988 }
1989 else {
1990 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1991 _PyIO_zero, NULL);
1992 self->encoding_start_of_stream = 0;
1993 }
1994 if (res == NULL)
1995 return -1;
1996 Py_DECREF(res);
1997 return 0;
1998}
1999
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002001textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002{
2003 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002004 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002006 PyObject *res;
2007 int cmp;
2008
2009 CHECK_INITIALIZED(self);
2010
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002011 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2012 return NULL;
2013 CHECK_CLOSED(self);
2014
2015 Py_INCREF(cookieObj);
2016
2017 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002018 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002019 goto fail;
2020 }
2021
2022 if (whence == 1) {
2023 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002024 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025 if (cmp < 0)
2026 goto fail;
2027
2028 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002029 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 goto fail;
2031 }
2032
2033 /* Seeking to the current position should attempt to
2034 * sync the underlying buffer with the current position.
2035 */
2036 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002037 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002038 if (cookieObj == NULL)
2039 goto fail;
2040 }
2041 else if (whence == 2) {
2042 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002043 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 if (cmp < 0)
2045 goto fail;
2046
2047 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002048 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049 goto fail;
2050 }
2051
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002052 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 if (res == NULL)
2054 goto fail;
2055 Py_DECREF(res);
2056
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002057 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002058 Py_CLEAR(self->snapshot);
2059 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002060 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002061 if (res == NULL)
2062 goto fail;
2063 Py_DECREF(res);
2064 }
2065
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002066 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 Py_XDECREF(cookieObj);
2068 return res;
2069 }
2070 else if (whence != 0) {
2071 PyErr_Format(PyExc_ValueError,
2072 "invalid whence (%d, should be 0, 1 or 2)", whence);
2073 goto fail;
2074 }
2075
Antoine Pitroue4501852009-05-14 18:55:55 +00002076 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 if (cmp < 0)
2078 goto fail;
2079
2080 if (cmp == 1) {
2081 PyErr_Format(PyExc_ValueError,
2082 "negative seek position %R", cookieObj);
2083 goto fail;
2084 }
2085
2086 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2087 if (res == NULL)
2088 goto fail;
2089 Py_DECREF(res);
2090
2091 /* The strategy of seek() is to go back to the safe start point
2092 * and replay the effect of read(chars_to_skip) from there.
2093 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002094 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002095 goto fail;
2096
2097 /* Seek back to the safe start point. */
2098 posobj = PyLong_FromOff_t(cookie.start_pos);
2099 if (posobj == NULL)
2100 goto fail;
2101 res = PyObject_CallMethodObjArgs(self->buffer,
2102 _PyIO_str_seek, posobj, NULL);
2103 Py_DECREF(posobj);
2104 if (res == NULL)
2105 goto fail;
2106 Py_DECREF(res);
2107
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002108 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002109 Py_CLEAR(self->snapshot);
2110
2111 /* Restore the decoder to its state from the safe start point. */
2112 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002113 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 goto fail;
2115 }
2116
2117 if (cookie.chars_to_skip) {
2118 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002119 PyObject *input_chunk = _PyObject_CallMethodId(
2120 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002121 PyObject *decoded;
2122
2123 if (input_chunk == NULL)
2124 goto fail;
2125
2126 assert (PyBytes_Check(input_chunk));
2127
2128 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2129 if (self->snapshot == NULL) {
2130 Py_DECREF(input_chunk);
2131 goto fail;
2132 }
2133
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002134 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2135 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136
2137 if (decoded == NULL)
2138 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002139 if (PyUnicode_READY(decoded) == -1) {
2140 Py_DECREF(decoded);
2141 goto fail;
2142 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002143
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002144 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002145
2146 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002147 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002148 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2149 goto fail;
2150 }
2151 self->decoded_chars_used = cookie.chars_to_skip;
2152 }
2153 else {
2154 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2155 if (self->snapshot == NULL)
2156 goto fail;
2157 }
2158
Antoine Pitroue4501852009-05-14 18:55:55 +00002159 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2160 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002161 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002162 goto fail;
2163 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002164 return cookieObj;
2165 fail:
2166 Py_XDECREF(cookieObj);
2167 return NULL;
2168
2169}
2170
2171static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002172textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173{
2174 PyObject *res;
2175 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002176 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002177 PyObject *next_input;
2178 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002179 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002180 PyObject *saved_state = NULL;
2181 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002182 char *dec_buffer;
2183 Py_ssize_t dec_buffer_len;
2184 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002185
2186 CHECK_INITIALIZED(self);
2187 CHECK_CLOSED(self);
2188
2189 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002190 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002191 goto fail;
2192 }
2193 if (!self->telling) {
2194 PyErr_SetString(PyExc_IOError,
2195 "telling position disabled by next() call");
2196 goto fail;
2197 }
2198
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002199 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002200 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002201 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002202 if (res == NULL)
2203 goto fail;
2204 Py_DECREF(res);
2205
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002206 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002207 if (posobj == NULL)
2208 goto fail;
2209
2210 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002211 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002212 return posobj;
2213 }
2214
2215#if defined(HAVE_LARGEFILE_SUPPORT)
2216 cookie.start_pos = PyLong_AsLongLong(posobj);
2217#else
2218 cookie.start_pos = PyLong_AsLong(posobj);
2219#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002220 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002221 if (PyErr_Occurred())
2222 goto fail;
2223
2224 /* Skip backward to the snapshot point (see _read_chunk). */
2225 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2226 goto fail;
2227
2228 assert (PyBytes_Check(next_input));
2229
2230 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2231
2232 /* How many decoded characters have been used up since the snapshot? */
2233 if (self->decoded_chars_used == 0) {
2234 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002235 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002236 }
2237
2238 chars_to_skip = self->decoded_chars_used;
2239
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002240 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002241 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2242 _PyIO_str_getstate, NULL);
2243 if (saved_state == NULL)
2244 goto fail;
2245
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002246#define DECODER_GETSTATE() do { \
2247 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2248 _PyIO_str_getstate, NULL); \
2249 if (_state == NULL) \
2250 goto fail; \
2251 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2252 Py_DECREF(_state); \
2253 goto fail; \
2254 } \
2255 Py_DECREF(_state); \
2256 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002257
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002258 /* TODO: replace assert with exception */
2259#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002260 PyObject *_decoded = _PyObject_CallMethodId( \
2261 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002262 if (_decoded == NULL) \
2263 goto fail; \
2264 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002265 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002266 Py_DECREF(_decoded); \
2267 } while (0)
2268
2269 /* Fast search for an acceptable start point, close to our
2270 current pos */
2271 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2272 skip_back = 1;
2273 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2274 input = PyBytes_AS_STRING(next_input);
2275 while (skip_bytes > 0) {
2276 /* Decode up to temptative start point */
2277 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2278 goto fail;
2279 DECODER_DECODE(input, skip_bytes, chars_decoded);
2280 if (chars_decoded <= chars_to_skip) {
2281 DECODER_GETSTATE();
2282 if (dec_buffer_len == 0) {
2283 /* Before pos and no bytes buffered in decoder => OK */
2284 cookie.dec_flags = dec_flags;
2285 chars_to_skip -= chars_decoded;
2286 break;
2287 }
2288 /* Skip back by buffered amount and reset heuristic */
2289 skip_bytes -= dec_buffer_len;
2290 skip_back = 1;
2291 }
2292 else {
2293 /* We're too far ahead, skip back a bit */
2294 skip_bytes -= skip_back;
2295 skip_back *= 2;
2296 }
2297 }
2298 if (skip_bytes <= 0) {
2299 skip_bytes = 0;
2300 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2301 goto fail;
2302 }
2303
2304 /* Note our initial start point. */
2305 cookie.start_pos += skip_bytes;
2306 cookie.chars_to_skip = chars_to_skip;
2307 if (chars_to_skip == 0)
2308 goto finally;
2309
2310 /* We should be close to the desired position. Now feed the decoder one
2311 * byte at a time until we reach the `chars_to_skip` target.
2312 * As we go, note the nearest "safe start point" before the current
2313 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002314 * can safely start from there and advance to this location).
2315 */
2316 chars_decoded = 0;
2317 input = PyBytes_AS_STRING(next_input);
2318 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002319 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002320 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002321 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002322
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002323 DECODER_DECODE(input, 1, n);
2324 /* We got n chars for 1 byte */
2325 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002326 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002327 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002328
2329 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2330 /* Decoder buffer is empty, so this is a safe start point. */
2331 cookie.start_pos += cookie.bytes_to_feed;
2332 chars_to_skip -= chars_decoded;
2333 cookie.dec_flags = dec_flags;
2334 cookie.bytes_to_feed = 0;
2335 chars_decoded = 0;
2336 }
2337 if (chars_decoded >= chars_to_skip)
2338 break;
2339 input++;
2340 }
2341 if (input == input_end) {
2342 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002343 PyObject *decoded = _PyObject_CallMethodId(
2344 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345 if (decoded == NULL)
2346 goto fail;
2347 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002348 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002349 Py_DECREF(decoded);
2350 cookie.need_eof = 1;
2351
2352 if (chars_decoded < chars_to_skip) {
2353 PyErr_SetString(PyExc_IOError,
2354 "can't reconstruct logical file position");
2355 goto fail;
2356 }
2357 }
2358
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002359finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002360 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002361 Py_DECREF(saved_state);
2362 if (res == NULL)
2363 return NULL;
2364 Py_DECREF(res);
2365
2366 /* The returned cookie corresponds to the last safe start point. */
2367 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002368 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002369
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002370fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371 if (saved_state) {
2372 PyObject *type, *value, *traceback;
2373 PyErr_Fetch(&type, &value, &traceback);
2374
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002375 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002376 Py_DECREF(saved_state);
2377 if (res == NULL)
2378 return NULL;
2379 Py_DECREF(res);
2380
2381 PyErr_Restore(type, value, traceback);
2382 }
2383 return NULL;
2384}
2385
2386static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002387textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002388{
2389 PyObject *pos = Py_None;
2390 PyObject *res;
2391
2392 CHECK_INITIALIZED(self)
2393 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2394 return NULL;
2395 }
2396
2397 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2398 if (res == NULL)
2399 return NULL;
2400 Py_DECREF(res);
2401
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002402 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002403}
2404
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002405static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002406textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002407{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002408 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002409
2410 CHECK_INITIALIZED(self);
2411
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002412 res = PyUnicode_FromString("<_io.TextIOWrapper");
2413 if (res == NULL)
2414 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002415 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002416 if (nameobj == NULL) {
2417 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2418 PyErr_Clear();
2419 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002420 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002421 }
2422 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002423 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002424 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002425 if (s == NULL)
2426 goto error;
2427 PyUnicode_AppendAndDel(&res, s);
2428 if (res == NULL)
2429 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002430 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002431 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002432 if (modeobj == NULL) {
2433 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2434 PyErr_Clear();
2435 else
2436 goto error;
2437 }
2438 else {
2439 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2440 Py_DECREF(modeobj);
2441 if (s == NULL)
2442 goto error;
2443 PyUnicode_AppendAndDel(&res, s);
2444 if (res == NULL)
2445 return NULL;
2446 }
2447 s = PyUnicode_FromFormat("%U encoding=%R>",
2448 res, self->encoding);
2449 Py_DECREF(res);
2450 return s;
2451error:
2452 Py_XDECREF(res);
2453 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002454}
2455
2456
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002457/* Inquiries */
2458
2459static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002460textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002461{
2462 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002463 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464}
2465
2466static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002467textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002468{
2469 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002470 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471}
2472
2473static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002474textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002475{
2476 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002477 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478}
2479
2480static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002481textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002482{
2483 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002484 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485}
2486
2487static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002488textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489{
2490 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002491 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002492}
2493
2494static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002495textiowrapper_getstate(textio *self, PyObject *args)
2496{
2497 PyErr_Format(PyExc_TypeError,
2498 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2499 return NULL;
2500}
2501
2502static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002503textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504{
2505 CHECK_INITIALIZED(self);
2506 CHECK_CLOSED(self);
2507 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002508 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002509 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002510 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511}
2512
2513static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002514textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002515{
2516 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002517 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002518 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002519
Antoine Pitrou6be88762010-05-03 16:48:20 +00002520 res = textiowrapper_closed_get(self, NULL);
2521 if (res == NULL)
2522 return NULL;
2523 r = PyObject_IsTrue(res);
2524 Py_DECREF(res);
2525 if (r < 0)
2526 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002527
Antoine Pitrou6be88762010-05-03 16:48:20 +00002528 if (r > 0) {
2529 Py_RETURN_NONE; /* stream already closed */
2530 }
2531 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002532 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002533 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002534 if (res)
2535 Py_DECREF(res);
2536 else
2537 PyErr_Clear();
2538 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002539 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002540 if (res == NULL) {
2541 return NULL;
2542 }
2543 else
2544 Py_DECREF(res);
2545
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002546 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002547 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002548}
2549
2550static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002551textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002552{
2553 PyObject *line;
2554
2555 CHECK_INITIALIZED(self);
2556
2557 self->telling = 0;
2558 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2559 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002560 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002561 }
2562 else {
2563 line = PyObject_CallMethodObjArgs((PyObject *)self,
2564 _PyIO_str_readline, NULL);
2565 if (line && !PyUnicode_Check(line)) {
2566 PyErr_Format(PyExc_IOError,
2567 "readline() should have returned an str object, "
2568 "not '%.200s'", Py_TYPE(line)->tp_name);
2569 Py_DECREF(line);
2570 return NULL;
2571 }
2572 }
2573
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002574 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002575 return NULL;
2576
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002577 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002578 /* Reached EOF or would have blocked */
2579 Py_DECREF(line);
2580 Py_CLEAR(self->snapshot);
2581 self->telling = self->seekable;
2582 return NULL;
2583 }
2584
2585 return line;
2586}
2587
2588static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002589textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590{
2591 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002592 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593}
2594
2595static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002596textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002597{
2598 CHECK_INITIALIZED(self);
2599 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2600}
2601
2602static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002603textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002604{
2605 PyObject *res;
2606 CHECK_INITIALIZED(self);
2607 if (self->decoder == NULL)
2608 Py_RETURN_NONE;
2609 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2610 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002611 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2612 PyErr_Clear();
2613 Py_RETURN_NONE;
2614 }
2615 else {
2616 return NULL;
2617 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002618 }
2619 return res;
2620}
2621
2622static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002623textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002624{
2625 CHECK_INITIALIZED(self);
2626 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2627}
2628
2629static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002630textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002631{
2632 CHECK_INITIALIZED(self);
2633 return PyLong_FromSsize_t(self->chunk_size);
2634}
2635
2636static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002637textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002638{
2639 Py_ssize_t n;
2640 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002641 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002642 if (n == -1 && PyErr_Occurred())
2643 return -1;
2644 if (n <= 0) {
2645 PyErr_SetString(PyExc_ValueError,
2646 "a strictly positive integer is required");
2647 return -1;
2648 }
2649 self->chunk_size = n;
2650 return 0;
2651}
2652
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002653static PyMethodDef textiowrapper_methods[] = {
2654 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2655 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2656 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2657 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2658 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2659 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002661 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2662 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2663 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2664 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2665 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002666 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002667
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002668 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2669 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2670 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002671 {NULL, NULL}
2672};
2673
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002674static PyMemberDef textiowrapper_members[] = {
2675 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2676 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2677 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002678 {NULL}
2679};
2680
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002681static PyGetSetDef textiowrapper_getset[] = {
2682 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2683 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002684/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2685*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002686 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2687 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2688 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2689 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002690 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002691};
2692
2693PyTypeObject PyTextIOWrapper_Type = {
2694 PyVarObject_HEAD_INIT(NULL, 0)
2695 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002696 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002697 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002698 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002699 0, /*tp_print*/
2700 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002701 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002702 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002703 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002704 0, /*tp_as_number*/
2705 0, /*tp_as_sequence*/
2706 0, /*tp_as_mapping*/
2707 0, /*tp_hash */
2708 0, /*tp_call*/
2709 0, /*tp_str*/
2710 0, /*tp_getattro*/
2711 0, /*tp_setattro*/
2712 0, /*tp_as_buffer*/
2713 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2714 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002715 textiowrapper_doc, /* tp_doc */
2716 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2717 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002718 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002719 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002720 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002721 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2722 textiowrapper_methods, /* tp_methods */
2723 textiowrapper_members, /* tp_members */
2724 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002725 0, /* tp_base */
2726 0, /* tp_dict */
2727 0, /* tp_descr_get */
2728 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002729 offsetof(textio, dict), /*tp_dictoffset*/
2730 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002731 0, /* tp_alloc */
2732 PyType_GenericNew, /* tp_new */
2733};