blob: eef99dabb77e4f114f4873d349b82d41471fe655 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifier declarations for attribute/method names looked up by
   this file.  Each _Py_IDENTIFIER(x) makes a PyId_x object available for the
   _PyObject_*Id() helpers; for example PyId_setstate is passed to
   _PyObject_CallMethodId() in incrementalnewlinedecoder_setstate().
   NOTE(review): most of these are presumably consumed by the TextIOWrapper
   code further down the file -- confirm before removing any. */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(device_encoding);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020035
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000036/* TextIOBase */
37
/* Class docstring for _io._TextIOBase; installed as tp_doc of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
45
46static PyObject *
47_unsupported(const char *message)
48{
49 PyErr_SetString(IO_STATE->unsupported_operation, message);
50 return NULL;
51}
52
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000053PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000054 "Separate the underlying buffer from the TextIOBase and return it.\n"
55 "\n"
56 "After the underlying buffer has been detached, the TextIO is in an\n"
57 "unusable state.\n"
58 );
59
60static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000061textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000062{
63 return _unsupported("detach");
64}
65
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000066PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000067 "Read at most n characters from stream.\n"
68 "\n"
69 "Read from underlying buffer until we have n characters or we hit EOF.\n"
70 "If n is negative or omitted, read until EOF.\n"
71 );
72
73static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000074textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000075{
76 return _unsupported("read");
77}
78
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000079PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000080 "Read until newline or EOF.\n"
81 "\n"
82 "Returns an empty string if EOF is hit immediately.\n"
83 );
84
85static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000086textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000087{
88 return _unsupported("readline");
89}
90
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000091PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000092 "Write string to stream.\n"
93 "Returns the number of characters written (which is always equal to\n"
94 "the length of the string).\n"
95 );
96
97static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000098textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000099{
100 return _unsupported("write");
101}
102
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000103PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000104 "Encoding of the text stream.\n"
105 "\n"
106 "Subclasses should override.\n"
107 );
108
109static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000110textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000111{
112 Py_RETURN_NONE;
113}
114
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000115PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000116 "Line endings translated so far.\n"
117 "\n"
118 "Only line endings translated during reading are considered.\n"
119 "\n"
120 "Subclasses should override.\n"
121 );
122
123static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000124textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000125{
126 Py_RETURN_NONE;
127}
128
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000129PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000130 "The error setting of the decoder or encoder.\n"
131 "\n"
132 "Subclasses should override.\n"
133 );
134
135static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000136textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000137{
138 Py_RETURN_NONE;
139}
140
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000141
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000142static PyMethodDef textiobase_methods[] = {
143 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
144 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
145 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
146 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000147 {NULL, NULL}
148};
149
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000150static PyGetSetDef textiobase_getset[] = {
151 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
152 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
153 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000154 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000155};
156
/* Type object for _io._TextIOBase.  It derives from PyIOBase_Type, may be
   subclassed (Py_TPFLAGS_BASETYPE) and only contributes a docstring, a
   method table and a getset table; every other slot is 0 here.
   NOTE(review): tp_basicsize, tp_new etc. are presumably inherited from the
   base type when the type is readied -- confirm against the module init. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
197
198
199/* IncrementalNewlineDecoder */
200
/* Class docstring for _io.IncrementalNewlineDecoder; installed as tp_doc of
   PyIncrementalNewlineDecoder_Type. */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode. It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
    "records the types of newlines encountered. When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
210
211typedef struct {
212 PyObject_HEAD
213 PyObject *decoder;
214 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000215 signed int pendingcr: 1;
216 signed int translate: 1;
217 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000218} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000219
220static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000221incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000222 PyObject *args, PyObject *kwds)
223{
224 PyObject *decoder;
225 int translate;
226 PyObject *errors = NULL;
227 char *kwlist[] = {"decoder", "translate", "errors", NULL};
228
229 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
230 kwlist, &decoder, &translate, &errors))
231 return -1;
232
233 self->decoder = decoder;
234 Py_INCREF(decoder);
235
236 if (errors == NULL) {
237 self->errors = PyUnicode_FromString("strict");
238 if (self->errors == NULL)
239 return -1;
240 }
241 else {
242 Py_INCREF(errors);
243 self->errors = errors;
244 }
245
246 self->translate = translate;
247 self->seennl = 0;
248 self->pendingcr = 0;
249
250 return 0;
251}
252
253static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000254incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000255{
256 Py_CLEAR(self->decoder);
257 Py_CLEAR(self->errors);
258 Py_TYPE(self)->tp_free((PyObject *)self);
259}
260
/* Bit flags for the newline conventions met so far; they are OR-ed together
   into nldecoder_object.seennl.  SEEN_ALL is the value with every flag set. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
265
266PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000267_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000268 PyObject *input, int final)
269{
270 PyObject *output;
271 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000272 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000273
274 if (self->decoder == NULL) {
275 PyErr_SetString(PyExc_ValueError,
276 "IncrementalNewlineDecoder.__init__ not called");
277 return NULL;
278 }
279
280 /* decode input (with the eventual \r from a previous pass) */
281 if (self->decoder != Py_None) {
282 output = PyObject_CallMethodObjArgs(self->decoder,
283 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
284 }
285 else {
286 output = input;
287 Py_INCREF(output);
288 }
289
290 if (output == NULL)
291 return NULL;
292
293 if (!PyUnicode_Check(output)) {
294 PyErr_SetString(PyExc_TypeError,
295 "decoder should return a string result");
296 goto error;
297 }
298
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200299 if (PyUnicode_READY(output) == -1)
300 goto error;
301
302 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000303 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200304 /* Prefix output with CR */
305 int kind;
306 PyObject *modified;
307 char *out;
308
309 modified = PyUnicode_New(output_len + 1,
310 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000311 if (modified == NULL)
312 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200313 kind = PyUnicode_KIND(modified);
314 out = PyUnicode_DATA(modified);
315 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200316 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000317 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200318 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000319 self->pendingcr = 0;
320 output_len++;
321 }
322
323 /* retain last \r even when not translating data:
324 * then readline() is sure to get \r\n in one pass
325 */
326 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000327 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200328 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
329 {
330 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
331 if (modified == NULL)
332 goto error;
333 Py_DECREF(output);
334 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000335 self->pendingcr = 1;
336 }
337 }
338
339 /* Record which newlines are read and do newline translation if desired,
340 all in one pass. */
341 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200342 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000343 Py_ssize_t len;
344 int seennl = self->seennl;
345 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200346 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000347
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 in_str = PyUnicode_DATA(output);
349 len = PyUnicode_GET_LENGTH(output);
350 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000351
352 if (len == 0)
353 return output;
354
355 /* If, up to now, newlines are consistently \n, do a quick check
356 for the \r *byte* with the libc's optimized memchr.
357 */
358 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200359 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360 }
361
Antoine Pitrou66913e22009-03-06 23:40:56 +0000362 if (only_lf) {
363 /* If not already seen, quick scan for a possible "\n" character.
364 (there's nothing else to be done, even when in translation mode)
365 */
366 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200367 memchr(in_str, '\n', kind * len) != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000369 for (;;) {
Victor Stinnerf7b8cb62011-09-29 03:28:17 +0200370 Py_UCS4 c;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000371 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200372 while (PyUnicode_READ(kind, in_str, i) > '\n')
373 i++;
374 c = PyUnicode_READ(kind, in_str, i++);
Antoine Pitrou66913e22009-03-06 23:40:56 +0000375 if (c == '\n') {
376 seennl |= SEEN_LF;
377 break;
378 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200379 if (i >= len)
Antoine Pitrou66913e22009-03-06 23:40:56 +0000380 break;
381 }
382 }
383 /* Finished: we have scanned for newlines, and none of them
384 need translating */
385 }
386 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200387 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000388 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000389 if (seennl == SEEN_ALL)
390 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000391 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200392 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000393 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200394 while (PyUnicode_READ(kind, in_str, i) > '\r')
395 i++;
396 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000397 if (c == '\n')
398 seennl |= SEEN_LF;
399 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000401 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200402 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000403 }
404 else
405 seennl |= SEEN_CR;
406 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000408 break;
409 if (seennl == SEEN_ALL)
410 break;
411 }
412 endscan:
413 ;
414 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000415 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200416 void *translated;
417 int kind = PyUnicode_KIND(output);
418 void *in_str = PyUnicode_DATA(output);
419 Py_ssize_t in, out;
420 /* XXX: Previous in-place translation here is disabled as
421 resizing is not possible anymore */
422 /* We could try to optimize this so that we only do a copy
423 when there is something to translate. On the other hand,
424 we already know there is a \r byte, so chances are high
425 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200426 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200427 if (translated == NULL) {
428 PyErr_NoMemory();
429 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000430 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000432 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200433 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000434 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200435 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
436 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000439 seennl |= SEEN_LF;
440 continue;
441 }
442 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200443 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000444 in++;
445 seennl |= SEEN_CRLF;
446 }
447 else
448 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200449 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000450 continue;
451 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200454 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000455 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 Py_DECREF(output);
457 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100458 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 if (!output)
460 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000461 }
462 self->seennl |= seennl;
463 }
464
465 return output;
466
467 error:
468 Py_DECREF(output);
469 return NULL;
470}
471
472static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000473incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000474 PyObject *args, PyObject *kwds)
475{
476 char *kwlist[] = {"input", "final", NULL};
477 PyObject *input;
478 int final = 0;
479
480 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
481 kwlist, &input, &final))
482 return NULL;
483 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
484}
485
486static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000487incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000488{
489 PyObject *buffer;
490 unsigned PY_LONG_LONG flag;
491
492 if (self->decoder != Py_None) {
493 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
494 _PyIO_str_getstate, NULL);
495 if (state == NULL)
496 return NULL;
497 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
498 Py_DECREF(state);
499 return NULL;
500 }
501 Py_INCREF(buffer);
502 Py_DECREF(state);
503 }
504 else {
505 buffer = PyBytes_FromString("");
506 flag = 0;
507 }
508 flag <<= 1;
509 if (self->pendingcr)
510 flag |= 1;
511 return Py_BuildValue("NK", buffer, flag);
512}
513
514static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000515incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000516{
517 PyObject *buffer;
518 unsigned PY_LONG_LONG flag;
519
520 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
521 return NULL;
522
523 self->pendingcr = (int) flag & 1;
524 flag >>= 1;
525
526 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200527 return _PyObject_CallMethodId(self->decoder,
528 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529 else
530 Py_RETURN_NONE;
531}
532
533static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000534incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000535{
536 self->seennl = 0;
537 self->pendingcr = 0;
538 if (self->decoder != Py_None)
539 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
540 else
541 Py_RETURN_NONE;
542}
543
544static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000545incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000546{
547 switch (self->seennl) {
548 case SEEN_CR:
549 return PyUnicode_FromString("\r");
550 case SEEN_LF:
551 return PyUnicode_FromString("\n");
552 case SEEN_CRLF:
553 return PyUnicode_FromString("\r\n");
554 case SEEN_CR | SEEN_LF:
555 return Py_BuildValue("ss", "\r", "\n");
556 case SEEN_CR | SEEN_CRLF:
557 return Py_BuildValue("ss", "\r", "\r\n");
558 case SEEN_LF | SEEN_CRLF:
559 return Py_BuildValue("ss", "\n", "\r\n");
560 case SEEN_CR | SEEN_LF | SEEN_CRLF:
561 return Py_BuildValue("sss", "\r", "\n", "\r\n");
562 default:
563 Py_RETURN_NONE;
564 }
565
566}
567
568
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000569static PyMethodDef incrementalnewlinedecoder_methods[] = {
570 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
571 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
572 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
573 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000574 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000575};
576
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000577static PyGetSetDef incrementalnewlinedecoder_getset[] = {
578 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000579 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000580};
581
/* Type object for _io.IncrementalNewlineDecoder.  Instances are
   nldecoder_object structs created by PyType_GenericNew, initialized by
   incrementalnewlinedecoder_init and destroyed by
   incrementalnewlinedecoder_dealloc.  The type may be subclassed
   (Py_TPFLAGS_BASETYPE) and installs no tp_traverse/tp_clear. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
622
623
624/* TextIOWrapper */
625
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000626PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000627 "Character and line based layer over a BufferedIOBase object, buffer.\n"
628 "\n"
629 "encoding gives the name of the encoding that the stream will be\n"
630 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
631 "\n"
632 "errors determines the strictness of encoding and decoding (see the\n"
633 "codecs.register) and defaults to \"strict\".\n"
634 "\n"
635 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
636 "handling of line endings. If it is None, universal newlines is\n"
637 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
638 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
639 "caller. Conversely, on output, '\\n' is translated to the system\n"
640 "default line seperator, os.linesep. If newline is any other of its\n"
641 "legal values, that newline becomes the newline when the file is read\n"
642 "and it is returned untranslated. On output, '\\n' is converted to the\n"
643 "newline.\n"
644 "\n"
645 "If line_buffering is True, a call to flush is implied when a call to\n"
646 "write contains a newline character."
647 );
648
/* Signature shared by the specialized encoders below: two object pointers
   in, an object pointer out.  The concrete encoders are declared with a
   `textio *` first parameter and cast to this type in the encodefuncs[]
   table. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
651
/* Instance layout of the TextIOWrapper object.
   NOTE(review): most fields are populated by textiowrapper_init() and the
   methods further down the file; the per-field notes below only repeat what
   the declarations and existing comments state. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    PyObject *errors;           /* read with PyBytes_AS_STRING() by the
                                   specialized encoders below */
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char deallocating;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710
711
712/* A couple of specialized cases in order to bypass the slow incremental
713 encoding methods for the most popular encodings. */
714
715static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000716ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000717{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200718 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000719}
720
721static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000722utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723{
724 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
725 PyUnicode_GET_SIZE(text),
726 PyBytes_AS_STRING(self->errors), 1);
727}
728
729static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000730utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731{
732 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
733 PyUnicode_GET_SIZE(text),
734 PyBytes_AS_STRING(self->errors), -1);
735}
736
737static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000738utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739{
Antoine Pitroue4501852009-05-14 18:55:55 +0000740 if (!self->encoding_start_of_stream) {
741 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000742#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000743 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000745 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000747 }
748 return PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(text),
749 PyUnicode_GET_SIZE(text),
750 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751}
752
Antoine Pitroue4501852009-05-14 18:55:55 +0000753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000755{
756 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
757 PyUnicode_GET_SIZE(text),
758 PyBytes_AS_STRING(self->errors), 1);
759}
760
761static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000762utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000763{
764 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
765 PyUnicode_GET_SIZE(text),
766 PyBytes_AS_STRING(self->errors), -1);
767}
768
769static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000770utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000771{
772 if (!self->encoding_start_of_stream) {
773 /* Skip the BOM and use native byte ordering */
774#if defined(WORDS_BIGENDIAN)
775 return utf32be_encode(self, text);
776#else
777 return utf32le_encode(self, text);
778#endif
779 }
780 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(text),
781 PyUnicode_GET_SIZE(text),
782 PyBytes_AS_STRING(self->errors), 0);
783}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784
785static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000786utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000787{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200788 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000789}
790
791static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000792latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200794 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000795}
796
797/* Map normalized encoding names onto the specialized encoding funcs */
798
799typedef struct {
800 const char *name;
801 encodefunc_t encodefunc;
802} encodefuncentry;
803
Antoine Pitrou24f36292009-03-28 22:16:42 +0000804static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000805 {"ascii", (encodefunc_t) ascii_encode},
806 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000807 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000808 {"utf-16-be", (encodefunc_t) utf16be_encode},
809 {"utf-16-le", (encodefunc_t) utf16le_encode},
810 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000811 {"utf-32-be", (encodefunc_t) utf32be_encode},
812 {"utf-32-le", (encodefunc_t) utf32le_encode},
813 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000814 {NULL, NULL}
815};
816
817
818static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000819textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000820{
821 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200822 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000823 NULL};
824 PyObject *buffer, *raw;
825 char *encoding = NULL;
826 char *errors = NULL;
827 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200828 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000829 _PyIO_State *state = IO_STATE;
830
831 PyObject *res;
832 int r;
833
834 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000835 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200836 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200838 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000839 return -1;
840
841 if (newline && newline[0] != '\0'
842 && !(newline[0] == '\n' && newline[1] == '\0')
843 && !(newline[0] == '\r' && newline[1] == '\0')
844 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
845 PyErr_Format(PyExc_ValueError,
846 "illegal newline value: %s", newline);
847 return -1;
848 }
849
850 Py_CLEAR(self->buffer);
851 Py_CLEAR(self->encoding);
852 Py_CLEAR(self->encoder);
853 Py_CLEAR(self->decoder);
854 Py_CLEAR(self->readnl);
855 Py_CLEAR(self->decoded_chars);
856 Py_CLEAR(self->pending_bytes);
857 Py_CLEAR(self->snapshot);
858 Py_CLEAR(self->errors);
859 Py_CLEAR(self->raw);
860 self->decoded_chars_used = 0;
861 self->pending_bytes_count = 0;
862 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000863 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000864
865 if (encoding == NULL) {
866 /* Try os.device_encoding(fileno) */
867 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200868 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000869 /* Ignore only AttributeError and UnsupportedOperation */
870 if (fileno == NULL) {
871 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
872 PyErr_ExceptionMatches(state->unsupported_operation)) {
873 PyErr_Clear();
874 }
875 else {
876 goto error;
877 }
878 }
879 else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200880 self->encoding = _PyObject_CallMethodId(state->os_module,
881 &PyId_device_encoding,
882 "N", fileno);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000883 if (self->encoding == NULL)
884 goto error;
885 else if (!PyUnicode_Check(self->encoding))
886 Py_CLEAR(self->encoding);
887 }
888 }
889 if (encoding == NULL && self->encoding == NULL) {
890 if (state->locale_module == NULL) {
891 state->locale_module = PyImport_ImportModule("locale");
892 if (state->locale_module == NULL)
893 goto catch_ImportError;
894 else
895 goto use_locale;
896 }
897 else {
898 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200899 self->encoding = _PyObject_CallMethodId(
900 state->locale_module, &PyId_getpreferredencoding, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000901 if (self->encoding == NULL) {
902 catch_ImportError:
903 /*
904 Importing locale can raise a ImportError because of
905 _functools, and locale.getpreferredencoding can raise a
906 ImportError if _locale is not available. These will happen
907 during module building.
908 */
909 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
910 PyErr_Clear();
911 self->encoding = PyUnicode_FromString("ascii");
912 }
913 else
914 goto error;
915 }
916 else if (!PyUnicode_Check(self->encoding))
917 Py_CLEAR(self->encoding);
918 }
919 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000920 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000921 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000922 if (encoding == NULL)
923 goto error;
924 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000925 else if (encoding != NULL) {
926 self->encoding = PyUnicode_FromString(encoding);
927 if (self->encoding == NULL)
928 goto error;
929 }
930 else {
931 PyErr_SetString(PyExc_IOError,
932 "could not determine default encoding");
933 }
934
935 if (errors == NULL)
936 errors = "strict";
937 self->errors = PyBytes_FromString(errors);
938 if (self->errors == NULL)
939 goto error;
940
941 self->chunk_size = 8192;
942 self->readuniversal = (newline == NULL || newline[0] == '\0');
943 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200944 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000945 self->readtranslate = (newline == NULL);
946 if (newline) {
947 self->readnl = PyUnicode_FromString(newline);
948 if (self->readnl == NULL)
949 return -1;
950 }
951 self->writetranslate = (newline == NULL || newline[0] != '\0');
952 if (!self->readuniversal && self->readnl) {
953 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000954 if (self->writenl == NULL)
955 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000956 if (!strcmp(self->writenl, "\n"))
957 self->writenl = NULL;
958 }
959#ifdef MS_WINDOWS
960 else
961 self->writenl = "\r\n";
962#endif
963
964 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200965 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000966 if (res == NULL)
967 goto error;
968 r = PyObject_IsTrue(res);
969 Py_DECREF(res);
970 if (r == -1)
971 goto error;
972 if (r == 1) {
973 self->decoder = PyCodec_IncrementalDecoder(
974 encoding, errors);
975 if (self->decoder == NULL)
976 goto error;
977
978 if (self->readuniversal) {
979 PyObject *incrementalDecoder = PyObject_CallFunction(
980 (PyObject *)&PyIncrementalNewlineDecoder_Type,
981 "Oi", self->decoder, (int)self->readtranslate);
982 if (incrementalDecoder == NULL)
983 goto error;
984 Py_CLEAR(self->decoder);
985 self->decoder = incrementalDecoder;
986 }
987 }
988
989 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200990 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000991 if (res == NULL)
992 goto error;
993 r = PyObject_IsTrue(res);
994 Py_DECREF(res);
995 if (r == -1)
996 goto error;
997 if (r == 1) {
998 PyObject *ci;
999 self->encoder = PyCodec_IncrementalEncoder(
1000 encoding, errors);
1001 if (self->encoder == NULL)
1002 goto error;
1003 /* Get the normalized named of the codec */
1004 ci = _PyCodec_Lookup(encoding);
1005 if (ci == NULL)
1006 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001007 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001008 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001009 if (res == NULL) {
1010 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1011 PyErr_Clear();
1012 else
1013 goto error;
1014 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001015 else if (PyUnicode_Check(res)) {
1016 encodefuncentry *e = encodefuncs;
1017 while (e->name != NULL) {
1018 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1019 self->encodefunc = e->encodefunc;
1020 break;
1021 }
1022 e++;
1023 }
1024 }
1025 Py_XDECREF(res);
1026 }
1027
1028 self->buffer = buffer;
1029 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001030
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001031 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1032 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1033 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001034 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001035 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001036 if (raw == NULL) {
1037 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1038 PyErr_Clear();
1039 else
1040 goto error;
1041 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001042 else if (Py_TYPE(raw) == &PyFileIO_Type)
1043 self->raw = raw;
1044 else
1045 Py_DECREF(raw);
1046 }
1047
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001048 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001049 if (res == NULL)
1050 goto error;
1051 self->seekable = self->telling = PyObject_IsTrue(res);
1052 Py_DECREF(res);
1053
Martin v. Löwis767046a2011-10-14 15:35:36 +02001054 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001055
Antoine Pitroue4501852009-05-14 18:55:55 +00001056 self->encoding_start_of_stream = 0;
1057 if (self->seekable && self->encoder) {
1058 PyObject *cookieObj;
1059 int cmp;
1060
1061 self->encoding_start_of_stream = 1;
1062
1063 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1064 if (cookieObj == NULL)
1065 goto error;
1066
1067 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1068 Py_DECREF(cookieObj);
1069 if (cmp < 0) {
1070 goto error;
1071 }
1072
1073 if (cmp == 0) {
1074 self->encoding_start_of_stream = 0;
1075 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1076 _PyIO_zero, NULL);
1077 if (res == NULL)
1078 goto error;
1079 Py_DECREF(res);
1080 }
1081 }
1082
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001083 self->ok = 1;
1084 return 0;
1085
1086 error:
1087 return -1;
1088}
1089
1090static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001091_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001092{
1093 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1094 return -1;
1095 self->ok = 0;
1096 Py_CLEAR(self->buffer);
1097 Py_CLEAR(self->encoding);
1098 Py_CLEAR(self->encoder);
1099 Py_CLEAR(self->decoder);
1100 Py_CLEAR(self->readnl);
1101 Py_CLEAR(self->decoded_chars);
1102 Py_CLEAR(self->pending_bytes);
1103 Py_CLEAR(self->snapshot);
1104 Py_CLEAR(self->errors);
1105 Py_CLEAR(self->raw);
1106 return 0;
1107}
1108
1109static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001110textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001111{
Antoine Pitroue033e062010-10-29 10:38:18 +00001112 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001113 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001114 return;
1115 _PyObject_GC_UNTRACK(self);
1116 if (self->weakreflist != NULL)
1117 PyObject_ClearWeakRefs((PyObject *)self);
1118 Py_CLEAR(self->dict);
1119 Py_TYPE(self)->tp_free((PyObject *)self);
1120}
1121
1122static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001123textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001124{
1125 Py_VISIT(self->buffer);
1126 Py_VISIT(self->encoding);
1127 Py_VISIT(self->encoder);
1128 Py_VISIT(self->decoder);
1129 Py_VISIT(self->readnl);
1130 Py_VISIT(self->decoded_chars);
1131 Py_VISIT(self->pending_bytes);
1132 Py_VISIT(self->snapshot);
1133 Py_VISIT(self->errors);
1134 Py_VISIT(self->raw);
1135
1136 Py_VISIT(self->dict);
1137 return 0;
1138}
1139
1140static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001141textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001142{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001143 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001144 return -1;
1145 Py_CLEAR(self->dict);
1146 return 0;
1147}
1148
1149static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001150textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001151
/* Make the enclosing function return NULL, with an exception set, when
   `self` is closed or its closed state cannot be determined.

   This macro takes some shortcuts to make the common case faster: for
   exact TextIOWrapper instances it asks the cached raw FileIO object
   when there is one, and otherwise reads the `closed` getter directly.
   Any other type goes through _PyIOBase_check_closed(). */
#define CHECK_CLOSED(self) \
    do { \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _is_closed; \
            if (self->raw != NULL) \
                _is_closed = _PyFileIO_closed(self->raw); \
            else { \
                PyObject *_res = textiowrapper_closed_get(self, NULL); \
                if (_res == NULL) \
                    return NULL; \
                _is_closed = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (_is_closed < 0) \
                    return NULL; \
            } \
            if (_is_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1178
/* Make the enclosing function return NULL with a ValueError when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1190
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1202
1203
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001204static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001205textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001206{
1207 PyObject *buffer, *res;
1208 CHECK_INITIALIZED(self);
1209 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1210 if (res == NULL)
1211 return NULL;
1212 Py_DECREF(res);
1213 buffer = self->buffer;
1214 self->buffer = NULL;
1215 self->detached = 1;
1216 self->ok = 0;
1217 return buffer;
1218}
1219
Antoine Pitrou24f36292009-03-28 22:16:42 +00001220/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001221 underlying buffered object, though. */
1222static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001223_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001224{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001225 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001226
1227 if (self->pending_bytes == NULL)
1228 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001229
1230 pending = self->pending_bytes;
1231 Py_INCREF(pending);
1232 self->pending_bytes_count = 0;
1233 Py_CLEAR(self->pending_bytes);
1234
1235 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1236 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001237 if (b == NULL)
1238 return -1;
1239 ret = PyObject_CallMethodObjArgs(self->buffer,
1240 _PyIO_str_write, b, NULL);
1241 Py_DECREF(b);
1242 if (ret == NULL)
1243 return -1;
1244 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245 return 0;
1246}
1247
1248static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001249textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001250{
1251 PyObject *ret;
1252 PyObject *text; /* owned reference */
1253 PyObject *b;
1254 Py_ssize_t textlen;
1255 int haslf = 0;
1256 int needflush = 0;
1257
1258 CHECK_INITIALIZED(self);
1259
1260 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1261 return NULL;
1262 }
1263
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001264 if (PyUnicode_READY(text) == -1)
1265 return NULL;
1266
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001267 CHECK_CLOSED(self);
1268
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001269 if (self->encoder == NULL)
1270 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001271
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001272 Py_INCREF(text);
1273
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001274 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275
1276 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001277 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001278 haslf = 1;
1279
1280 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001281 PyObject *newtext = _PyObject_CallMethodId(
1282 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001283 Py_DECREF(text);
1284 if (newtext == NULL)
1285 return NULL;
1286 text = newtext;
1287 }
1288
Antoine Pitroue96ec682011-07-23 21:46:35 +02001289 if (self->write_through)
1290 needflush = 1;
1291 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001292 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001293 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001294 needflush = 1;
1295
1296 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001297 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001298 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001299 self->encoding_start_of_stream = 0;
1300 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001301 else
1302 b = PyObject_CallMethodObjArgs(self->encoder,
1303 _PyIO_str_encode, text, NULL);
1304 Py_DECREF(text);
1305 if (b == NULL)
1306 return NULL;
1307
1308 if (self->pending_bytes == NULL) {
1309 self->pending_bytes = PyList_New(0);
1310 if (self->pending_bytes == NULL) {
1311 Py_DECREF(b);
1312 return NULL;
1313 }
1314 self->pending_bytes_count = 0;
1315 }
1316 if (PyList_Append(self->pending_bytes, b) < 0) {
1317 Py_DECREF(b);
1318 return NULL;
1319 }
1320 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1321 Py_DECREF(b);
1322 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001323 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001324 return NULL;
1325 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001326
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001327 if (needflush) {
1328 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1329 if (ret == NULL)
1330 return NULL;
1331 Py_DECREF(ret);
1332 }
1333
1334 Py_CLEAR(self->snapshot);
1335
1336 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001337 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001338 if (ret == NULL)
1339 return NULL;
1340 Py_DECREF(ret);
1341 }
1342
1343 return PyLong_FromSsize_t(textlen);
1344}
1345
1346/* Steal a reference to chars and store it in the decoded_char buffer;
1347 */
1348static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001349textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001350{
1351 Py_CLEAR(self->decoded_chars);
1352 self->decoded_chars = chars;
1353 self->decoded_chars_used = 0;
1354}
1355
1356static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001357textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001358{
1359 PyObject *chars;
1360 Py_ssize_t avail;
1361
1362 if (self->decoded_chars == NULL)
1363 return PyUnicode_FromStringAndSize(NULL, 0);
1364
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001365 /* decoded_chars is guaranteed to be "ready". */
1366 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001367 - self->decoded_chars_used);
1368
1369 assert(avail >= 0);
1370
1371 if (n < 0 || n > avail)
1372 n = avail;
1373
1374 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001375 chars = PyUnicode_Substring(self->decoded_chars,
1376 self->decoded_chars_used,
1377 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001378 if (chars == NULL)
1379 return NULL;
1380 }
1381 else {
1382 chars = self->decoded_chars;
1383 Py_INCREF(chars);
1384 }
1385
1386 self->decoded_chars_used += n;
1387 return chars;
1388}
1389
1390/* Read and decode the next chunk of data from the BufferedReader.
1391 */
1392static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001393textiowrapper_read_chunk(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001394{
1395 PyObject *dec_buffer = NULL;
1396 PyObject *dec_flags = NULL;
1397 PyObject *input_chunk = NULL;
1398 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001399 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001400 int eof;
1401
1402 /* The return value is True unless EOF was reached. The decoded string is
1403 * placed in self._decoded_chars (replacing its previous value). The
1404 * entire input chunk is sent to the decoder, though some of it may remain
1405 * buffered in the decoder, yet to be converted.
1406 */
1407
1408 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001409 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001410 return -1;
1411 }
1412
1413 if (self->telling) {
1414 /* To prepare for tell(), we need to snapshot a point in the file
1415 * where the decoder's input buffer is empty.
1416 */
1417
1418 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1419 _PyIO_str_getstate, NULL);
1420 if (state == NULL)
1421 return -1;
1422 /* Given this, we know there was a valid snapshot point
1423 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1424 */
1425 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1426 Py_DECREF(state);
1427 return -1;
1428 }
1429 Py_INCREF(dec_buffer);
1430 Py_INCREF(dec_flags);
1431 Py_DECREF(state);
1432 }
1433
1434 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1435 chunk_size = PyLong_FromSsize_t(self->chunk_size);
1436 if (chunk_size == NULL)
1437 goto fail;
1438 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001439 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1440 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001441 Py_DECREF(chunk_size);
1442 if (input_chunk == NULL)
1443 goto fail;
1444 assert(PyBytes_Check(input_chunk));
1445
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001446 nbytes = PyBytes_Size(input_chunk);
1447 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001448
1449 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1450 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1451 self->decoder, input_chunk, eof);
1452 }
1453 else {
1454 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1455 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1456 }
1457
1458 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1459 if (decoded_chars == NULL)
1460 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001461 if (PyUnicode_READY(decoded_chars) == -1)
1462 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001463 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001464 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001465 if (nchars > 0)
1466 self->b2cratio = (double) nbytes / nchars;
1467 else
1468 self->b2cratio = 0.0;
1469 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001470 eof = 0;
1471
1472 if (self->telling) {
1473 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1474 * next input to be decoded is dec_buffer + input_chunk.
1475 */
1476 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1477 if (next_input == NULL)
1478 goto fail;
1479 assert (PyBytes_Check(next_input));
1480 Py_DECREF(dec_buffer);
1481 Py_CLEAR(self->snapshot);
1482 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1483 }
1484 Py_DECREF(input_chunk);
1485
1486 return (eof == 0);
1487
1488 fail:
1489 Py_XDECREF(dec_buffer);
1490 Py_XDECREF(dec_flags);
1491 Py_XDECREF(input_chunk);
1492 return -1;
1493}
1494
1495static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001496textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001497{
1498 Py_ssize_t n = -1;
1499 PyObject *result = NULL, *chunks = NULL;
1500
1501 CHECK_INITIALIZED(self);
1502
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001503 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001504 return NULL;
1505
1506 CHECK_CLOSED(self);
1507
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001508 if (self->decoder == NULL)
1509 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001510
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001511 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001512 return NULL;
1513
1514 if (n < 0) {
1515 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001516 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001517 PyObject *decoded;
1518 if (bytes == NULL)
1519 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001520
1521 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1522 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1523 bytes, 1);
1524 else
1525 decoded = PyObject_CallMethodObjArgs(
1526 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001527 Py_DECREF(bytes);
1528 if (decoded == NULL)
1529 goto fail;
1530
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001531 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001532
1533 if (result == NULL) {
1534 Py_DECREF(decoded);
1535 return NULL;
1536 }
1537
1538 PyUnicode_AppendAndDel(&result, decoded);
1539 if (result == NULL)
1540 goto fail;
1541
1542 Py_CLEAR(self->snapshot);
1543 return result;
1544 }
1545 else {
1546 int res = 1;
1547 Py_ssize_t remaining = n;
1548
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001549 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550 if (result == NULL)
1551 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001552 if (PyUnicode_READY(result) == -1)
1553 goto fail;
1554 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001555
1556 /* Keep reading chunks until we have n characters to return */
1557 while (remaining > 0) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001558 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559 if (res < 0)
1560 goto fail;
1561 if (res == 0) /* EOF */
1562 break;
1563 if (chunks == NULL) {
1564 chunks = PyList_New(0);
1565 if (chunks == NULL)
1566 goto fail;
1567 }
1568 if (PyList_Append(chunks, result) < 0)
1569 goto fail;
1570 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001571 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001572 if (result == NULL)
1573 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001574 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001575 }
1576 if (chunks != NULL) {
1577 if (result != NULL && PyList_Append(chunks, result) < 0)
1578 goto fail;
1579 Py_CLEAR(result);
1580 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1581 if (result == NULL)
1582 goto fail;
1583 Py_CLEAR(chunks);
1584 }
1585 return result;
1586 }
1587 fail:
1588 Py_XDECREF(result);
1589 Py_XDECREF(chunks);
1590 return NULL;
1591}
1592
1593
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001594/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001595 that is to the NUL character. Otherwise the function will produce
1596 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001597static char *
1598find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001599{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001600 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001601 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001602 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001603 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604 return s;
1605 if (s == end)
1606 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001607 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001608 }
1609}
1610
1611Py_ssize_t
1612_PyIO_find_line_ending(
1613 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001614 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001615{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001616 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001617
1618 if (translated) {
1619 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001620 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001621 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001622 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623 else {
1624 *consumed = len;
1625 return -1;
1626 }
1627 }
1628 else if (universal) {
1629 /* Universal newline search. Find any of \r, \r\n, \n
1630 * The decoder ensures that \r\n are not split in two pieces
1631 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001632 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001633 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001634 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001635 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001636 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001637 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001638 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001639 if (s >= end) {
1640 *consumed = len;
1641 return -1;
1642 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001643 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001644 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001646 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001647 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001648 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001649 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001650 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001651 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001652 }
1653 }
1654 }
1655 else {
1656 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1658 char *nl = PyUnicode_DATA(readnl);
1659 /* Assume that readnl is an ASCII character. */
1660 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001661 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001662 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001664 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001665 *consumed = len;
1666 return -1;
1667 }
1668 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001669 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001670 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001671 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 if (e < s)
1673 e = s;
1674 while (s < e) {
1675 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001676 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001677 if (pos == NULL || pos >= e)
1678 break;
1679 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001680 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001681 break;
1682 }
1683 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001684 return (pos - start)/kind + readnl_len;
1685 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001686 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001687 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 if (pos == NULL)
1689 *consumed = len;
1690 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001691 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692 return -1;
1693 }
1694 }
1695}
1696
1697static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001698_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699{
1700 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1701 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1702 int res;
1703
1704 CHECK_CLOSED(self);
1705
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001706 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001707 return NULL;
1708
1709 chunked = 0;
1710
1711 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001712 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001714 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001715 Py_ssize_t consumed = 0;
1716
1717 /* First, get some data if necessary */
1718 res = 1;
1719 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001720 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001721 res = textiowrapper_read_chunk(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722 if (res < 0)
1723 goto error;
1724 if (res == 0)
1725 break;
1726 }
1727 if (res == 0) {
1728 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001729 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 Py_CLEAR(self->snapshot);
1731 start = endpos = offset_to_buffer = 0;
1732 break;
1733 }
1734
1735 if (remaining == NULL) {
1736 line = self->decoded_chars;
1737 start = self->decoded_chars_used;
1738 offset_to_buffer = 0;
1739 Py_INCREF(line);
1740 }
1741 else {
1742 assert(self->decoded_chars_used == 0);
1743 line = PyUnicode_Concat(remaining, self->decoded_chars);
1744 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001745 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001746 Py_CLEAR(remaining);
1747 if (line == NULL)
1748 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001749 if (PyUnicode_READY(line) == -1)
1750 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001751 }
1752
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001753 ptr = PyUnicode_DATA(line);
1754 line_len = PyUnicode_GET_LENGTH(line);
1755 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756
1757 endpos = _PyIO_find_line_ending(
1758 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001759 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001760 ptr + kind * start,
1761 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001762 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001763 if (endpos >= 0) {
1764 endpos += start;
1765 if (limit >= 0 && (endpos - start) + chunked >= limit)
1766 endpos = start + limit - chunked;
1767 break;
1768 }
1769
1770 /* We can put aside up to `endpos` */
1771 endpos = consumed + start;
1772 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1773 /* Didn't find line ending, but reached length limit */
1774 endpos = start + limit - chunked;
1775 break;
1776 }
1777
1778 if (endpos > start) {
1779 /* No line ending seen yet - put aside current data */
1780 PyObject *s;
1781 if (chunks == NULL) {
1782 chunks = PyList_New(0);
1783 if (chunks == NULL)
1784 goto error;
1785 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001786 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001787 if (s == NULL)
1788 goto error;
1789 if (PyList_Append(chunks, s) < 0) {
1790 Py_DECREF(s);
1791 goto error;
1792 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001793 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 Py_DECREF(s);
1795 }
1796 /* There may be some remaining bytes we'll have to prepend to the
1797 next chunk of data */
1798 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001799 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001800 if (remaining == NULL)
1801 goto error;
1802 }
1803 Py_CLEAR(line);
1804 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001805 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 }
1807
1808 if (line != NULL) {
1809 /* Our line ends in the current buffer */
1810 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001811 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1812 PyObject *s = PyUnicode_Substring(line, start, endpos);
1813 Py_CLEAR(line);
1814 if (s == NULL)
1815 goto error;
1816 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001817 }
1818 }
1819 if (remaining != NULL) {
1820 if (chunks == NULL) {
1821 chunks = PyList_New(0);
1822 if (chunks == NULL)
1823 goto error;
1824 }
1825 if (PyList_Append(chunks, remaining) < 0)
1826 goto error;
1827 Py_CLEAR(remaining);
1828 }
1829 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001830 if (line != NULL) {
1831 if (PyList_Append(chunks, line) < 0)
1832 goto error;
1833 Py_DECREF(line);
1834 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001835 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1836 if (line == NULL)
1837 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001838 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001839 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001840 if (line == NULL) {
1841 Py_INCREF(_PyIO_empty_str);
1842 line = _PyIO_empty_str;
1843 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001844
1845 return line;
1846
1847 error:
1848 Py_XDECREF(chunks);
1849 Py_XDECREF(remaining);
1850 Py_XDECREF(line);
1851 return NULL;
1852}
1853
1854static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001855textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001856{
1857 Py_ssize_t limit = -1;
1858
1859 CHECK_INITIALIZED(self);
1860 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1861 return NULL;
1862 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001863 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001864}
1865
1866/* Seek and Tell */
1867
1868typedef struct {
1869 Py_off_t start_pos;
1870 int dec_flags;
1871 int bytes_to_feed;
1872 int chars_to_skip;
1873 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001874} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001875
1876/*
1877 To speed up cookie packing/unpacking, we store the fields in a temporary
1878 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1879 The following macros define at which offsets in the intermediary byte
1880 string the various CookieStruct fields will be stored.
1881 */
1882
1883#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1884
1885#if defined(WORDS_BIGENDIAN)
1886
1887# define IS_LITTLE_ENDIAN 0
1888
1889/* We want the least significant byte of start_pos to also be the least
1890 significant byte of the cookie, which means that in big-endian mode we
1891 must copy the fields in reverse order. */
1892
1893# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1894# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1895# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1896# define OFF_CHARS_TO_SKIP (sizeof(char))
1897# define OFF_NEED_EOF 0
1898
1899#else
1900
1901# define IS_LITTLE_ENDIAN 1
1902
1903/* Little-endian mode: the least significant byte of start_pos will
1904 naturally end up the least significant byte of the cookie. */
1905
1906# define OFF_START_POS 0
1907# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1908# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1909# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1910# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1911
1912#endif
1913
1914static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001915textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001916{
1917 unsigned char buffer[COOKIE_BUF_LEN];
1918 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1919 if (cookieLong == NULL)
1920 return -1;
1921
1922 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1923 IS_LITTLE_ENDIAN, 0) < 0) {
1924 Py_DECREF(cookieLong);
1925 return -1;
1926 }
1927 Py_DECREF(cookieLong);
1928
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001929 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1930 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1931 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1932 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1933 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001934
1935 return 0;
1936}
1937
1938static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001939textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001940{
1941 unsigned char buffer[COOKIE_BUF_LEN];
1942
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001943 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1944 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1945 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1946 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1947 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001948
1949 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1950}
1951#undef IS_LITTLE_ENDIAN
1952
1953static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001954_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001955{
1956 PyObject *res;
1957 /* When seeking to the start of the stream, we call decoder.reset()
1958 rather than decoder.getstate().
1959 This is for a few decoders such as utf-16 for which the state value
1960 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1961 utf-16, that we are expecting a BOM).
1962 */
1963 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1964 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1965 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001966 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1967 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968 if (res == NULL)
1969 return -1;
1970 Py_DECREF(res);
1971 return 0;
1972}
1973
Antoine Pitroue4501852009-05-14 18:55:55 +00001974static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001975_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001976{
1977 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001978 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001979 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1980 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1981 self->encoding_start_of_stream = 1;
1982 }
1983 else {
1984 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1985 _PyIO_zero, NULL);
1986 self->encoding_start_of_stream = 0;
1987 }
1988 if (res == NULL)
1989 return -1;
1990 Py_DECREF(res);
1991 return 0;
1992}
1993
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001995textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996{
1997 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001998 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000 PyObject *res;
2001 int cmp;
2002
2003 CHECK_INITIALIZED(self);
2004
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2006 return NULL;
2007 CHECK_CLOSED(self);
2008
2009 Py_INCREF(cookieObj);
2010
2011 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002012 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002013 goto fail;
2014 }
2015
2016 if (whence == 1) {
2017 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002018 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002019 if (cmp < 0)
2020 goto fail;
2021
2022 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002023 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024 goto fail;
2025 }
2026
2027 /* Seeking to the current position should attempt to
2028 * sync the underlying buffer with the current position.
2029 */
2030 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002031 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002032 if (cookieObj == NULL)
2033 goto fail;
2034 }
2035 else if (whence == 2) {
2036 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002037 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002038 if (cmp < 0)
2039 goto fail;
2040
2041 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002042 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002043 goto fail;
2044 }
2045
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002046 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047 if (res == NULL)
2048 goto fail;
2049 Py_DECREF(res);
2050
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002051 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 Py_CLEAR(self->snapshot);
2053 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002054 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002055 if (res == NULL)
2056 goto fail;
2057 Py_DECREF(res);
2058 }
2059
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002060 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002061 Py_XDECREF(cookieObj);
2062 return res;
2063 }
2064 else if (whence != 0) {
2065 PyErr_Format(PyExc_ValueError,
2066 "invalid whence (%d, should be 0, 1 or 2)", whence);
2067 goto fail;
2068 }
2069
Antoine Pitroue4501852009-05-14 18:55:55 +00002070 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071 if (cmp < 0)
2072 goto fail;
2073
2074 if (cmp == 1) {
2075 PyErr_Format(PyExc_ValueError,
2076 "negative seek position %R", cookieObj);
2077 goto fail;
2078 }
2079
2080 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2081 if (res == NULL)
2082 goto fail;
2083 Py_DECREF(res);
2084
2085 /* The strategy of seek() is to go back to the safe start point
2086 * and replay the effect of read(chars_to_skip) from there.
2087 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002088 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002089 goto fail;
2090
2091 /* Seek back to the safe start point. */
2092 posobj = PyLong_FromOff_t(cookie.start_pos);
2093 if (posobj == NULL)
2094 goto fail;
2095 res = PyObject_CallMethodObjArgs(self->buffer,
2096 _PyIO_str_seek, posobj, NULL);
2097 Py_DECREF(posobj);
2098 if (res == NULL)
2099 goto fail;
2100 Py_DECREF(res);
2101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002102 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 Py_CLEAR(self->snapshot);
2104
2105 /* Restore the decoder to its state from the safe start point. */
2106 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002107 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002108 goto fail;
2109 }
2110
2111 if (cookie.chars_to_skip) {
2112 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002113 PyObject *input_chunk = _PyObject_CallMethodId(
2114 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002115 PyObject *decoded;
2116
2117 if (input_chunk == NULL)
2118 goto fail;
2119
2120 assert (PyBytes_Check(input_chunk));
2121
2122 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2123 if (self->snapshot == NULL) {
2124 Py_DECREF(input_chunk);
2125 goto fail;
2126 }
2127
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002128 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2129 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002130
2131 if (decoded == NULL)
2132 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002133 if (PyUnicode_READY(decoded) == -1) {
2134 Py_DECREF(decoded);
2135 goto fail;
2136 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002137
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002138 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002139
2140 /* Skip chars_to_skip of the decoded characters. */
2141 if (PyUnicode_GetSize(self->decoded_chars) < cookie.chars_to_skip) {
2142 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2143 goto fail;
2144 }
2145 self->decoded_chars_used = cookie.chars_to_skip;
2146 }
2147 else {
2148 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2149 if (self->snapshot == NULL)
2150 goto fail;
2151 }
2152
Antoine Pitroue4501852009-05-14 18:55:55 +00002153 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2154 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002155 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002156 goto fail;
2157 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002158 return cookieObj;
2159 fail:
2160 Py_XDECREF(cookieObj);
2161 return NULL;
2162
2163}
2164
2165static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002166textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167{
2168 PyObject *res;
2169 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002170 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002171 PyObject *next_input;
2172 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002173 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002174 PyObject *saved_state = NULL;
2175 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002176 char *dec_buffer;
2177 Py_ssize_t dec_buffer_len;
2178 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002179
2180 CHECK_INITIALIZED(self);
2181 CHECK_CLOSED(self);
2182
2183 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002184 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002185 goto fail;
2186 }
2187 if (!self->telling) {
2188 PyErr_SetString(PyExc_IOError,
2189 "telling position disabled by next() call");
2190 goto fail;
2191 }
2192
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002193 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002194 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002195 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196 if (res == NULL)
2197 goto fail;
2198 Py_DECREF(res);
2199
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002200 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002201 if (posobj == NULL)
2202 goto fail;
2203
2204 if (self->decoder == NULL || self->snapshot == NULL) {
2205 assert (self->decoded_chars == NULL || PyUnicode_GetSize(self->decoded_chars) == 0);
2206 return posobj;
2207 }
2208
2209#if defined(HAVE_LARGEFILE_SUPPORT)
2210 cookie.start_pos = PyLong_AsLongLong(posobj);
2211#else
2212 cookie.start_pos = PyLong_AsLong(posobj);
2213#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002214 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002215 if (PyErr_Occurred())
2216 goto fail;
2217
2218 /* Skip backward to the snapshot point (see _read_chunk). */
2219 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2220 goto fail;
2221
2222 assert (PyBytes_Check(next_input));
2223
2224 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2225
2226 /* How many decoded characters have been used up since the snapshot? */
2227 if (self->decoded_chars_used == 0) {
2228 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002229 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002230 }
2231
2232 chars_to_skip = self->decoded_chars_used;
2233
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002234 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2236 _PyIO_str_getstate, NULL);
2237 if (saved_state == NULL)
2238 goto fail;
2239
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002240#define DECODER_GETSTATE() do { \
2241 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2242 _PyIO_str_getstate, NULL); \
2243 if (_state == NULL) \
2244 goto fail; \
2245 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2246 Py_DECREF(_state); \
2247 goto fail; \
2248 } \
2249 Py_DECREF(_state); \
2250 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002251
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002252 /* TODO: replace assert with exception */
2253#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002254 PyObject *_decoded = _PyObject_CallMethodId( \
2255 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002256 if (_decoded == NULL) \
2257 goto fail; \
2258 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002259 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002260 Py_DECREF(_decoded); \
2261 } while (0)
2262
2263 /* Fast search for an acceptable start point, close to our
2264 current pos */
2265 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2266 skip_back = 1;
2267 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2268 input = PyBytes_AS_STRING(next_input);
2269 while (skip_bytes > 0) {
2270 /* Decode up to temptative start point */
2271 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2272 goto fail;
2273 DECODER_DECODE(input, skip_bytes, chars_decoded);
2274 if (chars_decoded <= chars_to_skip) {
2275 DECODER_GETSTATE();
2276 if (dec_buffer_len == 0) {
2277 /* Before pos and no bytes buffered in decoder => OK */
2278 cookie.dec_flags = dec_flags;
2279 chars_to_skip -= chars_decoded;
2280 break;
2281 }
2282 /* Skip back by buffered amount and reset heuristic */
2283 skip_bytes -= dec_buffer_len;
2284 skip_back = 1;
2285 }
2286 else {
2287 /* We're too far ahead, skip back a bit */
2288 skip_bytes -= skip_back;
2289 skip_back *= 2;
2290 }
2291 }
2292 if (skip_bytes <= 0) {
2293 skip_bytes = 0;
2294 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2295 goto fail;
2296 }
2297
2298 /* Note our initial start point. */
2299 cookie.start_pos += skip_bytes;
2300 cookie.chars_to_skip = chars_to_skip;
2301 if (chars_to_skip == 0)
2302 goto finally;
2303
2304 /* We should be close to the desired position. Now feed the decoder one
2305 * byte at a time until we reach the `chars_to_skip` target.
2306 * As we go, note the nearest "safe start point" before the current
2307 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002308 * can safely start from there and advance to this location).
2309 */
2310 chars_decoded = 0;
2311 input = PyBytes_AS_STRING(next_input);
2312 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002313 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002314 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002315 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002316
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002317 DECODER_DECODE(input, 1, n);
2318 /* We got n chars for 1 byte */
2319 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002320 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002321 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002322
2323 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2324 /* Decoder buffer is empty, so this is a safe start point. */
2325 cookie.start_pos += cookie.bytes_to_feed;
2326 chars_to_skip -= chars_decoded;
2327 cookie.dec_flags = dec_flags;
2328 cookie.bytes_to_feed = 0;
2329 chars_decoded = 0;
2330 }
2331 if (chars_decoded >= chars_to_skip)
2332 break;
2333 input++;
2334 }
2335 if (input == input_end) {
2336 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002337 PyObject *decoded = _PyObject_CallMethodId(
2338 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002339 if (decoded == NULL)
2340 goto fail;
2341 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002342 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002343 Py_DECREF(decoded);
2344 cookie.need_eof = 1;
2345
2346 if (chars_decoded < chars_to_skip) {
2347 PyErr_SetString(PyExc_IOError,
2348 "can't reconstruct logical file position");
2349 goto fail;
2350 }
2351 }
2352
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002353finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002354 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002355 Py_DECREF(saved_state);
2356 if (res == NULL)
2357 return NULL;
2358 Py_DECREF(res);
2359
2360 /* The returned cookie corresponds to the last safe start point. */
2361 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002362 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002363
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002364fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002365 if (saved_state) {
2366 PyObject *type, *value, *traceback;
2367 PyErr_Fetch(&type, &value, &traceback);
2368
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002369 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002370 Py_DECREF(saved_state);
2371 if (res == NULL)
2372 return NULL;
2373 Py_DECREF(res);
2374
2375 PyErr_Restore(type, value, traceback);
2376 }
2377 return NULL;
2378}
2379
2380static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002381textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002382{
2383 PyObject *pos = Py_None;
2384 PyObject *res;
2385
2386 CHECK_INITIALIZED(self)
2387 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2388 return NULL;
2389 }
2390
2391 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2392 if (res == NULL)
2393 return NULL;
2394 Py_DECREF(res);
2395
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002396 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002397}
2398
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002399static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002400textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002401{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002402 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002403
2404 CHECK_INITIALIZED(self);
2405
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002406 res = PyUnicode_FromString("<_io.TextIOWrapper");
2407 if (res == NULL)
2408 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002409 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002410 if (nameobj == NULL) {
2411 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2412 PyErr_Clear();
2413 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002414 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002415 }
2416 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002417 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002418 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002419 if (s == NULL)
2420 goto error;
2421 PyUnicode_AppendAndDel(&res, s);
2422 if (res == NULL)
2423 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002424 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002425 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002426 if (modeobj == NULL) {
2427 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2428 PyErr_Clear();
2429 else
2430 goto error;
2431 }
2432 else {
2433 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2434 Py_DECREF(modeobj);
2435 if (s == NULL)
2436 goto error;
2437 PyUnicode_AppendAndDel(&res, s);
2438 if (res == NULL)
2439 return NULL;
2440 }
2441 s = PyUnicode_FromFormat("%U encoding=%R>",
2442 res, self->encoding);
2443 Py_DECREF(res);
2444 return s;
2445error:
2446 Py_XDECREF(res);
2447 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002448}
2449
2450
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002451/* Inquiries */
2452
2453static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002454textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002455{
2456 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002457 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002458}
2459
2460static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002461textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002462{
2463 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002464 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002465}
2466
2467static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002468textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002469{
2470 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002471 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002472}
2473
2474static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002475textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002476{
2477 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002478 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002479}
2480
2481static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002482textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002483{
2484 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002485 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486}
2487
2488static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002489textiowrapper_getstate(textio *self, PyObject *args)
2490{
2491 PyErr_Format(PyExc_TypeError,
2492 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2493 return NULL;
2494}
2495
2496static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002497textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002498{
2499 CHECK_INITIALIZED(self);
2500 CHECK_CLOSED(self);
2501 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002502 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002504 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002505}
2506
2507static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002508textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002509{
2510 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002511 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002512 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513
Antoine Pitrou6be88762010-05-03 16:48:20 +00002514 res = textiowrapper_closed_get(self, NULL);
2515 if (res == NULL)
2516 return NULL;
2517 r = PyObject_IsTrue(res);
2518 Py_DECREF(res);
2519 if (r < 0)
2520 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002521
Antoine Pitrou6be88762010-05-03 16:48:20 +00002522 if (r > 0) {
2523 Py_RETURN_NONE; /* stream already closed */
2524 }
2525 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002526 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002527 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002528 if (res)
2529 Py_DECREF(res);
2530 else
2531 PyErr_Clear();
2532 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002533 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002534 if (res == NULL) {
2535 return NULL;
2536 }
2537 else
2538 Py_DECREF(res);
2539
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002540 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002541 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002542}
2543
2544static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002545textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002546{
2547 PyObject *line;
2548
2549 CHECK_INITIALIZED(self);
2550
2551 self->telling = 0;
2552 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2553 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002554 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002555 }
2556 else {
2557 line = PyObject_CallMethodObjArgs((PyObject *)self,
2558 _PyIO_str_readline, NULL);
2559 if (line && !PyUnicode_Check(line)) {
2560 PyErr_Format(PyExc_IOError,
2561 "readline() should have returned an str object, "
2562 "not '%.200s'", Py_TYPE(line)->tp_name);
2563 Py_DECREF(line);
2564 return NULL;
2565 }
2566 }
2567
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002568 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002569 return NULL;
2570
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002571 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002572 /* Reached EOF or would have blocked */
2573 Py_DECREF(line);
2574 Py_CLEAR(self->snapshot);
2575 self->telling = self->seekable;
2576 return NULL;
2577 }
2578
2579 return line;
2580}
2581
2582static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002583textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002584{
2585 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002586 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002587}
2588
2589static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002590textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002591{
2592 CHECK_INITIALIZED(self);
2593 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2594}
2595
2596static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002597textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002598{
2599 PyObject *res;
2600 CHECK_INITIALIZED(self);
2601 if (self->decoder == NULL)
2602 Py_RETURN_NONE;
2603 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2604 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002605 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2606 PyErr_Clear();
2607 Py_RETURN_NONE;
2608 }
2609 else {
2610 return NULL;
2611 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612 }
2613 return res;
2614}
2615
2616static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002617textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002618{
2619 CHECK_INITIALIZED(self);
2620 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2621}
2622
2623static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002624textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002625{
2626 CHECK_INITIALIZED(self);
2627 return PyLong_FromSsize_t(self->chunk_size);
2628}
2629
2630static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002631textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002632{
2633 Py_ssize_t n;
2634 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002635 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002636 if (n == -1 && PyErr_Occurred())
2637 return -1;
2638 if (n <= 0) {
2639 PyErr_SetString(PyExc_ValueError,
2640 "a strictly positive integer is required");
2641 return -1;
2642 }
2643 self->chunk_size = n;
2644 return 0;
2645}
2646
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002647static PyMethodDef textiowrapper_methods[] = {
2648 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2649 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2650 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2651 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2652 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2653 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002654
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002655 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2656 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2657 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2658 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2659 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002660 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002662 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2663 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2664 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002665 {NULL, NULL}
2666};
2667
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002668static PyMemberDef textiowrapper_members[] = {
2669 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2670 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2671 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002672 {NULL}
2673};
2674
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002675static PyGetSetDef textiowrapper_getset[] = {
2676 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2677 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002678/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2679*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002680 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2681 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2682 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2683 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002684 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002685};
2686
2687PyTypeObject PyTextIOWrapper_Type = {
2688 PyVarObject_HEAD_INIT(NULL, 0)
2689 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002690 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002691 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002692 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693 0, /*tp_print*/
2694 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002695 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002697 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002698 0, /*tp_as_number*/
2699 0, /*tp_as_sequence*/
2700 0, /*tp_as_mapping*/
2701 0, /*tp_hash */
2702 0, /*tp_call*/
2703 0, /*tp_str*/
2704 0, /*tp_getattro*/
2705 0, /*tp_setattro*/
2706 0, /*tp_as_buffer*/
2707 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2708 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002709 textiowrapper_doc, /* tp_doc */
2710 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2711 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002712 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002713 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002714 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002715 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2716 textiowrapper_methods, /* tp_methods */
2717 textiowrapper_members, /* tp_members */
2718 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719 0, /* tp_base */
2720 0, /* tp_dict */
2721 0, /* tp_descr_get */
2722 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002723 offsetof(textio, dict), /*tp_dictoffset*/
2724 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002725 0, /* tp_alloc */
2726 PyType_GenericNew, /* tp_new */
2727};