blob: d390d5a4267759ba933793d4e8cba2645da30dc5 [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifier handles for the attribute and method names this file
   looks up by name.  _Py_IDENTIFIER(x) makes PyId_x available (for example
   PyId_setstate is handed to _PyObject_CallMethodId). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
/* Bit flags stored in nldecoder_object.seennl: which newline conventions
   have been encountered so far. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
264
265PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000266_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267 PyObject *input, int final)
268{
269 PyObject *output;
270 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000271 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000272
273 if (self->decoder == NULL) {
274 PyErr_SetString(PyExc_ValueError,
275 "IncrementalNewlineDecoder.__init__ not called");
276 return NULL;
277 }
278
279 /* decode input (with the eventual \r from a previous pass) */
280 if (self->decoder != Py_None) {
281 output = PyObject_CallMethodObjArgs(self->decoder,
282 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
283 }
284 else {
285 output = input;
286 Py_INCREF(output);
287 }
288
289 if (output == NULL)
290 return NULL;
291
292 if (!PyUnicode_Check(output)) {
293 PyErr_SetString(PyExc_TypeError,
294 "decoder should return a string result");
295 goto error;
296 }
297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200298 if (PyUnicode_READY(output) == -1)
299 goto error;
300
301 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000302 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 /* Prefix output with CR */
304 int kind;
305 PyObject *modified;
306 char *out;
307
308 modified = PyUnicode_New(output_len + 1,
309 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000310 if (modified == NULL)
311 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200312 kind = PyUnicode_KIND(modified);
313 out = PyUnicode_DATA(modified);
314 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200315 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000316 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200317 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000318 self->pendingcr = 0;
319 output_len++;
320 }
321
322 /* retain last \r even when not translating data:
323 * then readline() is sure to get \r\n in one pass
324 */
325 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000326 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
328 {
329 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
330 if (modified == NULL)
331 goto error;
332 Py_DECREF(output);
333 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000334 self->pendingcr = 1;
335 }
336 }
337
338 /* Record which newlines are read and do newline translation if desired,
339 all in one pass. */
340 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 Py_ssize_t len;
343 int seennl = self->seennl;
344 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200345 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 in_str = PyUnicode_DATA(output);
348 len = PyUnicode_GET_LENGTH(output);
349 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000350
351 if (len == 0)
352 return output;
353
354 /* If, up to now, newlines are consistently \n, do a quick check
355 for the \r *byte* with the libc's optimized memchr.
356 */
357 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200358 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000359 }
360
Antoine Pitrou66913e22009-03-06 23:40:56 +0000361 if (only_lf) {
362 /* If not already seen, quick scan for a possible "\n" character.
363 (there's nothing else to be done, even when in translation mode)
364 */
365 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200366 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100367 if (kind == PyUnicode_1BYTE_KIND)
368 seennl |= SEEN_LF;
369 else {
370 Py_ssize_t i = 0;
371 for (;;) {
372 Py_UCS4 c;
373 /* Fast loop for non-control characters */
374 while (PyUnicode_READ(kind, in_str, i) > '\n')
375 i++;
376 c = PyUnicode_READ(kind, in_str, i++);
377 if (c == '\n') {
378 seennl |= SEEN_LF;
379 break;
380 }
381 if (i >= len)
382 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000383 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
385 }
386 /* Finished: we have scanned for newlines, and none of them
387 need translating */
388 }
389 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000392 if (seennl == SEEN_ALL)
393 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000394 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200395 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200397 while (PyUnicode_READ(kind, in_str, i) > '\r')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000400 if (c == '\n')
401 seennl |= SEEN_LF;
402 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 }
407 else
408 seennl |= SEEN_CR;
409 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200410 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000411 break;
412 if (seennl == SEEN_ALL)
413 break;
414 }
415 endscan:
416 ;
417 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000418 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 void *translated;
420 int kind = PyUnicode_KIND(output);
421 void *in_str = PyUnicode_DATA(output);
422 Py_ssize_t in, out;
423 /* XXX: Previous in-place translation here is disabled as
424 resizing is not possible anymore */
425 /* We could try to optimize this so that we only do a copy
426 when there is something to translate. On the other hand,
427 we already know there is a \r byte, so chances are high
428 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200429 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 if (translated == NULL) {
431 PyErr_NoMemory();
432 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
439 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 seennl |= SEEN_LF;
443 continue;
444 }
445 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 in++;
448 seennl |= SEEN_CRLF;
449 }
450 else
451 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 continue;
454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_DECREF(output);
460 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100461 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200463 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 }
465 self->seennl |= seennl;
466 }
467
468 return output;
469
470 error:
471 Py_DECREF(output);
472 return NULL;
473}
474
475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000476incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 PyObject *args, PyObject *kwds)
478{
479 char *kwlist[] = {"input", "final", NULL};
480 PyObject *input;
481 int final = 0;
482
483 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
484 kwlist, &input, &final))
485 return NULL;
486 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
487}
488
489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000490incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000491{
492 PyObject *buffer;
493 unsigned PY_LONG_LONG flag;
494
495 if (self->decoder != Py_None) {
496 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
497 _PyIO_str_getstate, NULL);
498 if (state == NULL)
499 return NULL;
500 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
501 Py_DECREF(state);
502 return NULL;
503 }
504 Py_INCREF(buffer);
505 Py_DECREF(state);
506 }
507 else {
508 buffer = PyBytes_FromString("");
509 flag = 0;
510 }
511 flag <<= 1;
512 if (self->pendingcr)
513 flag |= 1;
514 return Py_BuildValue("NK", buffer, flag);
515}
516
517static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000518incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000519{
520 PyObject *buffer;
521 unsigned PY_LONG_LONG flag;
522
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
524 return NULL;
525
526 self->pendingcr = (int) flag & 1;
527 flag >>= 1;
528
529 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200530 return _PyObject_CallMethodId(self->decoder,
531 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532 else
533 Py_RETURN_NONE;
534}
535
536static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000537incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000538{
539 self->seennl = 0;
540 self->pendingcr = 0;
541 if (self->decoder != Py_None)
542 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
543 else
544 Py_RETURN_NONE;
545}
546
547static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000548incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000549{
550 switch (self->seennl) {
551 case SEEN_CR:
552 return PyUnicode_FromString("\r");
553 case SEEN_LF:
554 return PyUnicode_FromString("\n");
555 case SEEN_CRLF:
556 return PyUnicode_FromString("\r\n");
557 case SEEN_CR | SEEN_LF:
558 return Py_BuildValue("ss", "\r", "\n");
559 case SEEN_CR | SEEN_CRLF:
560 return Py_BuildValue("ss", "\r", "\r\n");
561 case SEEN_LF | SEEN_CRLF:
562 return Py_BuildValue("ss", "\n", "\r\n");
563 case SEEN_CR | SEEN_LF | SEEN_CRLF:
564 return Py_BuildValue("sss", "\r", "\n", "\r\n");
565 default:
566 Py_RETURN_NONE;
567 }
568
569}
570
571
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000572static PyMethodDef incrementalnewlinedecoder_methods[] = {
573 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
574 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
575 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
576 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000577 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578};
579
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000580static PyGetSetDef incrementalnewlinedecoder_getset[] = {
581 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000582 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583};
584
585PyTypeObject PyIncrementalNewlineDecoder_Type = {
586 PyVarObject_HEAD_INIT(NULL, 0)
587 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000588 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000591 0, /*tp_print*/
592 0, /*tp_getattr*/
593 0, /*tp_setattr*/
594 0, /*tp_compare */
595 0, /*tp_repr*/
596 0, /*tp_as_number*/
597 0, /*tp_as_sequence*/
598 0, /*tp_as_mapping*/
599 0, /*tp_hash */
600 0, /*tp_call*/
601 0, /*tp_str*/
602 0, /*tp_getattro*/
603 0, /*tp_setattro*/
604 0, /*tp_as_buffer*/
605 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000606 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000607 0, /* tp_traverse */
608 0, /* tp_clear */
609 0, /* tp_richcompare */
610 0, /*tp_weaklistoffset*/
611 0, /* tp_iter */
612 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000615 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000616 0, /* tp_base */
617 0, /* tp_dict */
618 0, /* tp_descr_get */
619 0, /* tp_descr_set */
620 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000621 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000622 0, /* tp_alloc */
623 PyType_GenericNew, /* tp_new */
624};
625
626
/* TextIOWrapper */
628
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 "Character and line based layer over a BufferedIOBase object, buffer.\n"
631 "\n"
632 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200633 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634 "\n"
635 "errors determines the strictness of encoding and decoding (see the\n"
636 "codecs.register) and defaults to \"strict\".\n"
637 "\n"
638 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
639 "handling of line endings. If it is None, universal newlines is\n"
640 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
641 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
642 "caller. Conversely, on output, '\\n' is translated to the system\n"
Éric Araujofab97662012-02-26 02:14:08 +0100643 "default line separator, os.linesep. If newline is any other of its\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 "legal values, that newline becomes the newline when the file is read\n"
645 "and it is returned untranslated. On output, '\\n' is converted to the\n"
646 "newline.\n"
647 "\n"
648 "If line_buffering is True, a call to flush is implied when a call to\n"
649 "write contains a newline character."
650 );
651
652typedef PyObject *
653 (*encodefunc_t)(PyObject *, PyObject *);
654
655typedef struct
656{
657 PyObject_HEAD
658 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000659 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000660 Py_ssize_t chunk_size;
661 PyObject *buffer;
662 PyObject *encoding;
663 PyObject *encoder;
664 PyObject *decoder;
665 PyObject *readnl;
666 PyObject *errors;
667 const char *writenl; /* utf-8 encoded, NULL stands for \n */
668 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200669 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000670 char readuniversal;
671 char readtranslate;
672 char writetranslate;
673 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200674 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000675 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000676 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 /* Specialized encoding func (see below) */
678 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000679 /* Whether or not it's the start of the stream */
680 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000681
682 /* Reads and writes are internally buffered in order to speed things up.
683 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000684
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000685 Please also note that text to be written is first encoded before being
686 buffered. This is necessary so that encoding errors are immediately
687 reported to the caller, but it unfortunately means that the
688 IncrementalEncoder (whose encode() method is always written in Python)
689 becomes a bottleneck for small writes.
690 */
691 PyObject *decoded_chars; /* buffer for text returned from decoder */
692 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
693 PyObject *pending_bytes; /* list of bytes objects waiting to be
694 written, or NULL */
695 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000696
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000697 /* snapshot is either None, or a tuple (dec_flags, next_input) where
698 * dec_flags is the second (integer) item of the decoder state and
699 * next_input is the chunk of input bytes that comes next after the
700 * snapshot point. We use this to reconstruct decoder states in tell().
701 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000702 PyObject *snapshot;
703 /* Bytes-to-characters ratio for the current chunk. Serves as input for
704 the heuristic in tell(). */
705 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706
707 /* Cache raw object if it's a FileIO object */
708 PyObject *raw;
709
710 PyObject *weakreflist;
711 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000712} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000713
714
/* A couple of specialized cases in order to bypass the slow incremental
   encoding methods for the most popular encodings. */
717
718static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000719ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000720{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200721 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722}
723
724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000725utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100727 return _PyUnicode_EncodeUTF16(text,
728 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729}
730
731static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000732utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
735 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740{
Antoine Pitroue4501852009-05-14 18:55:55 +0000741 if (!self->encoding_start_of_stream) {
742 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000744 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000745#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000746 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000747#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000748 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100749 return _PyUnicode_EncodeUTF16(text,
750 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751}
752
Antoine Pitroue4501852009-05-14 18:55:55 +0000753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000755{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100756 return _PyUnicode_EncodeUTF32(text,
757 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000758}
759
760static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000761utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000762{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
764 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000769{
770 if (!self->encoding_start_of_stream) {
771 /* Skip the BOM and use native byte ordering */
772#if defined(WORDS_BIGENDIAN)
773 return utf32be_encode(self, text);
774#else
775 return utf32le_encode(self, text);
776#endif
777 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100778 return _PyUnicode_EncodeUTF32(text,
779 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000780}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000781
782static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000783utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200785 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000786}
787
788static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000789latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000790{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200791 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000792}
793
794/* Map normalized encoding names onto the specialized encoding funcs */
795
796typedef struct {
797 const char *name;
798 encodefunc_t encodefunc;
799} encodefuncentry;
800
Antoine Pitrou24f36292009-03-28 22:16:42 +0000801static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802 {"ascii", (encodefunc_t) ascii_encode},
803 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000804 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000805 {"utf-16-be", (encodefunc_t) utf16be_encode},
806 {"utf-16-le", (encodefunc_t) utf16le_encode},
807 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000808 {"utf-32-be", (encodefunc_t) utf32be_encode},
809 {"utf-32-le", (encodefunc_t) utf32le_encode},
810 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000811 {NULL, NULL}
812};
813
814
815static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000816textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000817{
818 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200819 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000820 NULL};
821 PyObject *buffer, *raw;
822 char *encoding = NULL;
823 char *errors = NULL;
824 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200825 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000826 _PyIO_State *state = IO_STATE;
827
828 PyObject *res;
829 int r;
830
831 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000832 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200833 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000834 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200835 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 return -1;
837
838 if (newline && newline[0] != '\0'
839 && !(newline[0] == '\n' && newline[1] == '\0')
840 && !(newline[0] == '\r' && newline[1] == '\0')
841 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
842 PyErr_Format(PyExc_ValueError,
843 "illegal newline value: %s", newline);
844 return -1;
845 }
846
847 Py_CLEAR(self->buffer);
848 Py_CLEAR(self->encoding);
849 Py_CLEAR(self->encoder);
850 Py_CLEAR(self->decoder);
851 Py_CLEAR(self->readnl);
852 Py_CLEAR(self->decoded_chars);
853 Py_CLEAR(self->pending_bytes);
854 Py_CLEAR(self->snapshot);
855 Py_CLEAR(self->errors);
856 Py_CLEAR(self->raw);
857 self->decoded_chars_used = 0;
858 self->pending_bytes_count = 0;
859 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000860 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000861
862 if (encoding == NULL) {
863 /* Try os.device_encoding(fileno) */
864 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200865 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000866 /* Ignore only AttributeError and UnsupportedOperation */
867 if (fileno == NULL) {
868 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
869 PyErr_ExceptionMatches(state->unsupported_operation)) {
870 PyErr_Clear();
871 }
872 else {
873 goto error;
874 }
875 }
876 else {
Brett Cannonefb00c02012-02-29 18:31:31 -0500877 int fd = (int) PyLong_AsLong(fileno);
878 Py_DECREF(fileno);
879 if (fd == -1 && PyErr_Occurred()) {
880 goto error;
881 }
882
883 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000884 if (self->encoding == NULL)
885 goto error;
886 else if (!PyUnicode_Check(self->encoding))
887 Py_CLEAR(self->encoding);
888 }
889 }
890 if (encoding == NULL && self->encoding == NULL) {
891 if (state->locale_module == NULL) {
892 state->locale_module = PyImport_ImportModule("locale");
893 if (state->locale_module == NULL)
894 goto catch_ImportError;
895 else
896 goto use_locale;
897 }
898 else {
899 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200900 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200901 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000902 if (self->encoding == NULL) {
903 catch_ImportError:
904 /*
905 Importing locale can raise a ImportError because of
906 _functools, and locale.getpreferredencoding can raise a
907 ImportError if _locale is not available. These will happen
908 during module building.
909 */
910 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
911 PyErr_Clear();
912 self->encoding = PyUnicode_FromString("ascii");
913 }
914 else
915 goto error;
916 }
917 else if (!PyUnicode_Check(self->encoding))
918 Py_CLEAR(self->encoding);
919 }
920 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000921 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000922 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000923 if (encoding == NULL)
924 goto error;
925 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000926 else if (encoding != NULL) {
927 self->encoding = PyUnicode_FromString(encoding);
928 if (self->encoding == NULL)
929 goto error;
930 }
931 else {
932 PyErr_SetString(PyExc_IOError,
933 "could not determine default encoding");
934 }
935
936 if (errors == NULL)
937 errors = "strict";
938 self->errors = PyBytes_FromString(errors);
939 if (self->errors == NULL)
940 goto error;
941
942 self->chunk_size = 8192;
943 self->readuniversal = (newline == NULL || newline[0] == '\0');
944 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200945 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000946 self->readtranslate = (newline == NULL);
947 if (newline) {
948 self->readnl = PyUnicode_FromString(newline);
949 if (self->readnl == NULL)
950 return -1;
951 }
952 self->writetranslate = (newline == NULL || newline[0] != '\0');
953 if (!self->readuniversal && self->readnl) {
954 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000955 if (self->writenl == NULL)
956 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000957 if (!strcmp(self->writenl, "\n"))
958 self->writenl = NULL;
959 }
960#ifdef MS_WINDOWS
961 else
962 self->writenl = "\r\n";
963#endif
964
965 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200966 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000967 if (res == NULL)
968 goto error;
969 r = PyObject_IsTrue(res);
970 Py_DECREF(res);
971 if (r == -1)
972 goto error;
973 if (r == 1) {
974 self->decoder = PyCodec_IncrementalDecoder(
975 encoding, errors);
976 if (self->decoder == NULL)
977 goto error;
978
979 if (self->readuniversal) {
980 PyObject *incrementalDecoder = PyObject_CallFunction(
981 (PyObject *)&PyIncrementalNewlineDecoder_Type,
982 "Oi", self->decoder, (int)self->readtranslate);
983 if (incrementalDecoder == NULL)
984 goto error;
985 Py_CLEAR(self->decoder);
986 self->decoder = incrementalDecoder;
987 }
988 }
989
990 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200991 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000992 if (res == NULL)
993 goto error;
994 r = PyObject_IsTrue(res);
995 Py_DECREF(res);
996 if (r == -1)
997 goto error;
998 if (r == 1) {
999 PyObject *ci;
1000 self->encoder = PyCodec_IncrementalEncoder(
1001 encoding, errors);
1002 if (self->encoder == NULL)
1003 goto error;
1004 /* Get the normalized named of the codec */
1005 ci = _PyCodec_Lookup(encoding);
1006 if (ci == NULL)
1007 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001008 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001009 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001010 if (res == NULL) {
1011 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1012 PyErr_Clear();
1013 else
1014 goto error;
1015 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001016 else if (PyUnicode_Check(res)) {
1017 encodefuncentry *e = encodefuncs;
1018 while (e->name != NULL) {
1019 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1020 self->encodefunc = e->encodefunc;
1021 break;
1022 }
1023 e++;
1024 }
1025 }
1026 Py_XDECREF(res);
1027 }
1028
1029 self->buffer = buffer;
1030 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001031
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001032 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1033 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1034 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001035 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001036 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001037 if (raw == NULL) {
1038 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1039 PyErr_Clear();
1040 else
1041 goto error;
1042 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001043 else if (Py_TYPE(raw) == &PyFileIO_Type)
1044 self->raw = raw;
1045 else
1046 Py_DECREF(raw);
1047 }
1048
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001049 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 if (res == NULL)
1051 goto error;
1052 self->seekable = self->telling = PyObject_IsTrue(res);
1053 Py_DECREF(res);
1054
Martin v. Löwis767046a2011-10-14 15:35:36 +02001055 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001056
Antoine Pitroue4501852009-05-14 18:55:55 +00001057 self->encoding_start_of_stream = 0;
1058 if (self->seekable && self->encoder) {
1059 PyObject *cookieObj;
1060 int cmp;
1061
1062 self->encoding_start_of_stream = 1;
1063
1064 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1065 if (cookieObj == NULL)
1066 goto error;
1067
1068 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1069 Py_DECREF(cookieObj);
1070 if (cmp < 0) {
1071 goto error;
1072 }
1073
1074 if (cmp == 0) {
1075 self->encoding_start_of_stream = 0;
1076 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1077 _PyIO_zero, NULL);
1078 if (res == NULL)
1079 goto error;
1080 Py_DECREF(res);
1081 }
1082 }
1083
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001084 self->ok = 1;
1085 return 0;
1086
1087 error:
1088 return -1;
1089}
1090
1091static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001092_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001093{
1094 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1095 return -1;
1096 self->ok = 0;
1097 Py_CLEAR(self->buffer);
1098 Py_CLEAR(self->encoding);
1099 Py_CLEAR(self->encoder);
1100 Py_CLEAR(self->decoder);
1101 Py_CLEAR(self->readnl);
1102 Py_CLEAR(self->decoded_chars);
1103 Py_CLEAR(self->pending_bytes);
1104 Py_CLEAR(self->snapshot);
1105 Py_CLEAR(self->errors);
1106 Py_CLEAR(self->raw);
1107 return 0;
1108}
1109
1110static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001111textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001112{
Antoine Pitroue033e062010-10-29 10:38:18 +00001113 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001114 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001115 return;
1116 _PyObject_GC_UNTRACK(self);
1117 if (self->weakreflist != NULL)
1118 PyObject_ClearWeakRefs((PyObject *)self);
1119 Py_CLEAR(self->dict);
1120 Py_TYPE(self)->tp_free((PyObject *)self);
1121}
1122
1123static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001124textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125{
1126 Py_VISIT(self->buffer);
1127 Py_VISIT(self->encoding);
1128 Py_VISIT(self->encoder);
1129 Py_VISIT(self->decoder);
1130 Py_VISIT(self->readnl);
1131 Py_VISIT(self->decoded_chars);
1132 Py_VISIT(self->pending_bytes);
1133 Py_VISIT(self->snapshot);
1134 Py_VISIT(self->errors);
1135 Py_VISIT(self->raw);
1136
1137 Py_VISIT(self->dict);
1138 return 0;
1139}
1140
1141static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001142textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001143{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001144 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001145 return -1;
1146 Py_CLEAR(self->dict);
1147 return 0;
1148}
1149
1150static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001151textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001152
1153/* This macro takes some shortcuts to make the common case faster. */
1154#define CHECK_CLOSED(self) \
1155 do { \
1156 int r; \
1157 PyObject *_res; \
1158 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1159 if (self->raw != NULL) \
1160 r = _PyFileIO_closed(self->raw); \
1161 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001162 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001163 if (_res == NULL) \
1164 return NULL; \
1165 r = PyObject_IsTrue(_res); \
1166 Py_DECREF(_res); \
1167 if (r < 0) \
1168 return NULL; \
1169 } \
1170 if (r > 0) { \
1171 PyErr_SetString(PyExc_ValueError, \
1172 "I/O operation on closed file."); \
1173 return NULL; \
1174 } \
1175 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001176 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001177 return NULL; \
1178 } while (0)
1179
/* Return NULL from the enclosing function, with ValueError set, when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1191
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1203
1204
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001205static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001206textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001207{
1208 PyObject *buffer, *res;
1209 CHECK_INITIALIZED(self);
1210 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1211 if (res == NULL)
1212 return NULL;
1213 Py_DECREF(res);
1214 buffer = self->buffer;
1215 self->buffer = NULL;
1216 self->detached = 1;
1217 self->ok = 0;
1218 return buffer;
1219}
1220
Antoine Pitrou24f36292009-03-28 22:16:42 +00001221/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222 underlying buffered object, though. */
1223static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001224_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001225{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001226 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001227
1228 if (self->pending_bytes == NULL)
1229 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001230
1231 pending = self->pending_bytes;
1232 Py_INCREF(pending);
1233 self->pending_bytes_count = 0;
1234 Py_CLEAR(self->pending_bytes);
1235
1236 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1237 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001238 if (b == NULL)
1239 return -1;
1240 ret = PyObject_CallMethodObjArgs(self->buffer,
1241 _PyIO_str_write, b, NULL);
1242 Py_DECREF(b);
1243 if (ret == NULL)
1244 return -1;
1245 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246 return 0;
1247}
1248
1249static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001250textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001251{
1252 PyObject *ret;
1253 PyObject *text; /* owned reference */
1254 PyObject *b;
1255 Py_ssize_t textlen;
1256 int haslf = 0;
1257 int needflush = 0;
1258
1259 CHECK_INITIALIZED(self);
1260
1261 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1262 return NULL;
1263 }
1264
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001265 if (PyUnicode_READY(text) == -1)
1266 return NULL;
1267
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268 CHECK_CLOSED(self);
1269
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001270 if (self->encoder == NULL)
1271 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001272
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001273 Py_INCREF(text);
1274
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001276
1277 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279 haslf = 1;
1280
1281 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001282 PyObject *newtext = _PyObject_CallMethodId(
1283 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001284 Py_DECREF(text);
1285 if (newtext == NULL)
1286 return NULL;
1287 text = newtext;
1288 }
1289
Antoine Pitroue96ec682011-07-23 21:46:35 +02001290 if (self->write_through)
1291 needflush = 1;
1292 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001293 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001295 needflush = 1;
1296
1297 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001298 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001299 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001300 self->encoding_start_of_stream = 0;
1301 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302 else
1303 b = PyObject_CallMethodObjArgs(self->encoder,
1304 _PyIO_str_encode, text, NULL);
1305 Py_DECREF(text);
1306 if (b == NULL)
1307 return NULL;
1308
1309 if (self->pending_bytes == NULL) {
1310 self->pending_bytes = PyList_New(0);
1311 if (self->pending_bytes == NULL) {
1312 Py_DECREF(b);
1313 return NULL;
1314 }
1315 self->pending_bytes_count = 0;
1316 }
1317 if (PyList_Append(self->pending_bytes, b) < 0) {
1318 Py_DECREF(b);
1319 return NULL;
1320 }
1321 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1322 Py_DECREF(b);
1323 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001324 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001325 return NULL;
1326 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001327
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001328 if (needflush) {
1329 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1330 if (ret == NULL)
1331 return NULL;
1332 Py_DECREF(ret);
1333 }
1334
1335 Py_CLEAR(self->snapshot);
1336
1337 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001338 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001339 if (ret == NULL)
1340 return NULL;
1341 Py_DECREF(ret);
1342 }
1343
1344 return PyLong_FromSsize_t(textlen);
1345}
1346
1347/* Steal a reference to chars and store it in the decoded_char buffer;
1348 */
1349static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001350textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001351{
1352 Py_CLEAR(self->decoded_chars);
1353 self->decoded_chars = chars;
1354 self->decoded_chars_used = 0;
1355}
1356
1357static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001358textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001359{
1360 PyObject *chars;
1361 Py_ssize_t avail;
1362
1363 if (self->decoded_chars == NULL)
1364 return PyUnicode_FromStringAndSize(NULL, 0);
1365
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001366 /* decoded_chars is guaranteed to be "ready". */
1367 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001368 - self->decoded_chars_used);
1369
1370 assert(avail >= 0);
1371
1372 if (n < 0 || n > avail)
1373 n = avail;
1374
1375 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 chars = PyUnicode_Substring(self->decoded_chars,
1377 self->decoded_chars_used,
1378 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001379 if (chars == NULL)
1380 return NULL;
1381 }
1382 else {
1383 chars = self->decoded_chars;
1384 Py_INCREF(chars);
1385 }
1386
1387 self->decoded_chars_used += n;
1388 return chars;
1389}
1390
1391/* Read and decode the next chunk of data from the BufferedReader.
1392 */
1393static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001394textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001395{
1396 PyObject *dec_buffer = NULL;
1397 PyObject *dec_flags = NULL;
1398 PyObject *input_chunk = NULL;
1399 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001400 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001401 int eof;
1402
1403 /* The return value is True unless EOF was reached. The decoded string is
1404 * placed in self._decoded_chars (replacing its previous value). The
1405 * entire input chunk is sent to the decoder, though some of it may remain
1406 * buffered in the decoder, yet to be converted.
1407 */
1408
1409 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001410 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001411 return -1;
1412 }
1413
1414 if (self->telling) {
1415 /* To prepare for tell(), we need to snapshot a point in the file
1416 * where the decoder's input buffer is empty.
1417 */
1418
1419 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1420 _PyIO_str_getstate, NULL);
1421 if (state == NULL)
1422 return -1;
1423 /* Given this, we know there was a valid snapshot point
1424 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1425 */
1426 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1427 Py_DECREF(state);
1428 return -1;
1429 }
1430 Py_INCREF(dec_buffer);
1431 Py_INCREF(dec_flags);
1432 Py_DECREF(state);
1433 }
1434
1435 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001436 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001437 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001438 }
1439 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001440 if (chunk_size == NULL)
1441 goto fail;
1442 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001443 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1444 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001445 Py_DECREF(chunk_size);
1446 if (input_chunk == NULL)
1447 goto fail;
1448 assert(PyBytes_Check(input_chunk));
1449
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001450 nbytes = PyBytes_Size(input_chunk);
1451 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001452
1453 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1454 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1455 self->decoder, input_chunk, eof);
1456 }
1457 else {
1458 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1459 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1460 }
1461
1462 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1463 if (decoded_chars == NULL)
1464 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001465 if (PyUnicode_READY(decoded_chars) == -1)
1466 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001467 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001468 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001469 if (nchars > 0)
1470 self->b2cratio = (double) nbytes / nchars;
1471 else
1472 self->b2cratio = 0.0;
1473 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001474 eof = 0;
1475
1476 if (self->telling) {
1477 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1478 * next input to be decoded is dec_buffer + input_chunk.
1479 */
1480 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1481 if (next_input == NULL)
1482 goto fail;
1483 assert (PyBytes_Check(next_input));
1484 Py_DECREF(dec_buffer);
1485 Py_CLEAR(self->snapshot);
1486 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1487 }
1488 Py_DECREF(input_chunk);
1489
1490 return (eof == 0);
1491
1492 fail:
1493 Py_XDECREF(dec_buffer);
1494 Py_XDECREF(dec_flags);
1495 Py_XDECREF(input_chunk);
1496 return -1;
1497}
1498
1499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001500textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501{
1502 Py_ssize_t n = -1;
1503 PyObject *result = NULL, *chunks = NULL;
1504
1505 CHECK_INITIALIZED(self);
1506
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001507 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001508 return NULL;
1509
1510 CHECK_CLOSED(self);
1511
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001512 if (self->decoder == NULL)
1513 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001514
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001515 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001516 return NULL;
1517
1518 if (n < 0) {
1519 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001520 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001521 PyObject *decoded;
1522 if (bytes == NULL)
1523 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001524
1525 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1526 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1527 bytes, 1);
1528 else
1529 decoded = PyObject_CallMethodObjArgs(
1530 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531 Py_DECREF(bytes);
1532 if (decoded == NULL)
1533 goto fail;
1534
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001535 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536
1537 if (result == NULL) {
1538 Py_DECREF(decoded);
1539 return NULL;
1540 }
1541
1542 PyUnicode_AppendAndDel(&result, decoded);
1543 if (result == NULL)
1544 goto fail;
1545
1546 Py_CLEAR(self->snapshot);
1547 return result;
1548 }
1549 else {
1550 int res = 1;
1551 Py_ssize_t remaining = n;
1552
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001553 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 if (result == NULL)
1555 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001556 if (PyUnicode_READY(result) == -1)
1557 goto fail;
1558 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559
1560 /* Keep reading chunks until we have n characters to return */
1561 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001562 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001563 if (res < 0) {
1564 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1565 when EINTR occurs so we needn't do it ourselves. */
1566 if (_PyIO_trap_eintr()) {
1567 continue;
1568 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001570 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001571 if (res == 0) /* EOF */
1572 break;
1573 if (chunks == NULL) {
1574 chunks = PyList_New(0);
1575 if (chunks == NULL)
1576 goto fail;
1577 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001578 if (PyUnicode_GET_LENGTH(result) > 0 &&
1579 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001580 goto fail;
1581 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001582 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001583 if (result == NULL)
1584 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001585 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001586 }
1587 if (chunks != NULL) {
1588 if (result != NULL && PyList_Append(chunks, result) < 0)
1589 goto fail;
1590 Py_CLEAR(result);
1591 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1592 if (result == NULL)
1593 goto fail;
1594 Py_CLEAR(chunks);
1595 }
1596 return result;
1597 }
1598 fail:
1599 Py_XDECREF(result);
1600 Py_XDECREF(chunks);
1601 return NULL;
1602}
1603
1604
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001605/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001606 that is to the NUL character. Otherwise the function will produce
1607 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001608static char *
1609find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001610{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001611 if (kind == PyUnicode_1BYTE_KIND) {
1612 assert(ch < 256);
1613 return (char *) memchr((void *) s, (char) ch, end - s);
1614 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001615 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001616 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001617 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001618 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001619 return s;
1620 if (s == end)
1621 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001622 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623 }
1624}
1625
1626Py_ssize_t
1627_PyIO_find_line_ending(
1628 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001629 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001631 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632
1633 if (translated) {
1634 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001635 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001636 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001637 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001638 else {
1639 *consumed = len;
1640 return -1;
1641 }
1642 }
1643 else if (universal) {
1644 /* Universal newline search. Find any of \r, \r\n, \n
1645 * The decoder ensures that \r\n are not split in two pieces
1646 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001647 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001649 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001650 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001651 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001652 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001653 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001654 if (s >= end) {
1655 *consumed = len;
1656 return -1;
1657 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001658 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001659 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001660 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001661 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001662 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001663 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001664 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001665 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001666 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 }
1668 }
1669 }
1670 else {
1671 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001672 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1673 char *nl = PyUnicode_DATA(readnl);
1674 /* Assume that readnl is an ASCII character. */
1675 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001677 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001678 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001679 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 *consumed = len;
1681 return -1;
1682 }
1683 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001684 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001685 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001686 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 if (e < s)
1688 e = s;
1689 while (s < e) {
1690 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001691 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692 if (pos == NULL || pos >= e)
1693 break;
1694 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001695 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001696 break;
1697 }
1698 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001699 return (pos - start)/kind + readnl_len;
1700 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001701 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001702 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 if (pos == NULL)
1704 *consumed = len;
1705 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001706 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001707 return -1;
1708 }
1709 }
1710}
1711
1712static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001713_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714{
1715 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1716 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1717 int res;
1718
1719 CHECK_CLOSED(self);
1720
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001721 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722 return NULL;
1723
1724 chunked = 0;
1725
1726 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001727 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001728 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001729 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 Py_ssize_t consumed = 0;
1731
1732 /* First, get some data if necessary */
1733 res = 1;
1734 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001735 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001736 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001737 if (res < 0) {
1738 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1739 when EINTR occurs so we needn't do it ourselves. */
1740 if (_PyIO_trap_eintr()) {
1741 continue;
1742 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001743 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001744 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001745 if (res == 0)
1746 break;
1747 }
1748 if (res == 0) {
1749 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001750 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001751 Py_CLEAR(self->snapshot);
1752 start = endpos = offset_to_buffer = 0;
1753 break;
1754 }
1755
1756 if (remaining == NULL) {
1757 line = self->decoded_chars;
1758 start = self->decoded_chars_used;
1759 offset_to_buffer = 0;
1760 Py_INCREF(line);
1761 }
1762 else {
1763 assert(self->decoded_chars_used == 0);
1764 line = PyUnicode_Concat(remaining, self->decoded_chars);
1765 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001766 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001767 Py_CLEAR(remaining);
1768 if (line == NULL)
1769 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001770 if (PyUnicode_READY(line) == -1)
1771 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001772 }
1773
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001774 ptr = PyUnicode_DATA(line);
1775 line_len = PyUnicode_GET_LENGTH(line);
1776 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777
1778 endpos = _PyIO_find_line_ending(
1779 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001780 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001781 ptr + kind * start,
1782 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001783 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001784 if (endpos >= 0) {
1785 endpos += start;
1786 if (limit >= 0 && (endpos - start) + chunked >= limit)
1787 endpos = start + limit - chunked;
1788 break;
1789 }
1790
1791 /* We can put aside up to `endpos` */
1792 endpos = consumed + start;
1793 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1794 /* Didn't find line ending, but reached length limit */
1795 endpos = start + limit - chunked;
1796 break;
1797 }
1798
1799 if (endpos > start) {
1800 /* No line ending seen yet - put aside current data */
1801 PyObject *s;
1802 if (chunks == NULL) {
1803 chunks = PyList_New(0);
1804 if (chunks == NULL)
1805 goto error;
1806 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001807 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001808 if (s == NULL)
1809 goto error;
1810 if (PyList_Append(chunks, s) < 0) {
1811 Py_DECREF(s);
1812 goto error;
1813 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001814 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815 Py_DECREF(s);
1816 }
1817 /* There may be some remaining bytes we'll have to prepend to the
1818 next chunk of data */
1819 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001820 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001821 if (remaining == NULL)
1822 goto error;
1823 }
1824 Py_CLEAR(line);
1825 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001826 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001827 }
1828
1829 if (line != NULL) {
1830 /* Our line ends in the current buffer */
1831 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001832 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1833 PyObject *s = PyUnicode_Substring(line, start, endpos);
1834 Py_CLEAR(line);
1835 if (s == NULL)
1836 goto error;
1837 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001838 }
1839 }
1840 if (remaining != NULL) {
1841 if (chunks == NULL) {
1842 chunks = PyList_New(0);
1843 if (chunks == NULL)
1844 goto error;
1845 }
1846 if (PyList_Append(chunks, remaining) < 0)
1847 goto error;
1848 Py_CLEAR(remaining);
1849 }
1850 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001851 if (line != NULL) {
1852 if (PyList_Append(chunks, line) < 0)
1853 goto error;
1854 Py_DECREF(line);
1855 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001856 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1857 if (line == NULL)
1858 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001859 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001860 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001861 if (line == NULL) {
1862 Py_INCREF(_PyIO_empty_str);
1863 line = _PyIO_empty_str;
1864 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001865
1866 return line;
1867
1868 error:
1869 Py_XDECREF(chunks);
1870 Py_XDECREF(remaining);
1871 Py_XDECREF(line);
1872 return NULL;
1873}
1874
1875static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001876textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001877{
1878 Py_ssize_t limit = -1;
1879
1880 CHECK_INITIALIZED(self);
1881 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1882 return NULL;
1883 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001884 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001885}
1886
1887/* Seek and Tell */
1888
1889typedef struct {
1890 Py_off_t start_pos;
1891 int dec_flags;
1892 int bytes_to_feed;
1893 int chars_to_skip;
1894 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001895} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001896
/*
   To speed up cookie packing/unpacking, the cookie_type fields are stored
   in a temporary byte string which is converted with
   _PyLong_FromByteArray() or _PyLong_AsByteArray() (resp.).
   The OFF_* macros below give the offset of each field inside that
   intermediary byte string; each offset is expressed relative to the
   field stored just before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#ifndef WORDS_BIGENDIAN

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define IS_LITTLE_ENDIAN   1

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#else

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define IS_LITTLE_ENDIAN   0

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#endif
1934
1935static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001936textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001937{
1938 unsigned char buffer[COOKIE_BUF_LEN];
1939 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1940 if (cookieLong == NULL)
1941 return -1;
1942
1943 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1944 IS_LITTLE_ENDIAN, 0) < 0) {
1945 Py_DECREF(cookieLong);
1946 return -1;
1947 }
1948 Py_DECREF(cookieLong);
1949
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001950 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1951 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1952 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1953 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1954 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001955
1956 return 0;
1957}
1958
1959static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001960textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001961{
1962 unsigned char buffer[COOKIE_BUF_LEN];
1963
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001964 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1965 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1966 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1967 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1968 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969
1970 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1971}
1972#undef IS_LITTLE_ENDIAN
1973
1974static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001975_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001976{
1977 PyObject *res;
1978 /* When seeking to the start of the stream, we call decoder.reset()
1979 rather than decoder.getstate().
1980 This is for a few decoders such as utf-16 for which the state value
1981 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1982 utf-16, that we are expecting a BOM).
1983 */
1984 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1985 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1986 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001987 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1988 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989 if (res == NULL)
1990 return -1;
1991 Py_DECREF(res);
1992 return 0;
1993}
1994
Antoine Pitroue4501852009-05-14 18:55:55 +00001995static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001996_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001997{
1998 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001999 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002000 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2001 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2002 self->encoding_start_of_stream = 1;
2003 }
2004 else {
2005 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2006 _PyIO_zero, NULL);
2007 self->encoding_start_of_stream = 0;
2008 }
2009 if (res == NULL)
2010 return -1;
2011 Py_DECREF(res);
2012 return 0;
2013}
2014
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002015static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002016textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002017{
2018 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002019 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002020 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021 PyObject *res;
2022 int cmp;
2023
2024 CHECK_INITIALIZED(self);
2025
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002026 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2027 return NULL;
2028 CHECK_CLOSED(self);
2029
2030 Py_INCREF(cookieObj);
2031
2032 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002033 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002034 goto fail;
2035 }
2036
2037 if (whence == 1) {
2038 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002039 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002040 if (cmp < 0)
2041 goto fail;
2042
2043 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002044 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002045 goto fail;
2046 }
2047
2048 /* Seeking to the current position should attempt to
2049 * sync the underlying buffer with the current position.
2050 */
2051 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002052 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 if (cookieObj == NULL)
2054 goto fail;
2055 }
2056 else if (whence == 2) {
2057 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002058 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002059 if (cmp < 0)
2060 goto fail;
2061
2062 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002063 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002064 goto fail;
2065 }
2066
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002067 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068 if (res == NULL)
2069 goto fail;
2070 Py_DECREF(res);
2071
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002072 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002073 Py_CLEAR(self->snapshot);
2074 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002075 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002076 if (res == NULL)
2077 goto fail;
2078 Py_DECREF(res);
2079 }
2080
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002081 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002082 Py_XDECREF(cookieObj);
2083 return res;
2084 }
2085 else if (whence != 0) {
2086 PyErr_Format(PyExc_ValueError,
2087 "invalid whence (%d, should be 0, 1 or 2)", whence);
2088 goto fail;
2089 }
2090
Antoine Pitroue4501852009-05-14 18:55:55 +00002091 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 if (cmp < 0)
2093 goto fail;
2094
2095 if (cmp == 1) {
2096 PyErr_Format(PyExc_ValueError,
2097 "negative seek position %R", cookieObj);
2098 goto fail;
2099 }
2100
2101 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2102 if (res == NULL)
2103 goto fail;
2104 Py_DECREF(res);
2105
2106 /* The strategy of seek() is to go back to the safe start point
2107 * and replay the effect of read(chars_to_skip) from there.
2108 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002109 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 goto fail;
2111
2112 /* Seek back to the safe start point. */
2113 posobj = PyLong_FromOff_t(cookie.start_pos);
2114 if (posobj == NULL)
2115 goto fail;
2116 res = PyObject_CallMethodObjArgs(self->buffer,
2117 _PyIO_str_seek, posobj, NULL);
2118 Py_DECREF(posobj);
2119 if (res == NULL)
2120 goto fail;
2121 Py_DECREF(res);
2122
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002123 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002124 Py_CLEAR(self->snapshot);
2125
2126 /* Restore the decoder to its state from the safe start point. */
2127 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002128 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002129 goto fail;
2130 }
2131
2132 if (cookie.chars_to_skip) {
2133 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002134 PyObject *input_chunk = _PyObject_CallMethodId(
2135 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136 PyObject *decoded;
2137
2138 if (input_chunk == NULL)
2139 goto fail;
2140
2141 assert (PyBytes_Check(input_chunk));
2142
2143 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2144 if (self->snapshot == NULL) {
2145 Py_DECREF(input_chunk);
2146 goto fail;
2147 }
2148
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002149 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2150 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002151
2152 if (decoded == NULL)
2153 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002154 if (PyUnicode_READY(decoded) == -1) {
2155 Py_DECREF(decoded);
2156 goto fail;
2157 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002158
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002159 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002160
2161 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002162 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002163 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2164 goto fail;
2165 }
2166 self->decoded_chars_used = cookie.chars_to_skip;
2167 }
2168 else {
2169 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2170 if (self->snapshot == NULL)
2171 goto fail;
2172 }
2173
Antoine Pitroue4501852009-05-14 18:55:55 +00002174 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2175 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002176 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002177 goto fail;
2178 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002179 return cookieObj;
2180 fail:
2181 Py_XDECREF(cookieObj);
2182 return NULL;
2183
2184}
2185
2186static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002187textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002188{
2189 PyObject *res;
2190 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002191 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002192 PyObject *next_input;
2193 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002194 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002195 PyObject *saved_state = NULL;
2196 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002197 char *dec_buffer;
2198 Py_ssize_t dec_buffer_len;
2199 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002200
2201 CHECK_INITIALIZED(self);
2202 CHECK_CLOSED(self);
2203
2204 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002205 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002206 goto fail;
2207 }
2208 if (!self->telling) {
2209 PyErr_SetString(PyExc_IOError,
2210 "telling position disabled by next() call");
2211 goto fail;
2212 }
2213
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002214 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002215 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002216 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002217 if (res == NULL)
2218 goto fail;
2219 Py_DECREF(res);
2220
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002221 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002222 if (posobj == NULL)
2223 goto fail;
2224
2225 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002226 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002227 return posobj;
2228 }
2229
2230#if defined(HAVE_LARGEFILE_SUPPORT)
2231 cookie.start_pos = PyLong_AsLongLong(posobj);
2232#else
2233 cookie.start_pos = PyLong_AsLong(posobj);
2234#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002235 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002236 if (PyErr_Occurred())
2237 goto fail;
2238
2239 /* Skip backward to the snapshot point (see _read_chunk). */
2240 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2241 goto fail;
2242
2243 assert (PyBytes_Check(next_input));
2244
2245 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2246
2247 /* How many decoded characters have been used up since the snapshot? */
2248 if (self->decoded_chars_used == 0) {
2249 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002250 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002251 }
2252
2253 chars_to_skip = self->decoded_chars_used;
2254
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002255 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002256 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2257 _PyIO_str_getstate, NULL);
2258 if (saved_state == NULL)
2259 goto fail;
2260
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002261#define DECODER_GETSTATE() do { \
2262 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2263 _PyIO_str_getstate, NULL); \
2264 if (_state == NULL) \
2265 goto fail; \
2266 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2267 Py_DECREF(_state); \
2268 goto fail; \
2269 } \
2270 Py_DECREF(_state); \
2271 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002272
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002273 /* TODO: replace assert with exception */
2274#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002275 PyObject *_decoded = _PyObject_CallMethodId( \
2276 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002277 if (_decoded == NULL) \
2278 goto fail; \
2279 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002280 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002281 Py_DECREF(_decoded); \
2282 } while (0)
2283
2284 /* Fast search for an acceptable start point, close to our
2285 current pos */
2286 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2287 skip_back = 1;
2288 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2289 input = PyBytes_AS_STRING(next_input);
2290 while (skip_bytes > 0) {
2291 /* Decode up to temptative start point */
2292 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2293 goto fail;
2294 DECODER_DECODE(input, skip_bytes, chars_decoded);
2295 if (chars_decoded <= chars_to_skip) {
2296 DECODER_GETSTATE();
2297 if (dec_buffer_len == 0) {
2298 /* Before pos and no bytes buffered in decoder => OK */
2299 cookie.dec_flags = dec_flags;
2300 chars_to_skip -= chars_decoded;
2301 break;
2302 }
2303 /* Skip back by buffered amount and reset heuristic */
2304 skip_bytes -= dec_buffer_len;
2305 skip_back = 1;
2306 }
2307 else {
2308 /* We're too far ahead, skip back a bit */
2309 skip_bytes -= skip_back;
2310 skip_back *= 2;
2311 }
2312 }
2313 if (skip_bytes <= 0) {
2314 skip_bytes = 0;
2315 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2316 goto fail;
2317 }
2318
2319 /* Note our initial start point. */
2320 cookie.start_pos += skip_bytes;
2321 cookie.chars_to_skip = chars_to_skip;
2322 if (chars_to_skip == 0)
2323 goto finally;
2324
2325 /* We should be close to the desired position. Now feed the decoder one
2326 * byte at a time until we reach the `chars_to_skip` target.
2327 * As we go, note the nearest "safe start point" before the current
2328 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002329 * can safely start from there and advance to this location).
2330 */
2331 chars_decoded = 0;
2332 input = PyBytes_AS_STRING(next_input);
2333 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002334 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002335 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002336 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002337
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002338 DECODER_DECODE(input, 1, n);
2339 /* We got n chars for 1 byte */
2340 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002341 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002342 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002343
2344 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2345 /* Decoder buffer is empty, so this is a safe start point. */
2346 cookie.start_pos += cookie.bytes_to_feed;
2347 chars_to_skip -= chars_decoded;
2348 cookie.dec_flags = dec_flags;
2349 cookie.bytes_to_feed = 0;
2350 chars_decoded = 0;
2351 }
2352 if (chars_decoded >= chars_to_skip)
2353 break;
2354 input++;
2355 }
2356 if (input == input_end) {
2357 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002358 PyObject *decoded = _PyObject_CallMethodId(
2359 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002360 if (decoded == NULL)
2361 goto fail;
2362 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002363 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364 Py_DECREF(decoded);
2365 cookie.need_eof = 1;
2366
2367 if (chars_decoded < chars_to_skip) {
2368 PyErr_SetString(PyExc_IOError,
2369 "can't reconstruct logical file position");
2370 goto fail;
2371 }
2372 }
2373
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002374finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002375 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002376 Py_DECREF(saved_state);
2377 if (res == NULL)
2378 return NULL;
2379 Py_DECREF(res);
2380
2381 /* The returned cookie corresponds to the last safe start point. */
2382 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002383 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002384
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002385fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002386 if (saved_state) {
2387 PyObject *type, *value, *traceback;
2388 PyErr_Fetch(&type, &value, &traceback);
2389
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002390 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391 Py_DECREF(saved_state);
2392 if (res == NULL)
2393 return NULL;
2394 Py_DECREF(res);
2395
2396 PyErr_Restore(type, value, traceback);
2397 }
2398 return NULL;
2399}
2400
2401static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002402textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002403{
2404 PyObject *pos = Py_None;
2405 PyObject *res;
2406
2407 CHECK_INITIALIZED(self)
2408 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2409 return NULL;
2410 }
2411
2412 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2413 if (res == NULL)
2414 return NULL;
2415 Py_DECREF(res);
2416
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002417 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418}
2419
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002420static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002421textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002422{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002423 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002424
2425 CHECK_INITIALIZED(self);
2426
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002427 res = PyUnicode_FromString("<_io.TextIOWrapper");
2428 if (res == NULL)
2429 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002430 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002431 if (nameobj == NULL) {
2432 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2433 PyErr_Clear();
2434 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002435 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002436 }
2437 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002438 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002439 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002440 if (s == NULL)
2441 goto error;
2442 PyUnicode_AppendAndDel(&res, s);
2443 if (res == NULL)
2444 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002445 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002446 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002447 if (modeobj == NULL) {
2448 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2449 PyErr_Clear();
2450 else
2451 goto error;
2452 }
2453 else {
2454 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2455 Py_DECREF(modeobj);
2456 if (s == NULL)
2457 goto error;
2458 PyUnicode_AppendAndDel(&res, s);
2459 if (res == NULL)
2460 return NULL;
2461 }
2462 s = PyUnicode_FromFormat("%U encoding=%R>",
2463 res, self->encoding);
2464 Py_DECREF(res);
2465 return s;
2466error:
2467 Py_XDECREF(res);
2468 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002469}
2470
2471
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002472/* Inquiries */
2473
2474static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002475textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002476{
2477 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002478 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002479}
2480
2481static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002482textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002483{
2484 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002485 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486}
2487
2488static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002489textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002490{
2491 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002492 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002493}
2494
2495static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002496textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497{
2498 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002499 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500}
2501
2502static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002503textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504{
2505 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002506 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507}
2508
2509static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002510textiowrapper_getstate(textio *self, PyObject *args)
2511{
2512 PyErr_Format(PyExc_TypeError,
2513 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2514 return NULL;
2515}
2516
2517static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002518textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002519{
2520 CHECK_INITIALIZED(self);
2521 CHECK_CLOSED(self);
2522 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002523 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002524 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002525 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002526}
2527
2528static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002529textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530{
2531 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002532 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002533 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534
Antoine Pitrou6be88762010-05-03 16:48:20 +00002535 res = textiowrapper_closed_get(self, NULL);
2536 if (res == NULL)
2537 return NULL;
2538 r = PyObject_IsTrue(res);
2539 Py_DECREF(res);
2540 if (r < 0)
2541 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002542
Antoine Pitrou6be88762010-05-03 16:48:20 +00002543 if (r > 0) {
2544 Py_RETURN_NONE; /* stream already closed */
2545 }
2546 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002547 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002548 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002549 if (res)
2550 Py_DECREF(res);
2551 else
2552 PyErr_Clear();
2553 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002554 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002555 if (res == NULL) {
2556 return NULL;
2557 }
2558 else
2559 Py_DECREF(res);
2560
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002561 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002562 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563}
2564
2565static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002566textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567{
2568 PyObject *line;
2569
2570 CHECK_INITIALIZED(self);
2571
2572 self->telling = 0;
2573 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2574 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002575 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002576 }
2577 else {
2578 line = PyObject_CallMethodObjArgs((PyObject *)self,
2579 _PyIO_str_readline, NULL);
2580 if (line && !PyUnicode_Check(line)) {
2581 PyErr_Format(PyExc_IOError,
2582 "readline() should have returned an str object, "
2583 "not '%.200s'", Py_TYPE(line)->tp_name);
2584 Py_DECREF(line);
2585 return NULL;
2586 }
2587 }
2588
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002589 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002590 return NULL;
2591
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002592 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593 /* Reached EOF or would have blocked */
2594 Py_DECREF(line);
2595 Py_CLEAR(self->snapshot);
2596 self->telling = self->seekable;
2597 return NULL;
2598 }
2599
2600 return line;
2601}
2602
2603static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002604textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002605{
2606 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002607 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002608}
2609
2610static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002611textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612{
2613 CHECK_INITIALIZED(self);
2614 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2615}
2616
2617static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002618textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619{
2620 PyObject *res;
2621 CHECK_INITIALIZED(self);
2622 if (self->decoder == NULL)
2623 Py_RETURN_NONE;
2624 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2625 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002626 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2627 PyErr_Clear();
2628 Py_RETURN_NONE;
2629 }
2630 else {
2631 return NULL;
2632 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002633 }
2634 return res;
2635}
2636
2637static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002638textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002639{
2640 CHECK_INITIALIZED(self);
2641 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2642}
2643
2644static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002645textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002646{
2647 CHECK_INITIALIZED(self);
2648 return PyLong_FromSsize_t(self->chunk_size);
2649}
2650
2651static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002652textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002653{
2654 Py_ssize_t n;
2655 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002656 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002657 if (n == -1 && PyErr_Occurred())
2658 return -1;
2659 if (n <= 0) {
2660 PyErr_SetString(PyExc_ValueError,
2661 "a strictly positive integer is required");
2662 return -1;
2663 }
2664 self->chunk_size = n;
2665 return 0;
2666}
2667
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002668static PyMethodDef textiowrapper_methods[] = {
2669 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2670 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2671 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2672 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2673 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2674 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002675
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002676 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2677 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2678 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2679 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2680 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002681 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002683 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2684 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2685 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686 {NULL, NULL}
2687};
2688
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002689static PyMemberDef textiowrapper_members[] = {
2690 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2691 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2692 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693 {NULL}
2694};
2695
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002696static PyGetSetDef textiowrapper_getset[] = {
2697 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2698 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002699/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2700*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002701 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2702 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2703 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2704 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002705 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706};
2707
2708PyTypeObject PyTextIOWrapper_Type = {
2709 PyVarObject_HEAD_INIT(NULL, 0)
2710 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002711 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002712 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002713 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002714 0, /*tp_print*/
2715 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002716 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002717 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002718 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719 0, /*tp_as_number*/
2720 0, /*tp_as_sequence*/
2721 0, /*tp_as_mapping*/
2722 0, /*tp_hash */
2723 0, /*tp_call*/
2724 0, /*tp_str*/
2725 0, /*tp_getattro*/
2726 0, /*tp_setattro*/
2727 0, /*tp_as_buffer*/
2728 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2729 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002730 textiowrapper_doc, /* tp_doc */
2731 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2732 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002733 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002734 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002735 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002736 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2737 textiowrapper_methods, /* tp_methods */
2738 textiowrapper_members, /* tp_members */
2739 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002740 0, /* tp_base */
2741 0, /* tp_dict */
2742 0, /* tp_descr_get */
2743 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002744 offsetof(textio, dict), /*tp_dictoffset*/
2745 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002746 0, /* tp_alloc */
2747 PyType_GenericNew, /* tp_new */
2748};