blob: ae105e53cc536451ca99232cac9cd1eda07657f8 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020014_Py_IDENTIFIER(close);
15_Py_IDENTIFIER(_dealloc_warn);
16_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020017_Py_IDENTIFIER(fileno);
18_Py_IDENTIFIER(flush);
19_Py_IDENTIFIER(getpreferredencoding);
20_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020021_Py_IDENTIFIER(mode);
22_Py_IDENTIFIER(name);
23_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020024_Py_IDENTIFIER(read);
Martin v. Löwis767046a2011-10-14 15:35:36 +020025_Py_IDENTIFIER(read1);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020026_Py_IDENTIFIER(readable);
27_Py_IDENTIFIER(replace);
28_Py_IDENTIFIER(reset);
29_Py_IDENTIFIER(seek);
30_Py_IDENTIFIER(seekable);
31_Py_IDENTIFIER(setstate);
32_Py_IDENTIFIER(tell);
33_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
198/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
/* Bit flags stored in nldecoder_object.seennl, one per newline style
   encountered while decoding. */
#define SEEN_CR   1     /* lone "\r" */
#define SEEN_LF   2     /* "\n" */
#define SEEN_CRLF 4     /* "\r\n" */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
264
265PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000266_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267 PyObject *input, int final)
268{
269 PyObject *output;
270 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000271 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000272
273 if (self->decoder == NULL) {
274 PyErr_SetString(PyExc_ValueError,
275 "IncrementalNewlineDecoder.__init__ not called");
276 return NULL;
277 }
278
279 /* decode input (with the eventual \r from a previous pass) */
280 if (self->decoder != Py_None) {
281 output = PyObject_CallMethodObjArgs(self->decoder,
282 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
283 }
284 else {
285 output = input;
286 Py_INCREF(output);
287 }
288
289 if (output == NULL)
290 return NULL;
291
292 if (!PyUnicode_Check(output)) {
293 PyErr_SetString(PyExc_TypeError,
294 "decoder should return a string result");
295 goto error;
296 }
297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200298 if (PyUnicode_READY(output) == -1)
299 goto error;
300
301 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000302 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 /* Prefix output with CR */
304 int kind;
305 PyObject *modified;
306 char *out;
307
308 modified = PyUnicode_New(output_len + 1,
309 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000310 if (modified == NULL)
311 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200312 kind = PyUnicode_KIND(modified);
313 out = PyUnicode_DATA(modified);
314 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200315 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000316 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200317 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000318 self->pendingcr = 0;
319 output_len++;
320 }
321
322 /* retain last \r even when not translating data:
323 * then readline() is sure to get \r\n in one pass
324 */
325 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000326 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
328 {
329 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
330 if (modified == NULL)
331 goto error;
332 Py_DECREF(output);
333 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000334 self->pendingcr = 1;
335 }
336 }
337
338 /* Record which newlines are read and do newline translation if desired,
339 all in one pass. */
340 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 Py_ssize_t len;
343 int seennl = self->seennl;
344 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200345 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 in_str = PyUnicode_DATA(output);
348 len = PyUnicode_GET_LENGTH(output);
349 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000350
351 if (len == 0)
352 return output;
353
354 /* If, up to now, newlines are consistently \n, do a quick check
355 for the \r *byte* with the libc's optimized memchr.
356 */
357 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200358 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000359 }
360
Antoine Pitrou66913e22009-03-06 23:40:56 +0000361 if (only_lf) {
362 /* If not already seen, quick scan for a possible "\n" character.
363 (there's nothing else to be done, even when in translation mode)
364 */
365 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200366 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100367 if (kind == PyUnicode_1BYTE_KIND)
368 seennl |= SEEN_LF;
369 else {
370 Py_ssize_t i = 0;
371 for (;;) {
372 Py_UCS4 c;
373 /* Fast loop for non-control characters */
374 while (PyUnicode_READ(kind, in_str, i) > '\n')
375 i++;
376 c = PyUnicode_READ(kind, in_str, i++);
377 if (c == '\n') {
378 seennl |= SEEN_LF;
379 break;
380 }
381 if (i >= len)
382 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000383 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
385 }
386 /* Finished: we have scanned for newlines, and none of them
387 need translating */
388 }
389 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000392 if (seennl == SEEN_ALL)
393 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000394 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200395 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200397 while (PyUnicode_READ(kind, in_str, i) > '\r')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000400 if (c == '\n')
401 seennl |= SEEN_LF;
402 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 }
407 else
408 seennl |= SEEN_CR;
409 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200410 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000411 break;
412 if (seennl == SEEN_ALL)
413 break;
414 }
415 endscan:
416 ;
417 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000418 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 void *translated;
420 int kind = PyUnicode_KIND(output);
421 void *in_str = PyUnicode_DATA(output);
422 Py_ssize_t in, out;
423 /* XXX: Previous in-place translation here is disabled as
424 resizing is not possible anymore */
425 /* We could try to optimize this so that we only do a copy
426 when there is something to translate. On the other hand,
427 we already know there is a \r byte, so chances are high
428 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200429 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 if (translated == NULL) {
431 PyErr_NoMemory();
432 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
439 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 seennl |= SEEN_LF;
443 continue;
444 }
445 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 in++;
448 seennl |= SEEN_CRLF;
449 }
450 else
451 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 continue;
454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_DECREF(output);
460 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100461 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200463 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 }
465 self->seennl |= seennl;
466 }
467
468 return output;
469
470 error:
471 Py_DECREF(output);
472 return NULL;
473}
474
475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000476incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 PyObject *args, PyObject *kwds)
478{
479 char *kwlist[] = {"input", "final", NULL};
480 PyObject *input;
481 int final = 0;
482
483 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
484 kwlist, &input, &final))
485 return NULL;
486 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
487}
488
489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000490incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000491{
492 PyObject *buffer;
493 unsigned PY_LONG_LONG flag;
494
495 if (self->decoder != Py_None) {
496 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
497 _PyIO_str_getstate, NULL);
498 if (state == NULL)
499 return NULL;
500 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
501 Py_DECREF(state);
502 return NULL;
503 }
504 Py_INCREF(buffer);
505 Py_DECREF(state);
506 }
507 else {
508 buffer = PyBytes_FromString("");
509 flag = 0;
510 }
511 flag <<= 1;
512 if (self->pendingcr)
513 flag |= 1;
514 return Py_BuildValue("NK", buffer, flag);
515}
516
517static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000518incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000519{
520 PyObject *buffer;
521 unsigned PY_LONG_LONG flag;
522
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
524 return NULL;
525
526 self->pendingcr = (int) flag & 1;
527 flag >>= 1;
528
529 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200530 return _PyObject_CallMethodId(self->decoder,
531 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532 else
533 Py_RETURN_NONE;
534}
535
536static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000537incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000538{
539 self->seennl = 0;
540 self->pendingcr = 0;
541 if (self->decoder != Py_None)
542 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
543 else
544 Py_RETURN_NONE;
545}
546
547static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000548incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000549{
550 switch (self->seennl) {
551 case SEEN_CR:
552 return PyUnicode_FromString("\r");
553 case SEEN_LF:
554 return PyUnicode_FromString("\n");
555 case SEEN_CRLF:
556 return PyUnicode_FromString("\r\n");
557 case SEEN_CR | SEEN_LF:
558 return Py_BuildValue("ss", "\r", "\n");
559 case SEEN_CR | SEEN_CRLF:
560 return Py_BuildValue("ss", "\r", "\r\n");
561 case SEEN_LF | SEEN_CRLF:
562 return Py_BuildValue("ss", "\n", "\r\n");
563 case SEEN_CR | SEEN_LF | SEEN_CRLF:
564 return Py_BuildValue("sss", "\r", "\n", "\r\n");
565 default:
566 Py_RETURN_NONE;
567 }
568
569}
570
571
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000572static PyMethodDef incrementalnewlinedecoder_methods[] = {
573 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
574 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
575 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
576 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000577 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578};
579
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000580static PyGetSetDef incrementalnewlinedecoder_getset[] = {
581 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000582 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583};
584
585PyTypeObject PyIncrementalNewlineDecoder_Type = {
586 PyVarObject_HEAD_INIT(NULL, 0)
587 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000588 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000591 0, /*tp_print*/
592 0, /*tp_getattr*/
593 0, /*tp_setattr*/
594 0, /*tp_compare */
595 0, /*tp_repr*/
596 0, /*tp_as_number*/
597 0, /*tp_as_sequence*/
598 0, /*tp_as_mapping*/
599 0, /*tp_hash */
600 0, /*tp_call*/
601 0, /*tp_str*/
602 0, /*tp_getattro*/
603 0, /*tp_setattro*/
604 0, /*tp_as_buffer*/
605 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000606 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000607 0, /* tp_traverse */
608 0, /* tp_clear */
609 0, /* tp_richcompare */
610 0, /*tp_weaklistoffset*/
611 0, /* tp_iter */
612 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000615 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000616 0, /* tp_base */
617 0, /* tp_dict */
618 0, /* tp_descr_get */
619 0, /* tp_descr_set */
620 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000621 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000622 0, /* tp_alloc */
623 PyType_GenericNew, /* tp_new */
624};
625
626
627/* TextIOWrapper */
628
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 "Character and line based layer over a BufferedIOBase object, buffer.\n"
631 "\n"
632 "encoding gives the name of the encoding that the stream will be\n"
633 "decoded or encoded with. It defaults to locale.getpreferredencoding.\n"
634 "\n"
635 "errors determines the strictness of encoding and decoding (see the\n"
636 "codecs.register) and defaults to \"strict\".\n"
637 "\n"
638 "newline can be None, '', '\\n', '\\r', or '\\r\\n'. It controls the\n"
639 "handling of line endings. If it is None, universal newlines is\n"
640 "enabled. With this enabled, on input, the lines endings '\\n', '\\r',\n"
641 "or '\\r\\n' are translated to '\\n' before being returned to the\n"
642 "caller. Conversely, on output, '\\n' is translated to the system\n"
Éric Araujofab97662012-02-26 02:14:08 +0100643 "default line separator, os.linesep. If newline is any other of its\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 "legal values, that newline becomes the newline when the file is read\n"
645 "and it is returned untranslated. On output, '\\n' is converted to the\n"
646 "newline.\n"
647 "\n"
648 "If line_buffering is True, a call to flush is implied when a call to\n"
649 "write contains a newline character."
650 );
651
652typedef PyObject *
653 (*encodefunc_t)(PyObject *, PyObject *);
654
655typedef struct
656{
657 PyObject_HEAD
658 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000659 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000660 Py_ssize_t chunk_size;
661 PyObject *buffer;
662 PyObject *encoding;
663 PyObject *encoder;
664 PyObject *decoder;
665 PyObject *readnl;
666 PyObject *errors;
667 const char *writenl; /* utf-8 encoded, NULL stands for \n */
668 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200669 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000670 char readuniversal;
671 char readtranslate;
672 char writetranslate;
673 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200674 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000675 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000676 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 /* Specialized encoding func (see below) */
678 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000679 /* Whether or not it's the start of the stream */
680 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000681
682 /* Reads and writes are internally buffered in order to speed things up.
683 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000684
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000685 Please also note that text to be written is first encoded before being
686 buffered. This is necessary so that encoding errors are immediately
687 reported to the caller, but it unfortunately means that the
688 IncrementalEncoder (whose encode() method is always written in Python)
689 becomes a bottleneck for small writes.
690 */
691 PyObject *decoded_chars; /* buffer for text returned from decoder */
692 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
693 PyObject *pending_bytes; /* list of bytes objects waiting to be
694 written, or NULL */
695 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000696
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000697 /* snapshot is either None, or a tuple (dec_flags, next_input) where
698 * dec_flags is the second (integer) item of the decoder state and
699 * next_input is the chunk of input bytes that comes next after the
700 * snapshot point. We use this to reconstruct decoder states in tell().
701 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000702 PyObject *snapshot;
703 /* Bytes-to-characters ratio for the current chunk. Serves as input for
704 the heuristic in tell(). */
705 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706
707 /* Cache raw object if it's a FileIO object */
708 PyObject *raw;
709
710 PyObject *weakreflist;
711 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000712} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000713
714
715/* A couple of specialized cases in order to bypass the slow incremental
716 encoding methods for the most popular encodings. */
717
718static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000719ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000720{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200721 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000722}
723
724static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000725utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100727 return _PyUnicode_EncodeUTF16(text,
728 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729}
730
731static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000732utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
735 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740{
Antoine Pitroue4501852009-05-14 18:55:55 +0000741 if (!self->encoding_start_of_stream) {
742 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000744 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000745#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000746 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000747#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000748 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100749 return _PyUnicode_EncodeUTF16(text,
750 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751}
752
Antoine Pitroue4501852009-05-14 18:55:55 +0000753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000755{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100756 return _PyUnicode_EncodeUTF32(text,
757 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000758}
759
760static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000761utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000762{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
764 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000769{
770 if (!self->encoding_start_of_stream) {
771 /* Skip the BOM and use native byte ordering */
772#if defined(WORDS_BIGENDIAN)
773 return utf32be_encode(self, text);
774#else
775 return utf32le_encode(self, text);
776#endif
777 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100778 return _PyUnicode_EncodeUTF32(text,
779 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000780}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000781
782static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000783utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000784{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200785 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000786}
787
788static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000789latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000790{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200791 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000792}
793
794/* Map normalized encoding names onto the specialized encoding funcs */
795
796typedef struct {
797 const char *name;
798 encodefunc_t encodefunc;
799} encodefuncentry;
800
Antoine Pitrou24f36292009-03-28 22:16:42 +0000801static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802 {"ascii", (encodefunc_t) ascii_encode},
803 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000804 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000805 {"utf-16-be", (encodefunc_t) utf16be_encode},
806 {"utf-16-le", (encodefunc_t) utf16le_encode},
807 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000808 {"utf-32-be", (encodefunc_t) utf32be_encode},
809 {"utf-32-le", (encodefunc_t) utf32le_encode},
810 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000811 {NULL, NULL}
812};
813
814
815static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000816textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000817{
818 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200819 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000820 NULL};
821 PyObject *buffer, *raw;
822 char *encoding = NULL;
823 char *errors = NULL;
824 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200825 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000826 _PyIO_State *state = IO_STATE;
827
828 PyObject *res;
829 int r;
830
831 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000832 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200833 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000834 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200835 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000836 return -1;
837
838 if (newline && newline[0] != '\0'
839 && !(newline[0] == '\n' && newline[1] == '\0')
840 && !(newline[0] == '\r' && newline[1] == '\0')
841 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
842 PyErr_Format(PyExc_ValueError,
843 "illegal newline value: %s", newline);
844 return -1;
845 }
846
847 Py_CLEAR(self->buffer);
848 Py_CLEAR(self->encoding);
849 Py_CLEAR(self->encoder);
850 Py_CLEAR(self->decoder);
851 Py_CLEAR(self->readnl);
852 Py_CLEAR(self->decoded_chars);
853 Py_CLEAR(self->pending_bytes);
854 Py_CLEAR(self->snapshot);
855 Py_CLEAR(self->errors);
856 Py_CLEAR(self->raw);
857 self->decoded_chars_used = 0;
858 self->pending_bytes_count = 0;
859 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000860 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000861
862 if (encoding == NULL) {
863 /* Try os.device_encoding(fileno) */
864 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200865 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000866 /* Ignore only AttributeError and UnsupportedOperation */
867 if (fileno == NULL) {
868 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
869 PyErr_ExceptionMatches(state->unsupported_operation)) {
870 PyErr_Clear();
871 }
872 else {
873 goto error;
874 }
875 }
876 else {
Brett Cannonefb00c02012-02-29 18:31:31 -0500877 int fd = (int) PyLong_AsLong(fileno);
878 Py_DECREF(fileno);
879 if (fd == -1 && PyErr_Occurred()) {
880 goto error;
881 }
882
883 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000884 if (self->encoding == NULL)
885 goto error;
886 else if (!PyUnicode_Check(self->encoding))
887 Py_CLEAR(self->encoding);
888 }
889 }
890 if (encoding == NULL && self->encoding == NULL) {
891 if (state->locale_module == NULL) {
892 state->locale_module = PyImport_ImportModule("locale");
893 if (state->locale_module == NULL)
894 goto catch_ImportError;
895 else
896 goto use_locale;
897 }
898 else {
899 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200900 self->encoding = _PyObject_CallMethodId(
901 state->locale_module, &PyId_getpreferredencoding, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000902 if (self->encoding == NULL) {
903 catch_ImportError:
904 /*
905 Importing locale can raise a ImportError because of
906 _functools, and locale.getpreferredencoding can raise a
907 ImportError if _locale is not available. These will happen
908 during module building.
909 */
910 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
911 PyErr_Clear();
912 self->encoding = PyUnicode_FromString("ascii");
913 }
914 else
915 goto error;
916 }
917 else if (!PyUnicode_Check(self->encoding))
918 Py_CLEAR(self->encoding);
919 }
920 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000921 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000922 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000923 if (encoding == NULL)
924 goto error;
925 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000926 else if (encoding != NULL) {
927 self->encoding = PyUnicode_FromString(encoding);
928 if (self->encoding == NULL)
929 goto error;
930 }
931 else {
932 PyErr_SetString(PyExc_IOError,
933 "could not determine default encoding");
934 }
935
936 if (errors == NULL)
937 errors = "strict";
938 self->errors = PyBytes_FromString(errors);
939 if (self->errors == NULL)
940 goto error;
941
942 self->chunk_size = 8192;
943 self->readuniversal = (newline == NULL || newline[0] == '\0');
944 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200945 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000946 self->readtranslate = (newline == NULL);
947 if (newline) {
948 self->readnl = PyUnicode_FromString(newline);
949 if (self->readnl == NULL)
950 return -1;
951 }
952 self->writetranslate = (newline == NULL || newline[0] != '\0');
953 if (!self->readuniversal && self->readnl) {
954 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000955 if (self->writenl == NULL)
956 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000957 if (!strcmp(self->writenl, "\n"))
958 self->writenl = NULL;
959 }
960#ifdef MS_WINDOWS
961 else
962 self->writenl = "\r\n";
963#endif
964
965 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200966 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000967 if (res == NULL)
968 goto error;
969 r = PyObject_IsTrue(res);
970 Py_DECREF(res);
971 if (r == -1)
972 goto error;
973 if (r == 1) {
974 self->decoder = PyCodec_IncrementalDecoder(
975 encoding, errors);
976 if (self->decoder == NULL)
977 goto error;
978
979 if (self->readuniversal) {
980 PyObject *incrementalDecoder = PyObject_CallFunction(
981 (PyObject *)&PyIncrementalNewlineDecoder_Type,
982 "Oi", self->decoder, (int)self->readtranslate);
983 if (incrementalDecoder == NULL)
984 goto error;
985 Py_CLEAR(self->decoder);
986 self->decoder = incrementalDecoder;
987 }
988 }
989
990 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200991 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000992 if (res == NULL)
993 goto error;
994 r = PyObject_IsTrue(res);
995 Py_DECREF(res);
996 if (r == -1)
997 goto error;
998 if (r == 1) {
999 PyObject *ci;
1000 self->encoder = PyCodec_IncrementalEncoder(
1001 encoding, errors);
1002 if (self->encoder == NULL)
1003 goto error;
1004 /* Get the normalized named of the codec */
1005 ci = _PyCodec_Lookup(encoding);
1006 if (ci == NULL)
1007 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001008 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001009 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001010 if (res == NULL) {
1011 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1012 PyErr_Clear();
1013 else
1014 goto error;
1015 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001016 else if (PyUnicode_Check(res)) {
1017 encodefuncentry *e = encodefuncs;
1018 while (e->name != NULL) {
1019 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1020 self->encodefunc = e->encodefunc;
1021 break;
1022 }
1023 e++;
1024 }
1025 }
1026 Py_XDECREF(res);
1027 }
1028
1029 self->buffer = buffer;
1030 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001031
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001032 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1033 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1034 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001035 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001036 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001037 if (raw == NULL) {
1038 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1039 PyErr_Clear();
1040 else
1041 goto error;
1042 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001043 else if (Py_TYPE(raw) == &PyFileIO_Type)
1044 self->raw = raw;
1045 else
1046 Py_DECREF(raw);
1047 }
1048
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001049 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 if (res == NULL)
1051 goto error;
1052 self->seekable = self->telling = PyObject_IsTrue(res);
1053 Py_DECREF(res);
1054
Martin v. Löwis767046a2011-10-14 15:35:36 +02001055 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001056
Antoine Pitroue4501852009-05-14 18:55:55 +00001057 self->encoding_start_of_stream = 0;
1058 if (self->seekable && self->encoder) {
1059 PyObject *cookieObj;
1060 int cmp;
1061
1062 self->encoding_start_of_stream = 1;
1063
1064 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1065 if (cookieObj == NULL)
1066 goto error;
1067
1068 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1069 Py_DECREF(cookieObj);
1070 if (cmp < 0) {
1071 goto error;
1072 }
1073
1074 if (cmp == 0) {
1075 self->encoding_start_of_stream = 0;
1076 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1077 _PyIO_zero, NULL);
1078 if (res == NULL)
1079 goto error;
1080 Py_DECREF(res);
1081 }
1082 }
1083
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001084 self->ok = 1;
1085 return 0;
1086
1087 error:
1088 return -1;
1089}
1090
1091static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001092_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001093{
1094 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1095 return -1;
1096 self->ok = 0;
1097 Py_CLEAR(self->buffer);
1098 Py_CLEAR(self->encoding);
1099 Py_CLEAR(self->encoder);
1100 Py_CLEAR(self->decoder);
1101 Py_CLEAR(self->readnl);
1102 Py_CLEAR(self->decoded_chars);
1103 Py_CLEAR(self->pending_bytes);
1104 Py_CLEAR(self->snapshot);
1105 Py_CLEAR(self->errors);
1106 Py_CLEAR(self->raw);
1107 return 0;
1108}
1109
1110static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001111textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001112{
Antoine Pitroue033e062010-10-29 10:38:18 +00001113 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001114 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001115 return;
1116 _PyObject_GC_UNTRACK(self);
1117 if (self->weakreflist != NULL)
1118 PyObject_ClearWeakRefs((PyObject *)self);
1119 Py_CLEAR(self->dict);
1120 Py_TYPE(self)->tp_free((PyObject *)self);
1121}
1122
1123static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001124textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125{
1126 Py_VISIT(self->buffer);
1127 Py_VISIT(self->encoding);
1128 Py_VISIT(self->encoder);
1129 Py_VISIT(self->decoder);
1130 Py_VISIT(self->readnl);
1131 Py_VISIT(self->decoded_chars);
1132 Py_VISIT(self->pending_bytes);
1133 Py_VISIT(self->snapshot);
1134 Py_VISIT(self->errors);
1135 Py_VISIT(self->raw);
1136
1137 Py_VISIT(self->dict);
1138 return 0;
1139}
1140
1141static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001142textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001143{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001144 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001145 return -1;
1146 Py_CLEAR(self->dict);
1147 return 0;
1148}
1149
1150static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001151textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001152
1153/* This macro takes some shortcuts to make the common case faster. */
1154#define CHECK_CLOSED(self) \
1155 do { \
1156 int r; \
1157 PyObject *_res; \
1158 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1159 if (self->raw != NULL) \
1160 r = _PyFileIO_closed(self->raw); \
1161 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001162 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001163 if (_res == NULL) \
1164 return NULL; \
1165 r = PyObject_IsTrue(_res); \
1166 Py_DECREF(_res); \
1167 if (r < 0) \
1168 return NULL; \
1169 } \
1170 if (r > 0) { \
1171 PyErr_SetString(PyExc_ValueError, \
1172 "I/O operation on closed file."); \
1173 return NULL; \
1174 } \
1175 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001176 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001177 return NULL; \
1178 } while (0)
1179
/* Return NULL from the enclosing function, with ValueError set, unless the
   object has been successfully initialized.  A detached wrapper gets a
   dedicated message. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            self->detached \
                ? "underlying buffer has been detached" \
                : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1191
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
            self->detached \
                ? "underlying buffer has been detached" \
                : "I/O operation on uninitialized object"); \
        return -1; \
    }
1203
1204
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001205static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001206textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001207{
1208 PyObject *buffer, *res;
1209 CHECK_INITIALIZED(self);
1210 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1211 if (res == NULL)
1212 return NULL;
1213 Py_DECREF(res);
1214 buffer = self->buffer;
1215 self->buffer = NULL;
1216 self->detached = 1;
1217 self->ok = 0;
1218 return buffer;
1219}
1220
Antoine Pitrou24f36292009-03-28 22:16:42 +00001221/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001222 underlying buffered object, though. */
1223static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001224_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001225{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001226 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001227
1228 if (self->pending_bytes == NULL)
1229 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001230
1231 pending = self->pending_bytes;
1232 Py_INCREF(pending);
1233 self->pending_bytes_count = 0;
1234 Py_CLEAR(self->pending_bytes);
1235
1236 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1237 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001238 if (b == NULL)
1239 return -1;
1240 ret = PyObject_CallMethodObjArgs(self->buffer,
1241 _PyIO_str_write, b, NULL);
1242 Py_DECREF(b);
1243 if (ret == NULL)
1244 return -1;
1245 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246 return 0;
1247}
1248
1249static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001250textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001251{
1252 PyObject *ret;
1253 PyObject *text; /* owned reference */
1254 PyObject *b;
1255 Py_ssize_t textlen;
1256 int haslf = 0;
1257 int needflush = 0;
1258
1259 CHECK_INITIALIZED(self);
1260
1261 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1262 return NULL;
1263 }
1264
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001265 if (PyUnicode_READY(text) == -1)
1266 return NULL;
1267
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001268 CHECK_CLOSED(self);
1269
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001270 if (self->encoder == NULL)
1271 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001272
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001273 Py_INCREF(text);
1274
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001276
1277 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279 haslf = 1;
1280
1281 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001282 PyObject *newtext = _PyObject_CallMethodId(
1283 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001284 Py_DECREF(text);
1285 if (newtext == NULL)
1286 return NULL;
1287 text = newtext;
1288 }
1289
Antoine Pitroue96ec682011-07-23 21:46:35 +02001290 if (self->write_through)
1291 needflush = 1;
1292 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001293 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001294 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001295 needflush = 1;
1296
1297 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001298 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001299 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001300 self->encoding_start_of_stream = 0;
1301 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302 else
1303 b = PyObject_CallMethodObjArgs(self->encoder,
1304 _PyIO_str_encode, text, NULL);
1305 Py_DECREF(text);
1306 if (b == NULL)
1307 return NULL;
1308
1309 if (self->pending_bytes == NULL) {
1310 self->pending_bytes = PyList_New(0);
1311 if (self->pending_bytes == NULL) {
1312 Py_DECREF(b);
1313 return NULL;
1314 }
1315 self->pending_bytes_count = 0;
1316 }
1317 if (PyList_Append(self->pending_bytes, b) < 0) {
1318 Py_DECREF(b);
1319 return NULL;
1320 }
1321 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1322 Py_DECREF(b);
1323 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001324 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001325 return NULL;
1326 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001327
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001328 if (needflush) {
1329 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1330 if (ret == NULL)
1331 return NULL;
1332 Py_DECREF(ret);
1333 }
1334
1335 Py_CLEAR(self->snapshot);
1336
1337 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001338 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001339 if (ret == NULL)
1340 return NULL;
1341 Py_DECREF(ret);
1342 }
1343
1344 return PyLong_FromSsize_t(textlen);
1345}
1346
1347/* Steal a reference to chars and store it in the decoded_char buffer;
1348 */
1349static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001350textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001351{
1352 Py_CLEAR(self->decoded_chars);
1353 self->decoded_chars = chars;
1354 self->decoded_chars_used = 0;
1355}
1356
1357static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001358textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001359{
1360 PyObject *chars;
1361 Py_ssize_t avail;
1362
1363 if (self->decoded_chars == NULL)
1364 return PyUnicode_FromStringAndSize(NULL, 0);
1365
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001366 /* decoded_chars is guaranteed to be "ready". */
1367 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001368 - self->decoded_chars_used);
1369
1370 assert(avail >= 0);
1371
1372 if (n < 0 || n > avail)
1373 n = avail;
1374
1375 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 chars = PyUnicode_Substring(self->decoded_chars,
1377 self->decoded_chars_used,
1378 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001379 if (chars == NULL)
1380 return NULL;
1381 }
1382 else {
1383 chars = self->decoded_chars;
1384 Py_INCREF(chars);
1385 }
1386
1387 self->decoded_chars_used += n;
1388 return chars;
1389}
1390
1391/* Read and decode the next chunk of data from the BufferedReader.
1392 */
1393static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001394textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001395{
1396 PyObject *dec_buffer = NULL;
1397 PyObject *dec_flags = NULL;
1398 PyObject *input_chunk = NULL;
1399 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001400 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001401 int eof;
1402
1403 /* The return value is True unless EOF was reached. The decoded string is
1404 * placed in self._decoded_chars (replacing its previous value). The
1405 * entire input chunk is sent to the decoder, though some of it may remain
1406 * buffered in the decoder, yet to be converted.
1407 */
1408
1409 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001410 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001411 return -1;
1412 }
1413
1414 if (self->telling) {
1415 /* To prepare for tell(), we need to snapshot a point in the file
1416 * where the decoder's input buffer is empty.
1417 */
1418
1419 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1420 _PyIO_str_getstate, NULL);
1421 if (state == NULL)
1422 return -1;
1423 /* Given this, we know there was a valid snapshot point
1424 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1425 */
1426 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1427 Py_DECREF(state);
1428 return -1;
1429 }
1430 Py_INCREF(dec_buffer);
1431 Py_INCREF(dec_flags);
1432 Py_DECREF(state);
1433 }
1434
1435 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001436 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001437 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001438 }
1439 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001440 if (chunk_size == NULL)
1441 goto fail;
1442 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001443 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1444 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001445 Py_DECREF(chunk_size);
1446 if (input_chunk == NULL)
1447 goto fail;
1448 assert(PyBytes_Check(input_chunk));
1449
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001450 nbytes = PyBytes_Size(input_chunk);
1451 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001452
1453 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1454 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1455 self->decoder, input_chunk, eof);
1456 }
1457 else {
1458 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1459 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1460 }
1461
1462 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1463 if (decoded_chars == NULL)
1464 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001465 if (PyUnicode_READY(decoded_chars) == -1)
1466 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001467 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001468 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001469 if (nchars > 0)
1470 self->b2cratio = (double) nbytes / nchars;
1471 else
1472 self->b2cratio = 0.0;
1473 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001474 eof = 0;
1475
1476 if (self->telling) {
1477 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1478 * next input to be decoded is dec_buffer + input_chunk.
1479 */
1480 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1481 if (next_input == NULL)
1482 goto fail;
1483 assert (PyBytes_Check(next_input));
1484 Py_DECREF(dec_buffer);
1485 Py_CLEAR(self->snapshot);
1486 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1487 }
1488 Py_DECREF(input_chunk);
1489
1490 return (eof == 0);
1491
1492 fail:
1493 Py_XDECREF(dec_buffer);
1494 Py_XDECREF(dec_flags);
1495 Py_XDECREF(input_chunk);
1496 return -1;
1497}
1498
1499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001500textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501{
1502 Py_ssize_t n = -1;
1503 PyObject *result = NULL, *chunks = NULL;
1504
1505 CHECK_INITIALIZED(self);
1506
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001507 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001508 return NULL;
1509
1510 CHECK_CLOSED(self);
1511
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001512 if (self->decoder == NULL)
1513 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001514
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001515 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001516 return NULL;
1517
1518 if (n < 0) {
1519 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001520 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001521 PyObject *decoded;
1522 if (bytes == NULL)
1523 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001524
1525 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1526 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1527 bytes, 1);
1528 else
1529 decoded = PyObject_CallMethodObjArgs(
1530 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531 Py_DECREF(bytes);
1532 if (decoded == NULL)
1533 goto fail;
1534
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001535 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001536
1537 if (result == NULL) {
1538 Py_DECREF(decoded);
1539 return NULL;
1540 }
1541
1542 PyUnicode_AppendAndDel(&result, decoded);
1543 if (result == NULL)
1544 goto fail;
1545
1546 Py_CLEAR(self->snapshot);
1547 return result;
1548 }
1549 else {
1550 int res = 1;
1551 Py_ssize_t remaining = n;
1552
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001553 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 if (result == NULL)
1555 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001556 if (PyUnicode_READY(result) == -1)
1557 goto fail;
1558 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559
1560 /* Keep reading chunks until we have n characters to return */
1561 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001562 res = textiowrapper_read_chunk(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001563 if (res < 0)
1564 goto fail;
1565 if (res == 0) /* EOF */
1566 break;
1567 if (chunks == NULL) {
1568 chunks = PyList_New(0);
1569 if (chunks == NULL)
1570 goto fail;
1571 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001572 if (PyUnicode_GET_LENGTH(result) > 0 &&
1573 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574 goto fail;
1575 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001576 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001577 if (result == NULL)
1578 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001579 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001580 }
1581 if (chunks != NULL) {
1582 if (result != NULL && PyList_Append(chunks, result) < 0)
1583 goto fail;
1584 Py_CLEAR(result);
1585 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1586 if (result == NULL)
1587 goto fail;
1588 Py_CLEAR(chunks);
1589 }
1590 return result;
1591 }
1592 fail:
1593 Py_XDECREF(result);
1594 Py_XDECREF(chunks);
1595 return NULL;
1596}
1597
1598
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001599/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001600 that is to the NUL character. Otherwise the function will produce
1601 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001602static char *
1603find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001605 if (kind == PyUnicode_1BYTE_KIND) {
1606 assert(ch < 256);
1607 return (char *) memchr((void *) s, (char) ch, end - s);
1608 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001609 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001610 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001611 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001612 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613 return s;
1614 if (s == end)
1615 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001616 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001617 }
1618}
1619
1620Py_ssize_t
1621_PyIO_find_line_ending(
1622 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001623 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001624{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001625 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001626
1627 if (translated) {
1628 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001629 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001631 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 else {
1633 *consumed = len;
1634 return -1;
1635 }
1636 }
1637 else if (universal) {
1638 /* Universal newline search. Find any of \r, \r\n, \n
1639 * The decoder ensures that \r\n are not split in two pieces
1640 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001641 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001643 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001644 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001645 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001646 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001647 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648 if (s >= end) {
1649 *consumed = len;
1650 return -1;
1651 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001652 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001653 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001654 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001655 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001658 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001659 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001660 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001661 }
1662 }
1663 }
1664 else {
1665 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001666 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1667 char *nl = PyUnicode_DATA(readnl);
1668 /* Assume that readnl is an ASCII character. */
1669 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001671 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001673 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674 *consumed = len;
1675 return -1;
1676 }
1677 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001678 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001679 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001680 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001681 if (e < s)
1682 e = s;
1683 while (s < e) {
1684 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001685 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001686 if (pos == NULL || pos >= e)
1687 break;
1688 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001689 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001690 break;
1691 }
1692 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001693 return (pos - start)/kind + readnl_len;
1694 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001695 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001696 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 if (pos == NULL)
1698 *consumed = len;
1699 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001700 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001701 return -1;
1702 }
1703 }
1704}
1705
1706static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001707_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001708{
1709 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1710 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1711 int res;
1712
1713 CHECK_CLOSED(self);
1714
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001715 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001716 return NULL;
1717
1718 chunked = 0;
1719
1720 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001721 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001722 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001723 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001724 Py_ssize_t consumed = 0;
1725
1726 /* First, get some data if necessary */
1727 res = 1;
1728 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001729 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001730 res = textiowrapper_read_chunk(self, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001731 if (res < 0)
1732 goto error;
1733 if (res == 0)
1734 break;
1735 }
1736 if (res == 0) {
1737 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001738 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 Py_CLEAR(self->snapshot);
1740 start = endpos = offset_to_buffer = 0;
1741 break;
1742 }
1743
1744 if (remaining == NULL) {
1745 line = self->decoded_chars;
1746 start = self->decoded_chars_used;
1747 offset_to_buffer = 0;
1748 Py_INCREF(line);
1749 }
1750 else {
1751 assert(self->decoded_chars_used == 0);
1752 line = PyUnicode_Concat(remaining, self->decoded_chars);
1753 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001754 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001755 Py_CLEAR(remaining);
1756 if (line == NULL)
1757 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001758 if (PyUnicode_READY(line) == -1)
1759 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001760 }
1761
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001762 ptr = PyUnicode_DATA(line);
1763 line_len = PyUnicode_GET_LENGTH(line);
1764 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001765
1766 endpos = _PyIO_find_line_ending(
1767 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001768 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001769 ptr + kind * start,
1770 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001771 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001772 if (endpos >= 0) {
1773 endpos += start;
1774 if (limit >= 0 && (endpos - start) + chunked >= limit)
1775 endpos = start + limit - chunked;
1776 break;
1777 }
1778
1779 /* We can put aside up to `endpos` */
1780 endpos = consumed + start;
1781 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1782 /* Didn't find line ending, but reached length limit */
1783 endpos = start + limit - chunked;
1784 break;
1785 }
1786
1787 if (endpos > start) {
1788 /* No line ending seen yet - put aside current data */
1789 PyObject *s;
1790 if (chunks == NULL) {
1791 chunks = PyList_New(0);
1792 if (chunks == NULL)
1793 goto error;
1794 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001795 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001796 if (s == NULL)
1797 goto error;
1798 if (PyList_Append(chunks, s) < 0) {
1799 Py_DECREF(s);
1800 goto error;
1801 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001802 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001803 Py_DECREF(s);
1804 }
1805 /* There may be some remaining bytes we'll have to prepend to the
1806 next chunk of data */
1807 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001808 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001809 if (remaining == NULL)
1810 goto error;
1811 }
1812 Py_CLEAR(line);
1813 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001814 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815 }
1816
1817 if (line != NULL) {
1818 /* Our line ends in the current buffer */
1819 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001820 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1821 PyObject *s = PyUnicode_Substring(line, start, endpos);
1822 Py_CLEAR(line);
1823 if (s == NULL)
1824 goto error;
1825 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001826 }
1827 }
1828 if (remaining != NULL) {
1829 if (chunks == NULL) {
1830 chunks = PyList_New(0);
1831 if (chunks == NULL)
1832 goto error;
1833 }
1834 if (PyList_Append(chunks, remaining) < 0)
1835 goto error;
1836 Py_CLEAR(remaining);
1837 }
1838 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001839 if (line != NULL) {
1840 if (PyList_Append(chunks, line) < 0)
1841 goto error;
1842 Py_DECREF(line);
1843 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001844 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1845 if (line == NULL)
1846 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001847 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001848 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001849 if (line == NULL) {
1850 Py_INCREF(_PyIO_empty_str);
1851 line = _PyIO_empty_str;
1852 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001853
1854 return line;
1855
1856 error:
1857 Py_XDECREF(chunks);
1858 Py_XDECREF(remaining);
1859 Py_XDECREF(line);
1860 return NULL;
1861}
1862
1863static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001864textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001865{
1866 Py_ssize_t limit = -1;
1867
1868 CHECK_INITIALIZED(self);
1869 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1870 return NULL;
1871 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001872 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001873}
1874
1875/* Seek and Tell */
1876
1877typedef struct {
1878 Py_off_t start_pos;
1879 int dec_flags;
1880 int bytes_to_feed;
1881 int chars_to_skip;
1882 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001883} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001884
1885/*
1886 To speed up cookie packing/unpacking, we store the fields in a temporary
1887 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1888 The following macros define at which offsets in the intermediary byte
1889 string the various CookieStruct fields will be stored.
1890 */
1891
1892#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1893
1894#if defined(WORDS_BIGENDIAN)
1895
1896# define IS_LITTLE_ENDIAN 0
1897
1898/* We want the least significant byte of start_pos to also be the least
1899 significant byte of the cookie, which means that in big-endian mode we
1900 must copy the fields in reverse order. */
1901
1902# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1903# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1904# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1905# define OFF_CHARS_TO_SKIP (sizeof(char))
1906# define OFF_NEED_EOF 0
1907
1908#else
1909
1910# define IS_LITTLE_ENDIAN 1
1911
1912/* Little-endian mode: the least significant byte of start_pos will
1913 naturally end up the least significant byte of the cookie. */
1914
1915# define OFF_START_POS 0
1916# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1917# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1918# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1919# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1920
1921#endif
1922
1923static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001924textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001925{
1926 unsigned char buffer[COOKIE_BUF_LEN];
1927 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1928 if (cookieLong == NULL)
1929 return -1;
1930
1931 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1932 IS_LITTLE_ENDIAN, 0) < 0) {
1933 Py_DECREF(cookieLong);
1934 return -1;
1935 }
1936 Py_DECREF(cookieLong);
1937
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001938 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1939 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1940 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1941 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1942 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001943
1944 return 0;
1945}
1946
1947static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001948textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001949{
1950 unsigned char buffer[COOKIE_BUF_LEN];
1951
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001952 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1953 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1954 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1955 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1956 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001957
1958 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1959}
1960#undef IS_LITTLE_ENDIAN
1961
1962static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001963_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001964{
1965 PyObject *res;
1966 /* When seeking to the start of the stream, we call decoder.reset()
1967 rather than decoder.getstate().
1968 This is for a few decoders such as utf-16 for which the state value
1969 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1970 utf-16, that we are expecting a BOM).
1971 */
1972 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1973 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1974 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001975 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1976 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977 if (res == NULL)
1978 return -1;
1979 Py_DECREF(res);
1980 return 0;
1981}
1982
Antoine Pitroue4501852009-05-14 18:55:55 +00001983static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001984_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00001985{
1986 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001987 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00001988 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
1989 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
1990 self->encoding_start_of_stream = 1;
1991 }
1992 else {
1993 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1994 _PyIO_zero, NULL);
1995 self->encoding_start_of_stream = 0;
1996 }
1997 if (res == NULL)
1998 return -1;
1999 Py_DECREF(res);
2000 return 0;
2001}
2002
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002003static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002004textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005{
2006 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002007 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009 PyObject *res;
2010 int cmp;
2011
2012 CHECK_INITIALIZED(self);
2013
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002014 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2015 return NULL;
2016 CHECK_CLOSED(self);
2017
2018 Py_INCREF(cookieObj);
2019
2020 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002021 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002022 goto fail;
2023 }
2024
2025 if (whence == 1) {
2026 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002027 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002028 if (cmp < 0)
2029 goto fail;
2030
2031 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002032 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002033 goto fail;
2034 }
2035
2036 /* Seeking to the current position should attempt to
2037 * sync the underlying buffer with the current position.
2038 */
2039 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002040 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002041 if (cookieObj == NULL)
2042 goto fail;
2043 }
2044 else if (whence == 2) {
2045 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002046 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047 if (cmp < 0)
2048 goto fail;
2049
2050 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002051 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 goto fail;
2053 }
2054
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002055 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002056 if (res == NULL)
2057 goto fail;
2058 Py_DECREF(res);
2059
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002060 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002061 Py_CLEAR(self->snapshot);
2062 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002063 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002064 if (res == NULL)
2065 goto fail;
2066 Py_DECREF(res);
2067 }
2068
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002069 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002070 Py_XDECREF(cookieObj);
2071 return res;
2072 }
2073 else if (whence != 0) {
2074 PyErr_Format(PyExc_ValueError,
2075 "invalid whence (%d, should be 0, 1 or 2)", whence);
2076 goto fail;
2077 }
2078
Antoine Pitroue4501852009-05-14 18:55:55 +00002079 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080 if (cmp < 0)
2081 goto fail;
2082
2083 if (cmp == 1) {
2084 PyErr_Format(PyExc_ValueError,
2085 "negative seek position %R", cookieObj);
2086 goto fail;
2087 }
2088
2089 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2090 if (res == NULL)
2091 goto fail;
2092 Py_DECREF(res);
2093
2094 /* The strategy of seek() is to go back to the safe start point
2095 * and replay the effect of read(chars_to_skip) from there.
2096 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002097 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002098 goto fail;
2099
2100 /* Seek back to the safe start point. */
2101 posobj = PyLong_FromOff_t(cookie.start_pos);
2102 if (posobj == NULL)
2103 goto fail;
2104 res = PyObject_CallMethodObjArgs(self->buffer,
2105 _PyIO_str_seek, posobj, NULL);
2106 Py_DECREF(posobj);
2107 if (res == NULL)
2108 goto fail;
2109 Py_DECREF(res);
2110
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002111 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002112 Py_CLEAR(self->snapshot);
2113
2114 /* Restore the decoder to its state from the safe start point. */
2115 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002116 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117 goto fail;
2118 }
2119
2120 if (cookie.chars_to_skip) {
2121 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002122 PyObject *input_chunk = _PyObject_CallMethodId(
2123 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002124 PyObject *decoded;
2125
2126 if (input_chunk == NULL)
2127 goto fail;
2128
2129 assert (PyBytes_Check(input_chunk));
2130
2131 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2132 if (self->snapshot == NULL) {
2133 Py_DECREF(input_chunk);
2134 goto fail;
2135 }
2136
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002137 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2138 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002139
2140 if (decoded == NULL)
2141 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002142 if (PyUnicode_READY(decoded) == -1) {
2143 Py_DECREF(decoded);
2144 goto fail;
2145 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002146
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002147 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002148
2149 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002150 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002151 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2152 goto fail;
2153 }
2154 self->decoded_chars_used = cookie.chars_to_skip;
2155 }
2156 else {
2157 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2158 if (self->snapshot == NULL)
2159 goto fail;
2160 }
2161
Antoine Pitroue4501852009-05-14 18:55:55 +00002162 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2163 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002164 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002165 goto fail;
2166 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167 return cookieObj;
2168 fail:
2169 Py_XDECREF(cookieObj);
2170 return NULL;
2171
2172}
2173
2174static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002175textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002176{
2177 PyObject *res;
2178 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002179 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002180 PyObject *next_input;
2181 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002182 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002183 PyObject *saved_state = NULL;
2184 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002185 char *dec_buffer;
2186 Py_ssize_t dec_buffer_len;
2187 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002188
2189 CHECK_INITIALIZED(self);
2190 CHECK_CLOSED(self);
2191
2192 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002193 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002194 goto fail;
2195 }
2196 if (!self->telling) {
2197 PyErr_SetString(PyExc_IOError,
2198 "telling position disabled by next() call");
2199 goto fail;
2200 }
2201
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002202 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002203 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002204 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002205 if (res == NULL)
2206 goto fail;
2207 Py_DECREF(res);
2208
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002209 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002210 if (posobj == NULL)
2211 goto fail;
2212
2213 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002214 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002215 return posobj;
2216 }
2217
2218#if defined(HAVE_LARGEFILE_SUPPORT)
2219 cookie.start_pos = PyLong_AsLongLong(posobj);
2220#else
2221 cookie.start_pos = PyLong_AsLong(posobj);
2222#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002223 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002224 if (PyErr_Occurred())
2225 goto fail;
2226
2227 /* Skip backward to the snapshot point (see _read_chunk). */
2228 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2229 goto fail;
2230
2231 assert (PyBytes_Check(next_input));
2232
2233 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2234
2235 /* How many decoded characters have been used up since the snapshot? */
2236 if (self->decoded_chars_used == 0) {
2237 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002238 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002239 }
2240
2241 chars_to_skip = self->decoded_chars_used;
2242
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002243 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002244 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2245 _PyIO_str_getstate, NULL);
2246 if (saved_state == NULL)
2247 goto fail;
2248
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002249#define DECODER_GETSTATE() do { \
2250 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2251 _PyIO_str_getstate, NULL); \
2252 if (_state == NULL) \
2253 goto fail; \
2254 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2255 Py_DECREF(_state); \
2256 goto fail; \
2257 } \
2258 Py_DECREF(_state); \
2259 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002260
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002261 /* TODO: replace assert with exception */
2262#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002263 PyObject *_decoded = _PyObject_CallMethodId( \
2264 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002265 if (_decoded == NULL) \
2266 goto fail; \
2267 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002268 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002269 Py_DECREF(_decoded); \
2270 } while (0)
2271
2272 /* Fast search for an acceptable start point, close to our
2273 current pos */
2274 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2275 skip_back = 1;
2276 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2277 input = PyBytes_AS_STRING(next_input);
2278 while (skip_bytes > 0) {
2279 /* Decode up to temptative start point */
2280 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2281 goto fail;
2282 DECODER_DECODE(input, skip_bytes, chars_decoded);
2283 if (chars_decoded <= chars_to_skip) {
2284 DECODER_GETSTATE();
2285 if (dec_buffer_len == 0) {
2286 /* Before pos and no bytes buffered in decoder => OK */
2287 cookie.dec_flags = dec_flags;
2288 chars_to_skip -= chars_decoded;
2289 break;
2290 }
2291 /* Skip back by buffered amount and reset heuristic */
2292 skip_bytes -= dec_buffer_len;
2293 skip_back = 1;
2294 }
2295 else {
2296 /* We're too far ahead, skip back a bit */
2297 skip_bytes -= skip_back;
2298 skip_back *= 2;
2299 }
2300 }
2301 if (skip_bytes <= 0) {
2302 skip_bytes = 0;
2303 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2304 goto fail;
2305 }
2306
2307 /* Note our initial start point. */
2308 cookie.start_pos += skip_bytes;
2309 cookie.chars_to_skip = chars_to_skip;
2310 if (chars_to_skip == 0)
2311 goto finally;
2312
2313 /* We should be close to the desired position. Now feed the decoder one
2314 * byte at a time until we reach the `chars_to_skip` target.
2315 * As we go, note the nearest "safe start point" before the current
2316 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002317 * can safely start from there and advance to this location).
2318 */
2319 chars_decoded = 0;
2320 input = PyBytes_AS_STRING(next_input);
2321 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002322 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002323 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002324 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002325
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002326 DECODER_DECODE(input, 1, n);
2327 /* We got n chars for 1 byte */
2328 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002329 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002330 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002331
2332 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2333 /* Decoder buffer is empty, so this is a safe start point. */
2334 cookie.start_pos += cookie.bytes_to_feed;
2335 chars_to_skip -= chars_decoded;
2336 cookie.dec_flags = dec_flags;
2337 cookie.bytes_to_feed = 0;
2338 chars_decoded = 0;
2339 }
2340 if (chars_decoded >= chars_to_skip)
2341 break;
2342 input++;
2343 }
2344 if (input == input_end) {
2345 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002346 PyObject *decoded = _PyObject_CallMethodId(
2347 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002348 if (decoded == NULL)
2349 goto fail;
2350 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002351 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002352 Py_DECREF(decoded);
2353 cookie.need_eof = 1;
2354
2355 if (chars_decoded < chars_to_skip) {
2356 PyErr_SetString(PyExc_IOError,
2357 "can't reconstruct logical file position");
2358 goto fail;
2359 }
2360 }
2361
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002362finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002363 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364 Py_DECREF(saved_state);
2365 if (res == NULL)
2366 return NULL;
2367 Py_DECREF(res);
2368
2369 /* The returned cookie corresponds to the last safe start point. */
2370 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002371 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002372
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002373fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002374 if (saved_state) {
2375 PyObject *type, *value, *traceback;
2376 PyErr_Fetch(&type, &value, &traceback);
2377
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002378 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002379 Py_DECREF(saved_state);
2380 if (res == NULL)
2381 return NULL;
2382 Py_DECREF(res);
2383
2384 PyErr_Restore(type, value, traceback);
2385 }
2386 return NULL;
2387}
2388
2389static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002390textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391{
2392 PyObject *pos = Py_None;
2393 PyObject *res;
2394
2395 CHECK_INITIALIZED(self)
2396 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2397 return NULL;
2398 }
2399
2400 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2401 if (res == NULL)
2402 return NULL;
2403 Py_DECREF(res);
2404
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002405 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002406}
2407
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002408static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002409textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002410{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002411 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002412
2413 CHECK_INITIALIZED(self);
2414
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002415 res = PyUnicode_FromString("<_io.TextIOWrapper");
2416 if (res == NULL)
2417 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002418 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002419 if (nameobj == NULL) {
2420 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2421 PyErr_Clear();
2422 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002423 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002424 }
2425 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002426 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002427 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002428 if (s == NULL)
2429 goto error;
2430 PyUnicode_AppendAndDel(&res, s);
2431 if (res == NULL)
2432 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002433 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002434 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002435 if (modeobj == NULL) {
2436 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2437 PyErr_Clear();
2438 else
2439 goto error;
2440 }
2441 else {
2442 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2443 Py_DECREF(modeobj);
2444 if (s == NULL)
2445 goto error;
2446 PyUnicode_AppendAndDel(&res, s);
2447 if (res == NULL)
2448 return NULL;
2449 }
2450 s = PyUnicode_FromFormat("%U encoding=%R>",
2451 res, self->encoding);
2452 Py_DECREF(res);
2453 return s;
2454error:
2455 Py_XDECREF(res);
2456 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002457}
2458
2459
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002460/* Inquiries */
2461
2462static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002463textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002464{
2465 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002466 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002467}
2468
2469static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002470textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002471{
2472 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002473 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002474}
2475
2476static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002477textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002478{
2479 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002480 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002481}
2482
2483static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002484textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485{
2486 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002487 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002488}
2489
2490static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002491textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002492{
2493 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002494 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002495}
2496
2497static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002498textiowrapper_getstate(textio *self, PyObject *args)
2499{
2500 PyErr_Format(PyExc_TypeError,
2501 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2502 return NULL;
2503}
2504
2505static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002506textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507{
2508 CHECK_INITIALIZED(self);
2509 CHECK_CLOSED(self);
2510 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002511 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002512 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002513 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002514}
2515
2516static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002517textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002518{
2519 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002520 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002521 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002522
Antoine Pitrou6be88762010-05-03 16:48:20 +00002523 res = textiowrapper_closed_get(self, NULL);
2524 if (res == NULL)
2525 return NULL;
2526 r = PyObject_IsTrue(res);
2527 Py_DECREF(res);
2528 if (r < 0)
2529 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002530
Antoine Pitrou6be88762010-05-03 16:48:20 +00002531 if (r > 0) {
2532 Py_RETURN_NONE; /* stream already closed */
2533 }
2534 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002535 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002536 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002537 if (res)
2538 Py_DECREF(res);
2539 else
2540 PyErr_Clear();
2541 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002542 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002543 if (res == NULL) {
2544 return NULL;
2545 }
2546 else
2547 Py_DECREF(res);
2548
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002549 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002550 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551}
2552
2553static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002554textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002555{
2556 PyObject *line;
2557
2558 CHECK_INITIALIZED(self);
2559
2560 self->telling = 0;
2561 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2562 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002563 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002564 }
2565 else {
2566 line = PyObject_CallMethodObjArgs((PyObject *)self,
2567 _PyIO_str_readline, NULL);
2568 if (line && !PyUnicode_Check(line)) {
2569 PyErr_Format(PyExc_IOError,
2570 "readline() should have returned an str object, "
2571 "not '%.200s'", Py_TYPE(line)->tp_name);
2572 Py_DECREF(line);
2573 return NULL;
2574 }
2575 }
2576
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002577 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002578 return NULL;
2579
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002580 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002581 /* Reached EOF or would have blocked */
2582 Py_DECREF(line);
2583 Py_CLEAR(self->snapshot);
2584 self->telling = self->seekable;
2585 return NULL;
2586 }
2587
2588 return line;
2589}
2590
2591static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002592textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002593{
2594 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002595 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002596}
2597
2598static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002599textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002600{
2601 CHECK_INITIALIZED(self);
2602 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2603}
2604
2605static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002606textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002607{
2608 PyObject *res;
2609 CHECK_INITIALIZED(self);
2610 if (self->decoder == NULL)
2611 Py_RETURN_NONE;
2612 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2613 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002614 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2615 PyErr_Clear();
2616 Py_RETURN_NONE;
2617 }
2618 else {
2619 return NULL;
2620 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002621 }
2622 return res;
2623}
2624
2625static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002626textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002627{
2628 CHECK_INITIALIZED(self);
2629 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2630}
2631
2632static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002633textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002634{
2635 CHECK_INITIALIZED(self);
2636 return PyLong_FromSsize_t(self->chunk_size);
2637}
2638
2639static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002640textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002641{
2642 Py_ssize_t n;
2643 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002644 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002645 if (n == -1 && PyErr_Occurred())
2646 return -1;
2647 if (n <= 0) {
2648 PyErr_SetString(PyExc_ValueError,
2649 "a strictly positive integer is required");
2650 return -1;
2651 }
2652 self->chunk_size = n;
2653 return 0;
2654}
2655
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002656static PyMethodDef textiowrapper_methods[] = {
2657 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2658 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2659 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2660 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2661 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2662 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002663
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002664 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2665 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2666 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2667 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2668 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002669 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002670
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002671 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2672 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2673 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002674 {NULL, NULL}
2675};
2676
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002677static PyMemberDef textiowrapper_members[] = {
2678 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2679 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2680 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002681 {NULL}
2682};
2683
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002684static PyGetSetDef textiowrapper_getset[] = {
2685 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2686 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002687/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2688*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002689 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2690 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2691 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2692 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002693 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002694};
2695
2696PyTypeObject PyTextIOWrapper_Type = {
2697 PyVarObject_HEAD_INIT(NULL, 0)
2698 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002699 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002700 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002701 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002702 0, /*tp_print*/
2703 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002704 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002705 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002706 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 0, /*tp_as_number*/
2708 0, /*tp_as_sequence*/
2709 0, /*tp_as_mapping*/
2710 0, /*tp_hash */
2711 0, /*tp_call*/
2712 0, /*tp_str*/
2713 0, /*tp_getattro*/
2714 0, /*tp_setattro*/
2715 0, /*tp_as_buffer*/
2716 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2717 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002718 textiowrapper_doc, /* tp_doc */
2719 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2720 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002721 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002722 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002723 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002724 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2725 textiowrapper_methods, /* tp_methods */
2726 textiowrapper_members, /* tp_members */
2727 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002728 0, /* tp_base */
2729 0, /* tp_dict */
2730 0, /* tp_descr_get */
2731 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002732 offsetof(textio, dict), /*tp_dictoffset*/
2733 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002734 0, /* tp_alloc */
2735 PyType_GenericNew, /* tp_new */
2736};