blob: 421dc5051402573736b44890b22240e191e17f33 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifiers for the method and attribute names this file looks up
   by name; each declaration provides a PyId_<name> object (for example
   PyId_setstate, passed to _PyObject_CallMethodId further down). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
198/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
/* Bit flags for the newline kinds recorded in nldecoder_object.seennl. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
264
265PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000266_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267 PyObject *input, int final)
268{
269 PyObject *output;
270 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000271 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000272
273 if (self->decoder == NULL) {
274 PyErr_SetString(PyExc_ValueError,
275 "IncrementalNewlineDecoder.__init__ not called");
276 return NULL;
277 }
278
279 /* decode input (with the eventual \r from a previous pass) */
280 if (self->decoder != Py_None) {
281 output = PyObject_CallMethodObjArgs(self->decoder,
282 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
283 }
284 else {
285 output = input;
286 Py_INCREF(output);
287 }
288
289 if (output == NULL)
290 return NULL;
291
292 if (!PyUnicode_Check(output)) {
293 PyErr_SetString(PyExc_TypeError,
294 "decoder should return a string result");
295 goto error;
296 }
297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200298 if (PyUnicode_READY(output) == -1)
299 goto error;
300
301 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000302 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 /* Prefix output with CR */
304 int kind;
305 PyObject *modified;
306 char *out;
307
308 modified = PyUnicode_New(output_len + 1,
309 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000310 if (modified == NULL)
311 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200312 kind = PyUnicode_KIND(modified);
313 out = PyUnicode_DATA(modified);
314 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200315 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000316 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200317 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000318 self->pendingcr = 0;
319 output_len++;
320 }
321
322 /* retain last \r even when not translating data:
323 * then readline() is sure to get \r\n in one pass
324 */
325 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000326 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
328 {
329 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
330 if (modified == NULL)
331 goto error;
332 Py_DECREF(output);
333 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000334 self->pendingcr = 1;
335 }
336 }
337
338 /* Record which newlines are read and do newline translation if desired,
339 all in one pass. */
340 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 Py_ssize_t len;
343 int seennl = self->seennl;
344 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200345 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 in_str = PyUnicode_DATA(output);
348 len = PyUnicode_GET_LENGTH(output);
349 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000350
351 if (len == 0)
352 return output;
353
354 /* If, up to now, newlines are consistently \n, do a quick check
355 for the \r *byte* with the libc's optimized memchr.
356 */
357 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200358 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000359 }
360
Antoine Pitrou66913e22009-03-06 23:40:56 +0000361 if (only_lf) {
362 /* If not already seen, quick scan for a possible "\n" character.
363 (there's nothing else to be done, even when in translation mode)
364 */
365 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200366 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100367 if (kind == PyUnicode_1BYTE_KIND)
368 seennl |= SEEN_LF;
369 else {
370 Py_ssize_t i = 0;
371 for (;;) {
372 Py_UCS4 c;
373 /* Fast loop for non-control characters */
374 while (PyUnicode_READ(kind, in_str, i) > '\n')
375 i++;
376 c = PyUnicode_READ(kind, in_str, i++);
377 if (c == '\n') {
378 seennl |= SEEN_LF;
379 break;
380 }
381 if (i >= len)
382 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000383 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
385 }
386 /* Finished: we have scanned for newlines, and none of them
387 need translating */
388 }
389 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000392 if (seennl == SEEN_ALL)
393 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000394 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200395 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200397 while (PyUnicode_READ(kind, in_str, i) > '\r')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000400 if (c == '\n')
401 seennl |= SEEN_LF;
402 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 }
407 else
408 seennl |= SEEN_CR;
409 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200410 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000411 break;
412 if (seennl == SEEN_ALL)
413 break;
414 }
415 endscan:
416 ;
417 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000418 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 void *translated;
420 int kind = PyUnicode_KIND(output);
421 void *in_str = PyUnicode_DATA(output);
422 Py_ssize_t in, out;
423 /* XXX: Previous in-place translation here is disabled as
424 resizing is not possible anymore */
425 /* We could try to optimize this so that we only do a copy
426 when there is something to translate. On the other hand,
427 we already know there is a \r byte, so chances are high
428 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200429 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 if (translated == NULL) {
431 PyErr_NoMemory();
432 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
439 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 seennl |= SEEN_LF;
443 continue;
444 }
445 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 in++;
448 seennl |= SEEN_CRLF;
449 }
450 else
451 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 continue;
454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_DECREF(output);
460 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100461 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200463 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 }
465 self->seennl |= seennl;
466 }
467
468 return output;
469
470 error:
471 Py_DECREF(output);
472 return NULL;
473}
474
475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000476incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 PyObject *args, PyObject *kwds)
478{
479 char *kwlist[] = {"input", "final", NULL};
480 PyObject *input;
481 int final = 0;
482
483 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
484 kwlist, &input, &final))
485 return NULL;
486 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
487}
488
489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000490incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000491{
492 PyObject *buffer;
493 unsigned PY_LONG_LONG flag;
494
495 if (self->decoder != Py_None) {
496 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
497 _PyIO_str_getstate, NULL);
498 if (state == NULL)
499 return NULL;
500 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
501 Py_DECREF(state);
502 return NULL;
503 }
504 Py_INCREF(buffer);
505 Py_DECREF(state);
506 }
507 else {
508 buffer = PyBytes_FromString("");
509 flag = 0;
510 }
511 flag <<= 1;
512 if (self->pendingcr)
513 flag |= 1;
514 return Py_BuildValue("NK", buffer, flag);
515}
516
517static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000518incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000519{
520 PyObject *buffer;
521 unsigned PY_LONG_LONG flag;
522
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
524 return NULL;
525
526 self->pendingcr = (int) flag & 1;
527 flag >>= 1;
528
529 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200530 return _PyObject_CallMethodId(self->decoder,
531 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532 else
533 Py_RETURN_NONE;
534}
535
536static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000537incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000538{
539 self->seennl = 0;
540 self->pendingcr = 0;
541 if (self->decoder != Py_None)
542 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
543 else
544 Py_RETURN_NONE;
545}
546
547static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000548incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000549{
550 switch (self->seennl) {
551 case SEEN_CR:
552 return PyUnicode_FromString("\r");
553 case SEEN_LF:
554 return PyUnicode_FromString("\n");
555 case SEEN_CRLF:
556 return PyUnicode_FromString("\r\n");
557 case SEEN_CR | SEEN_LF:
558 return Py_BuildValue("ss", "\r", "\n");
559 case SEEN_CR | SEEN_CRLF:
560 return Py_BuildValue("ss", "\r", "\r\n");
561 case SEEN_LF | SEEN_CRLF:
562 return Py_BuildValue("ss", "\n", "\r\n");
563 case SEEN_CR | SEEN_LF | SEEN_CRLF:
564 return Py_BuildValue("sss", "\r", "\n", "\r\n");
565 default:
566 Py_RETURN_NONE;
567 }
568
569}
570
571
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000572static PyMethodDef incrementalnewlinedecoder_methods[] = {
573 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
574 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
575 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
576 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000577 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578};
579
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000580static PyGetSetDef incrementalnewlinedecoder_getset[] = {
581 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000582 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583};
584
585PyTypeObject PyIncrementalNewlineDecoder_Type = {
586 PyVarObject_HEAD_INIT(NULL, 0)
587 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000588 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000591 0, /*tp_print*/
592 0, /*tp_getattr*/
593 0, /*tp_setattr*/
594 0, /*tp_compare */
595 0, /*tp_repr*/
596 0, /*tp_as_number*/
597 0, /*tp_as_sequence*/
598 0, /*tp_as_mapping*/
599 0, /*tp_hash */
600 0, /*tp_call*/
601 0, /*tp_str*/
602 0, /*tp_getattro*/
603 0, /*tp_setattro*/
604 0, /*tp_as_buffer*/
605 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000606 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000607 0, /* tp_traverse */
608 0, /* tp_clear */
609 0, /* tp_richcompare */
610 0, /*tp_weaklistoffset*/
611 0, /* tp_iter */
612 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000615 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000616 0, /* tp_base */
617 0, /* tp_dict */
618 0, /* tp_descr_get */
619 0, /* tp_descr_set */
620 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000621 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000622 0, /* tp_alloc */
623 PyType_GenericNew, /* tp_new */
624};
625
626
627/* TextIOWrapper */
628
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 "Character and line based layer over a BufferedIOBase object, buffer.\n"
631 "\n"
632 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200633 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634 "\n"
635 "errors determines the strictness of encoding and decoding (see the\n"
636 "codecs.register) and defaults to \"strict\".\n"
637 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200638 "newline controls how line endings are handled. It can be None, '',\n"
639 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
640 "\n"
641 "* On input, if newline is None, universal newlines mode is\n"
642 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
643 " these are translated into '\\n' before being returned to the\n"
644 " caller. If it is '', universal newline mode is enabled, but line\n"
645 " endings are returned to the caller untranslated. If it has any of\n"
646 " the other legal values, input lines are only terminated by the given\n"
647 " string, and the line ending is returned to the caller untranslated.\n"
648 "\n"
649 "* On output, if newline is None, any '\\n' characters written are\n"
650 " translated to the system default line separator, os.linesep. If\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200651 " newline is '' or '\n', no translation takes place. If newline is any\n"
652 " of the other legal values, any '\\n' characters written are translated\n"
653 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 "\n"
655 "If line_buffering is True, a call to flush is implied when a call to\n"
656 "write contains a newline character."
657 );
658
659typedef PyObject *
660 (*encodefunc_t)(PyObject *, PyObject *);
661
662typedef struct
663{
664 PyObject_HEAD
665 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000666 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000667 Py_ssize_t chunk_size;
668 PyObject *buffer;
669 PyObject *encoding;
670 PyObject *encoder;
671 PyObject *decoder;
672 PyObject *readnl;
673 PyObject *errors;
674 const char *writenl; /* utf-8 encoded, NULL stands for \n */
675 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200676 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 char readuniversal;
678 char readtranslate;
679 char writetranslate;
680 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200681 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000683 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000684 /* Specialized encoding func (see below) */
685 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000686 /* Whether or not it's the start of the stream */
687 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000688
689 /* Reads and writes are internally buffered in order to speed things up.
690 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000691
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000692 Please also note that text to be written is first encoded before being
693 buffered. This is necessary so that encoding errors are immediately
694 reported to the caller, but it unfortunately means that the
695 IncrementalEncoder (whose encode() method is always written in Python)
696 becomes a bottleneck for small writes.
697 */
698 PyObject *decoded_chars; /* buffer for text returned from decoder */
699 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
700 PyObject *pending_bytes; /* list of bytes objects waiting to be
701 written, or NULL */
702 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000703
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000704 /* snapshot is either None, or a tuple (dec_flags, next_input) where
705 * dec_flags is the second (integer) item of the decoder state and
706 * next_input is the chunk of input bytes that comes next after the
707 * snapshot point. We use this to reconstruct decoder states in tell().
708 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000709 PyObject *snapshot;
710 /* Bytes-to-characters ratio for the current chunk. Serves as input for
711 the heuristic in tell(). */
712 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000713
714 /* Cache raw object if it's a FileIO object */
715 PyObject *raw;
716
717 PyObject *weakreflist;
718 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000719} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000720
721
722/* A couple of specialized cases in order to bypass the slow incremental
723 encoding methods for the most popular encodings. */
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200728 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729}
730
731static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000732utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
735 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100741 return _PyUnicode_EncodeUTF16(text,
742 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743}
744
745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000746utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000747{
Antoine Pitroue4501852009-05-14 18:55:55 +0000748 if (!self->encoding_start_of_stream) {
749 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000751 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000752#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000753 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000754#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000755 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100756 return _PyUnicode_EncodeUTF16(text,
757 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758}
759
Antoine Pitroue4501852009-05-14 18:55:55 +0000760static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000761utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000762{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
764 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000769{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100770 return _PyUnicode_EncodeUTF32(text,
771 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000772}
773
774static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000775utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000776{
777 if (!self->encoding_start_of_stream) {
778 /* Skip the BOM and use native byte ordering */
779#if defined(WORDS_BIGENDIAN)
780 return utf32be_encode(self, text);
781#else
782 return utf32le_encode(self, text);
783#endif
784 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100785 return _PyUnicode_EncodeUTF32(text,
786 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000787}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793}
794
795static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000796latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200798 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799}
800
801/* Map normalized encoding names onto the specialized encoding funcs */
802
803typedef struct {
804 const char *name;
805 encodefunc_t encodefunc;
806} encodefuncentry;
807
Antoine Pitrou24f36292009-03-28 22:16:42 +0000808static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809 {"ascii", (encodefunc_t) ascii_encode},
810 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000811 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 {"utf-16-be", (encodefunc_t) utf16be_encode},
813 {"utf-16-le", (encodefunc_t) utf16le_encode},
814 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000815 {"utf-32-be", (encodefunc_t) utf32be_encode},
816 {"utf-32-le", (encodefunc_t) utf32le_encode},
817 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818 {NULL, NULL}
819};
820
821
822static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000823textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000824{
825 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200826 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000827 NULL};
828 PyObject *buffer, *raw;
829 char *encoding = NULL;
830 char *errors = NULL;
831 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200832 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 _PyIO_State *state = IO_STATE;
834
835 PyObject *res;
836 int r;
837
838 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000839 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200840 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000841 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200842 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000843 return -1;
844
845 if (newline && newline[0] != '\0'
846 && !(newline[0] == '\n' && newline[1] == '\0')
847 && !(newline[0] == '\r' && newline[1] == '\0')
848 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 PyErr_Format(PyExc_ValueError,
850 "illegal newline value: %s", newline);
851 return -1;
852 }
853
854 Py_CLEAR(self->buffer);
855 Py_CLEAR(self->encoding);
856 Py_CLEAR(self->encoder);
857 Py_CLEAR(self->decoder);
858 Py_CLEAR(self->readnl);
859 Py_CLEAR(self->decoded_chars);
860 Py_CLEAR(self->pending_bytes);
861 Py_CLEAR(self->snapshot);
862 Py_CLEAR(self->errors);
863 Py_CLEAR(self->raw);
864 self->decoded_chars_used = 0;
865 self->pending_bytes_count = 0;
866 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000867 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000868
869 if (encoding == NULL) {
870 /* Try os.device_encoding(fileno) */
871 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200872 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000873 /* Ignore only AttributeError and UnsupportedOperation */
874 if (fileno == NULL) {
875 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
876 PyErr_ExceptionMatches(state->unsupported_operation)) {
877 PyErr_Clear();
878 }
879 else {
880 goto error;
881 }
882 }
883 else {
Brett Cannonefb00c02012-02-29 18:31:31 -0500884 int fd = (int) PyLong_AsLong(fileno);
885 Py_DECREF(fileno);
886 if (fd == -1 && PyErr_Occurred()) {
887 goto error;
888 }
889
890 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000891 if (self->encoding == NULL)
892 goto error;
893 else if (!PyUnicode_Check(self->encoding))
894 Py_CLEAR(self->encoding);
895 }
896 }
897 if (encoding == NULL && self->encoding == NULL) {
898 if (state->locale_module == NULL) {
899 state->locale_module = PyImport_ImportModule("locale");
900 if (state->locale_module == NULL)
901 goto catch_ImportError;
902 else
903 goto use_locale;
904 }
905 else {
906 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200907 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200908 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000909 if (self->encoding == NULL) {
910 catch_ImportError:
911 /*
912 Importing locale can raise a ImportError because of
913 _functools, and locale.getpreferredencoding can raise a
914 ImportError if _locale is not available. These will happen
915 during module building.
916 */
917 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
918 PyErr_Clear();
919 self->encoding = PyUnicode_FromString("ascii");
920 }
921 else
922 goto error;
923 }
924 else if (!PyUnicode_Check(self->encoding))
925 Py_CLEAR(self->encoding);
926 }
927 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000928 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000929 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000930 if (encoding == NULL)
931 goto error;
932 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000933 else if (encoding != NULL) {
934 self->encoding = PyUnicode_FromString(encoding);
935 if (self->encoding == NULL)
936 goto error;
937 }
938 else {
939 PyErr_SetString(PyExc_IOError,
940 "could not determine default encoding");
941 }
942
943 if (errors == NULL)
944 errors = "strict";
945 self->errors = PyBytes_FromString(errors);
946 if (self->errors == NULL)
947 goto error;
948
949 self->chunk_size = 8192;
950 self->readuniversal = (newline == NULL || newline[0] == '\0');
951 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200952 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000953 self->readtranslate = (newline == NULL);
954 if (newline) {
955 self->readnl = PyUnicode_FromString(newline);
956 if (self->readnl == NULL)
957 return -1;
958 }
959 self->writetranslate = (newline == NULL || newline[0] != '\0');
960 if (!self->readuniversal && self->readnl) {
961 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000962 if (self->writenl == NULL)
963 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 if (!strcmp(self->writenl, "\n"))
965 self->writenl = NULL;
966 }
967#ifdef MS_WINDOWS
968 else
969 self->writenl = "\r\n";
970#endif
971
972 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200973 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000974 if (res == NULL)
975 goto error;
976 r = PyObject_IsTrue(res);
977 Py_DECREF(res);
978 if (r == -1)
979 goto error;
980 if (r == 1) {
981 self->decoder = PyCodec_IncrementalDecoder(
982 encoding, errors);
983 if (self->decoder == NULL)
984 goto error;
985
986 if (self->readuniversal) {
987 PyObject *incrementalDecoder = PyObject_CallFunction(
988 (PyObject *)&PyIncrementalNewlineDecoder_Type,
989 "Oi", self->decoder, (int)self->readtranslate);
990 if (incrementalDecoder == NULL)
991 goto error;
992 Py_CLEAR(self->decoder);
993 self->decoder = incrementalDecoder;
994 }
995 }
996
997 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200998 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000999 if (res == NULL)
1000 goto error;
1001 r = PyObject_IsTrue(res);
1002 Py_DECREF(res);
1003 if (r == -1)
1004 goto error;
1005 if (r == 1) {
1006 PyObject *ci;
1007 self->encoder = PyCodec_IncrementalEncoder(
1008 encoding, errors);
1009 if (self->encoder == NULL)
1010 goto error;
1011 /* Get the normalized named of the codec */
1012 ci = _PyCodec_Lookup(encoding);
1013 if (ci == NULL)
1014 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001015 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001016 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001017 if (res == NULL) {
1018 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019 PyErr_Clear();
1020 else
1021 goto error;
1022 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001023 else if (PyUnicode_Check(res)) {
1024 encodefuncentry *e = encodefuncs;
1025 while (e->name != NULL) {
1026 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1027 self->encodefunc = e->encodefunc;
1028 break;
1029 }
1030 e++;
1031 }
1032 }
1033 Py_XDECREF(res);
1034 }
1035
1036 self->buffer = buffer;
1037 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001038
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1040 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1041 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001042 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001043 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001044 if (raw == NULL) {
1045 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 PyErr_Clear();
1047 else
1048 goto error;
1049 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 else if (Py_TYPE(raw) == &PyFileIO_Type)
1051 self->raw = raw;
1052 else
1053 Py_DECREF(raw);
1054 }
1055
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001056 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001057 if (res == NULL)
1058 goto error;
1059 self->seekable = self->telling = PyObject_IsTrue(res);
1060 Py_DECREF(res);
1061
Martin v. Löwis767046a2011-10-14 15:35:36 +02001062 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001063
Antoine Pitroue4501852009-05-14 18:55:55 +00001064 self->encoding_start_of_stream = 0;
1065 if (self->seekable && self->encoder) {
1066 PyObject *cookieObj;
1067 int cmp;
1068
1069 self->encoding_start_of_stream = 1;
1070
1071 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1072 if (cookieObj == NULL)
1073 goto error;
1074
1075 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1076 Py_DECREF(cookieObj);
1077 if (cmp < 0) {
1078 goto error;
1079 }
1080
1081 if (cmp == 0) {
1082 self->encoding_start_of_stream = 0;
1083 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1084 _PyIO_zero, NULL);
1085 if (res == NULL)
1086 goto error;
1087 Py_DECREF(res);
1088 }
1089 }
1090
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001091 self->ok = 1;
1092 return 0;
1093
1094 error:
1095 return -1;
1096}
1097
1098static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001099_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001100{
1101 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1102 return -1;
1103 self->ok = 0;
1104 Py_CLEAR(self->buffer);
1105 Py_CLEAR(self->encoding);
1106 Py_CLEAR(self->encoder);
1107 Py_CLEAR(self->decoder);
1108 Py_CLEAR(self->readnl);
1109 Py_CLEAR(self->decoded_chars);
1110 Py_CLEAR(self->pending_bytes);
1111 Py_CLEAR(self->snapshot);
1112 Py_CLEAR(self->errors);
1113 Py_CLEAR(self->raw);
1114 return 0;
1115}
1116
1117static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001118textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001119{
Antoine Pitroue033e062010-10-29 10:38:18 +00001120 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001121 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122 return;
1123 _PyObject_GC_UNTRACK(self);
1124 if (self->weakreflist != NULL)
1125 PyObject_ClearWeakRefs((PyObject *)self);
1126 Py_CLEAR(self->dict);
1127 Py_TYPE(self)->tp_free((PyObject *)self);
1128}
1129
1130static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001131textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001132{
1133 Py_VISIT(self->buffer);
1134 Py_VISIT(self->encoding);
1135 Py_VISIT(self->encoder);
1136 Py_VISIT(self->decoder);
1137 Py_VISIT(self->readnl);
1138 Py_VISIT(self->decoded_chars);
1139 Py_VISIT(self->pending_bytes);
1140 Py_VISIT(self->snapshot);
1141 Py_VISIT(self->errors);
1142 Py_VISIT(self->raw);
1143
1144 Py_VISIT(self->dict);
1145 return 0;
1146}
1147
1148static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001149textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001150{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001151 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001152 return -1;
1153 Py_CLEAR(self->dict);
1154 return 0;
1155}
1156
1157static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001158textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001159
1160/* This macro takes some shortcuts to make the common case faster. */
1161#define CHECK_CLOSED(self) \
1162 do { \
1163 int r; \
1164 PyObject *_res; \
1165 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1166 if (self->raw != NULL) \
1167 r = _PyFileIO_closed(self->raw); \
1168 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001169 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001170 if (_res == NULL) \
1171 return NULL; \
1172 r = PyObject_IsTrue(_res); \
1173 Py_DECREF(_res); \
1174 if (r < 0) \
1175 return NULL; \
1176 } \
1177 if (r > 0) { \
1178 PyErr_SetString(PyExc_ValueError, \
1179 "I/O operation on closed file."); \
1180 return NULL; \
1181 } \
1182 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001183 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001184 return NULL; \
1185 } while (0)
1186
/* Raise ValueError and return NULL from the enclosing function when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1198
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1210
1211
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001212static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001213textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001214{
1215 PyObject *buffer, *res;
1216 CHECK_INITIALIZED(self);
1217 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1218 if (res == NULL)
1219 return NULL;
1220 Py_DECREF(res);
1221 buffer = self->buffer;
1222 self->buffer = NULL;
1223 self->detached = 1;
1224 self->ok = 0;
1225 return buffer;
1226}
1227
Antoine Pitrou24f36292009-03-28 22:16:42 +00001228/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001229 underlying buffered object, though. */
1230static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001231_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001232{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001233 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001234
1235 if (self->pending_bytes == NULL)
1236 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001237
1238 pending = self->pending_bytes;
1239 Py_INCREF(pending);
1240 self->pending_bytes_count = 0;
1241 Py_CLEAR(self->pending_bytes);
1242
1243 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1244 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245 if (b == NULL)
1246 return -1;
1247 ret = PyObject_CallMethodObjArgs(self->buffer,
1248 _PyIO_str_write, b, NULL);
1249 Py_DECREF(b);
1250 if (ret == NULL)
1251 return -1;
1252 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001253 return 0;
1254}
1255
1256static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001257textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001258{
1259 PyObject *ret;
1260 PyObject *text; /* owned reference */
1261 PyObject *b;
1262 Py_ssize_t textlen;
1263 int haslf = 0;
1264 int needflush = 0;
1265
1266 CHECK_INITIALIZED(self);
1267
1268 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1269 return NULL;
1270 }
1271
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001272 if (PyUnicode_READY(text) == -1)
1273 return NULL;
1274
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275 CHECK_CLOSED(self);
1276
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001277 if (self->encoder == NULL)
1278 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001279
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001280 Py_INCREF(text);
1281
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001282 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001283
1284 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001285 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286 haslf = 1;
1287
1288 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001289 PyObject *newtext = _PyObject_CallMethodId(
1290 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001291 Py_DECREF(text);
1292 if (newtext == NULL)
1293 return NULL;
1294 text = newtext;
1295 }
1296
Antoine Pitroue96ec682011-07-23 21:46:35 +02001297 if (self->write_through)
1298 needflush = 1;
1299 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001300 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302 needflush = 1;
1303
1304 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001305 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001306 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001307 self->encoding_start_of_stream = 0;
1308 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001309 else
1310 b = PyObject_CallMethodObjArgs(self->encoder,
1311 _PyIO_str_encode, text, NULL);
1312 Py_DECREF(text);
1313 if (b == NULL)
1314 return NULL;
1315
1316 if (self->pending_bytes == NULL) {
1317 self->pending_bytes = PyList_New(0);
1318 if (self->pending_bytes == NULL) {
1319 Py_DECREF(b);
1320 return NULL;
1321 }
1322 self->pending_bytes_count = 0;
1323 }
1324 if (PyList_Append(self->pending_bytes, b) < 0) {
1325 Py_DECREF(b);
1326 return NULL;
1327 }
1328 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1329 Py_DECREF(b);
1330 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001331 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001332 return NULL;
1333 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001334
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001335 if (needflush) {
1336 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1337 if (ret == NULL)
1338 return NULL;
1339 Py_DECREF(ret);
1340 }
1341
1342 Py_CLEAR(self->snapshot);
1343
1344 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001345 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001346 if (ret == NULL)
1347 return NULL;
1348 Py_DECREF(ret);
1349 }
1350
1351 return PyLong_FromSsize_t(textlen);
1352}
1353
1354/* Steal a reference to chars and store it in the decoded_char buffer;
1355 */
1356static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001357textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001358{
1359 Py_CLEAR(self->decoded_chars);
1360 self->decoded_chars = chars;
1361 self->decoded_chars_used = 0;
1362}
1363
1364static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001365textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001366{
1367 PyObject *chars;
1368 Py_ssize_t avail;
1369
1370 if (self->decoded_chars == NULL)
1371 return PyUnicode_FromStringAndSize(NULL, 0);
1372
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001373 /* decoded_chars is guaranteed to be "ready". */
1374 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001375 - self->decoded_chars_used);
1376
1377 assert(avail >= 0);
1378
1379 if (n < 0 || n > avail)
1380 n = avail;
1381
1382 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001383 chars = PyUnicode_Substring(self->decoded_chars,
1384 self->decoded_chars_used,
1385 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001386 if (chars == NULL)
1387 return NULL;
1388 }
1389 else {
1390 chars = self->decoded_chars;
1391 Py_INCREF(chars);
1392 }
1393
1394 self->decoded_chars_used += n;
1395 return chars;
1396}
1397
1398/* Read and decode the next chunk of data from the BufferedReader.
1399 */
1400static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001401textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001402{
1403 PyObject *dec_buffer = NULL;
1404 PyObject *dec_flags = NULL;
1405 PyObject *input_chunk = NULL;
1406 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001407 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001408 int eof;
1409
1410 /* The return value is True unless EOF was reached. The decoded string is
1411 * placed in self._decoded_chars (replacing its previous value). The
1412 * entire input chunk is sent to the decoder, though some of it may remain
1413 * buffered in the decoder, yet to be converted.
1414 */
1415
1416 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001417 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001418 return -1;
1419 }
1420
1421 if (self->telling) {
1422 /* To prepare for tell(), we need to snapshot a point in the file
1423 * where the decoder's input buffer is empty.
1424 */
1425
1426 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1427 _PyIO_str_getstate, NULL);
1428 if (state == NULL)
1429 return -1;
1430 /* Given this, we know there was a valid snapshot point
1431 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1432 */
1433 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1434 Py_DECREF(state);
1435 return -1;
1436 }
1437 Py_INCREF(dec_buffer);
1438 Py_INCREF(dec_flags);
1439 Py_DECREF(state);
1440 }
1441
1442 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001443 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001444 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001445 }
1446 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001447 if (chunk_size == NULL)
1448 goto fail;
1449 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001450 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1451 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001452 Py_DECREF(chunk_size);
1453 if (input_chunk == NULL)
1454 goto fail;
1455 assert(PyBytes_Check(input_chunk));
1456
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001457 nbytes = PyBytes_Size(input_chunk);
1458 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001459
1460 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1461 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1462 self->decoder, input_chunk, eof);
1463 }
1464 else {
1465 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1466 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1467 }
1468
1469 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1470 if (decoded_chars == NULL)
1471 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001472 if (PyUnicode_READY(decoded_chars) == -1)
1473 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001474 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001475 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001476 if (nchars > 0)
1477 self->b2cratio = (double) nbytes / nchars;
1478 else
1479 self->b2cratio = 0.0;
1480 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001481 eof = 0;
1482
1483 if (self->telling) {
1484 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1485 * next input to be decoded is dec_buffer + input_chunk.
1486 */
1487 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1488 if (next_input == NULL)
1489 goto fail;
1490 assert (PyBytes_Check(next_input));
1491 Py_DECREF(dec_buffer);
1492 Py_CLEAR(self->snapshot);
1493 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1494 }
1495 Py_DECREF(input_chunk);
1496
1497 return (eof == 0);
1498
1499 fail:
1500 Py_XDECREF(dec_buffer);
1501 Py_XDECREF(dec_flags);
1502 Py_XDECREF(input_chunk);
1503 return -1;
1504}
1505
1506static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001507textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001508{
1509 Py_ssize_t n = -1;
1510 PyObject *result = NULL, *chunks = NULL;
1511
1512 CHECK_INITIALIZED(self);
1513
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001514 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001515 return NULL;
1516
1517 CHECK_CLOSED(self);
1518
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001519 if (self->decoder == NULL)
1520 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001521
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001522 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001523 return NULL;
1524
1525 if (n < 0) {
1526 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001527 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001528 PyObject *decoded;
1529 if (bytes == NULL)
1530 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001531
1532 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1533 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1534 bytes, 1);
1535 else
1536 decoded = PyObject_CallMethodObjArgs(
1537 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001538 Py_DECREF(bytes);
1539 if (decoded == NULL)
1540 goto fail;
1541
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001542 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001543
1544 if (result == NULL) {
1545 Py_DECREF(decoded);
1546 return NULL;
1547 }
1548
1549 PyUnicode_AppendAndDel(&result, decoded);
1550 if (result == NULL)
1551 goto fail;
1552
1553 Py_CLEAR(self->snapshot);
1554 return result;
1555 }
1556 else {
1557 int res = 1;
1558 Py_ssize_t remaining = n;
1559
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001560 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001561 if (result == NULL)
1562 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001563 if (PyUnicode_READY(result) == -1)
1564 goto fail;
1565 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001566
1567 /* Keep reading chunks until we have n characters to return */
1568 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001569 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001570 if (res < 0) {
1571 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1572 when EINTR occurs so we needn't do it ourselves. */
1573 if (_PyIO_trap_eintr()) {
1574 continue;
1575 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001576 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001577 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001578 if (res == 0) /* EOF */
1579 break;
1580 if (chunks == NULL) {
1581 chunks = PyList_New(0);
1582 if (chunks == NULL)
1583 goto fail;
1584 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001585 if (PyUnicode_GET_LENGTH(result) > 0 &&
1586 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001587 goto fail;
1588 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001589 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001590 if (result == NULL)
1591 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001592 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593 }
1594 if (chunks != NULL) {
1595 if (result != NULL && PyList_Append(chunks, result) < 0)
1596 goto fail;
1597 Py_CLEAR(result);
1598 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1599 if (result == NULL)
1600 goto fail;
1601 Py_CLEAR(chunks);
1602 }
1603 return result;
1604 }
1605 fail:
1606 Py_XDECREF(result);
1607 Py_XDECREF(chunks);
1608 return NULL;
1609}
1610
1611
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001612/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613 that is to the NUL character. Otherwise the function will produce
1614 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001615static char *
1616find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001617{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001618 if (kind == PyUnicode_1BYTE_KIND) {
1619 assert(ch < 256);
1620 return (char *) memchr((void *) s, (char) ch, end - s);
1621 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001622 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001623 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001624 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001625 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001626 return s;
1627 if (s == end)
1628 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001629 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001630 }
1631}
1632
1633Py_ssize_t
1634_PyIO_find_line_ending(
1635 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001636 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001637{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001638 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001639
1640 if (translated) {
1641 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001642 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001644 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645 else {
1646 *consumed = len;
1647 return -1;
1648 }
1649 }
1650 else if (universal) {
1651 /* Universal newline search. Find any of \r, \r\n, \n
1652 * The decoder ensures that \r\n are not split in two pieces
1653 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001654 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001655 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001656 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001658 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001659 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001660 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001661 if (s >= end) {
1662 *consumed = len;
1663 return -1;
1664 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001665 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001666 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001668 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001670 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001671 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001673 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674 }
1675 }
1676 }
1677 else {
1678 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001679 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1680 char *nl = PyUnicode_DATA(readnl);
1681 /* Assume that readnl is an ASCII character. */
1682 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001683 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001684 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001685 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001686 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 *consumed = len;
1688 return -1;
1689 }
1690 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001691 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001693 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 if (e < s)
1695 e = s;
1696 while (s < e) {
1697 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001698 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 if (pos == NULL || pos >= e)
1700 break;
1701 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001702 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 break;
1704 }
1705 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001706 return (pos - start)/kind + readnl_len;
1707 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001708 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001709 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 if (pos == NULL)
1711 *consumed = len;
1712 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001713 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 return -1;
1715 }
1716 }
1717}
1718
1719static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001720_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721{
1722 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1723 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1724 int res;
1725
1726 CHECK_CLOSED(self);
1727
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001728 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001729 return NULL;
1730
1731 chunked = 0;
1732
1733 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001734 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001736 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001737 Py_ssize_t consumed = 0;
1738
1739 /* First, get some data if necessary */
1740 res = 1;
1741 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001742 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001743 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001744 if (res < 0) {
1745 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1746 when EINTR occurs so we needn't do it ourselves. */
1747 if (_PyIO_trap_eintr()) {
1748 continue;
1749 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001750 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001751 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752 if (res == 0)
1753 break;
1754 }
1755 if (res == 0) {
1756 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001757 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001758 Py_CLEAR(self->snapshot);
1759 start = endpos = offset_to_buffer = 0;
1760 break;
1761 }
1762
1763 if (remaining == NULL) {
1764 line = self->decoded_chars;
1765 start = self->decoded_chars_used;
1766 offset_to_buffer = 0;
1767 Py_INCREF(line);
1768 }
1769 else {
1770 assert(self->decoded_chars_used == 0);
1771 line = PyUnicode_Concat(remaining, self->decoded_chars);
1772 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001773 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001774 Py_CLEAR(remaining);
1775 if (line == NULL)
1776 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001777 if (PyUnicode_READY(line) == -1)
1778 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001779 }
1780
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001781 ptr = PyUnicode_DATA(line);
1782 line_len = PyUnicode_GET_LENGTH(line);
1783 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001784
1785 endpos = _PyIO_find_line_ending(
1786 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001787 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001788 ptr + kind * start,
1789 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001790 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001791 if (endpos >= 0) {
1792 endpos += start;
1793 if (limit >= 0 && (endpos - start) + chunked >= limit)
1794 endpos = start + limit - chunked;
1795 break;
1796 }
1797
1798 /* We can put aside up to `endpos` */
1799 endpos = consumed + start;
1800 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1801 /* Didn't find line ending, but reached length limit */
1802 endpos = start + limit - chunked;
1803 break;
1804 }
1805
1806 if (endpos > start) {
1807 /* No line ending seen yet - put aside current data */
1808 PyObject *s;
1809 if (chunks == NULL) {
1810 chunks = PyList_New(0);
1811 if (chunks == NULL)
1812 goto error;
1813 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001814 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815 if (s == NULL)
1816 goto error;
1817 if (PyList_Append(chunks, s) < 0) {
1818 Py_DECREF(s);
1819 goto error;
1820 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001821 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001822 Py_DECREF(s);
1823 }
1824 /* There may be some remaining bytes we'll have to prepend to the
1825 next chunk of data */
1826 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001827 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001828 if (remaining == NULL)
1829 goto error;
1830 }
1831 Py_CLEAR(line);
1832 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001833 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001834 }
1835
1836 if (line != NULL) {
1837 /* Our line ends in the current buffer */
1838 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001839 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1840 PyObject *s = PyUnicode_Substring(line, start, endpos);
1841 Py_CLEAR(line);
1842 if (s == NULL)
1843 goto error;
1844 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001845 }
1846 }
1847 if (remaining != NULL) {
1848 if (chunks == NULL) {
1849 chunks = PyList_New(0);
1850 if (chunks == NULL)
1851 goto error;
1852 }
1853 if (PyList_Append(chunks, remaining) < 0)
1854 goto error;
1855 Py_CLEAR(remaining);
1856 }
1857 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001858 if (line != NULL) {
1859 if (PyList_Append(chunks, line) < 0)
1860 goto error;
1861 Py_DECREF(line);
1862 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001863 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1864 if (line == NULL)
1865 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001866 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001867 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001868 if (line == NULL) {
1869 Py_INCREF(_PyIO_empty_str);
1870 line = _PyIO_empty_str;
1871 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872
1873 return line;
1874
1875 error:
1876 Py_XDECREF(chunks);
1877 Py_XDECREF(remaining);
1878 Py_XDECREF(line);
1879 return NULL;
1880}
1881
1882static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001883textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001884{
1885 Py_ssize_t limit = -1;
1886
1887 CHECK_INITIALIZED(self);
1888 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1889 return NULL;
1890 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001891 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001892}
1893
1894/* Seek and Tell */
1895
1896typedef struct {
1897 Py_off_t start_pos;
1898 int dec_flags;
1899 int bytes_to_feed;
1900 int chars_to_skip;
1901 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001902} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001903
/*
   To speed up cookie packing/unpacking, the cookie_type fields are stored
   in a temporary byte string which is converted with
   _PyLong_FromByteArray() or _PyLong_AsByteArray() (resp.).
   The OFF_* macros below define at which offsets in that intermediary
   byte string the various fields are stored.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if !defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#else

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#endif
1941
1942static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001943textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001944{
1945 unsigned char buffer[COOKIE_BUF_LEN];
1946 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1947 if (cookieLong == NULL)
1948 return -1;
1949
1950 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1951 IS_LITTLE_ENDIAN, 0) < 0) {
1952 Py_DECREF(cookieLong);
1953 return -1;
1954 }
1955 Py_DECREF(cookieLong);
1956
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001957 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1958 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1959 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1960 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1961 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001962
1963 return 0;
1964}
1965
1966static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001967textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968{
1969 unsigned char buffer[COOKIE_BUF_LEN];
1970
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001971 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1972 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1973 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1974 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1975 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001976
1977 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1978}
1979#undef IS_LITTLE_ENDIAN
1980
1981static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001982_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001983{
1984 PyObject *res;
1985 /* When seeking to the start of the stream, we call decoder.reset()
1986 rather than decoder.getstate().
1987 This is for a few decoders such as utf-16 for which the state value
1988 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1989 utf-16, that we are expecting a BOM).
1990 */
1991 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1992 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1993 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001994 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1995 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996 if (res == NULL)
1997 return -1;
1998 Py_DECREF(res);
1999 return 0;
2000}
2001
Antoine Pitroue4501852009-05-14 18:55:55 +00002002static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002003_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002004{
2005 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002006 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002007 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2008 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2009 self->encoding_start_of_stream = 1;
2010 }
2011 else {
2012 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2013 _PyIO_zero, NULL);
2014 self->encoding_start_of_stream = 0;
2015 }
2016 if (res == NULL)
2017 return -1;
2018 Py_DECREF(res);
2019 return 0;
2020}
2021
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002022static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002023textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024{
2025 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002026 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002028 PyObject *res;
2029 int cmp;
2030
2031 CHECK_INITIALIZED(self);
2032
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002033 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2034 return NULL;
2035 CHECK_CLOSED(self);
2036
2037 Py_INCREF(cookieObj);
2038
2039 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002040 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002041 goto fail;
2042 }
2043
2044 if (whence == 1) {
2045 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002046 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047 if (cmp < 0)
2048 goto fail;
2049
2050 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002051 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 goto fail;
2053 }
2054
2055 /* Seeking to the current position should attempt to
2056 * sync the underlying buffer with the current position.
2057 */
2058 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002059 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002060 if (cookieObj == NULL)
2061 goto fail;
2062 }
2063 else if (whence == 2) {
2064 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002065 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002066 if (cmp < 0)
2067 goto fail;
2068
2069 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002070 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002071 goto fail;
2072 }
2073
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002074 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002075 if (res == NULL)
2076 goto fail;
2077 Py_DECREF(res);
2078
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002079 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080 Py_CLEAR(self->snapshot);
2081 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002082 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002083 if (res == NULL)
2084 goto fail;
2085 Py_DECREF(res);
2086 }
2087
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002088 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002089 Py_XDECREF(cookieObj);
2090 return res;
2091 }
2092 else if (whence != 0) {
2093 PyErr_Format(PyExc_ValueError,
2094 "invalid whence (%d, should be 0, 1 or 2)", whence);
2095 goto fail;
2096 }
2097
Antoine Pitroue4501852009-05-14 18:55:55 +00002098 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002099 if (cmp < 0)
2100 goto fail;
2101
2102 if (cmp == 1) {
2103 PyErr_Format(PyExc_ValueError,
2104 "negative seek position %R", cookieObj);
2105 goto fail;
2106 }
2107
2108 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2109 if (res == NULL)
2110 goto fail;
2111 Py_DECREF(res);
2112
2113 /* The strategy of seek() is to go back to the safe start point
2114 * and replay the effect of read(chars_to_skip) from there.
2115 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002116 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002117 goto fail;
2118
2119 /* Seek back to the safe start point. */
2120 posobj = PyLong_FromOff_t(cookie.start_pos);
2121 if (posobj == NULL)
2122 goto fail;
2123 res = PyObject_CallMethodObjArgs(self->buffer,
2124 _PyIO_str_seek, posobj, NULL);
2125 Py_DECREF(posobj);
2126 if (res == NULL)
2127 goto fail;
2128 Py_DECREF(res);
2129
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002130 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002131 Py_CLEAR(self->snapshot);
2132
2133 /* Restore the decoder to its state from the safe start point. */
2134 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002135 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002136 goto fail;
2137 }
2138
2139 if (cookie.chars_to_skip) {
2140 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002141 PyObject *input_chunk = _PyObject_CallMethodId(
2142 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002143 PyObject *decoded;
2144
2145 if (input_chunk == NULL)
2146 goto fail;
2147
2148 assert (PyBytes_Check(input_chunk));
2149
2150 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2151 if (self->snapshot == NULL) {
2152 Py_DECREF(input_chunk);
2153 goto fail;
2154 }
2155
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002156 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2157 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002158
2159 if (decoded == NULL)
2160 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002161 if (PyUnicode_READY(decoded) == -1) {
2162 Py_DECREF(decoded);
2163 goto fail;
2164 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002165
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002166 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167
2168 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002169 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002170 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2171 goto fail;
2172 }
2173 self->decoded_chars_used = cookie.chars_to_skip;
2174 }
2175 else {
2176 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2177 if (self->snapshot == NULL)
2178 goto fail;
2179 }
2180
Antoine Pitroue4501852009-05-14 18:55:55 +00002181 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2182 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002183 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002184 goto fail;
2185 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002186 return cookieObj;
2187 fail:
2188 Py_XDECREF(cookieObj);
2189 return NULL;
2190
2191}
2192
2193static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002194textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002195{
2196 PyObject *res;
2197 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002198 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002199 PyObject *next_input;
2200 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002201 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002202 PyObject *saved_state = NULL;
2203 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002204 char *dec_buffer;
2205 Py_ssize_t dec_buffer_len;
2206 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002207
2208 CHECK_INITIALIZED(self);
2209 CHECK_CLOSED(self);
2210
2211 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002212 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002213 goto fail;
2214 }
2215 if (!self->telling) {
2216 PyErr_SetString(PyExc_IOError,
2217 "telling position disabled by next() call");
2218 goto fail;
2219 }
2220
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002221 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002222 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002223 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002224 if (res == NULL)
2225 goto fail;
2226 Py_DECREF(res);
2227
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002228 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002229 if (posobj == NULL)
2230 goto fail;
2231
2232 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002233 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002234 return posobj;
2235 }
2236
2237#if defined(HAVE_LARGEFILE_SUPPORT)
2238 cookie.start_pos = PyLong_AsLongLong(posobj);
2239#else
2240 cookie.start_pos = PyLong_AsLong(posobj);
2241#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002242 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002243 if (PyErr_Occurred())
2244 goto fail;
2245
2246 /* Skip backward to the snapshot point (see _read_chunk). */
2247 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2248 goto fail;
2249
2250 assert (PyBytes_Check(next_input));
2251
2252 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2253
2254 /* How many decoded characters have been used up since the snapshot? */
2255 if (self->decoded_chars_used == 0) {
2256 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002257 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002258 }
2259
2260 chars_to_skip = self->decoded_chars_used;
2261
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002262 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002263 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2264 _PyIO_str_getstate, NULL);
2265 if (saved_state == NULL)
2266 goto fail;
2267
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002268#define DECODER_GETSTATE() do { \
2269 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2270 _PyIO_str_getstate, NULL); \
2271 if (_state == NULL) \
2272 goto fail; \
2273 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2274 Py_DECREF(_state); \
2275 goto fail; \
2276 } \
2277 Py_DECREF(_state); \
2278 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002279
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002280 /* TODO: replace assert with exception */
2281#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002282 PyObject *_decoded = _PyObject_CallMethodId( \
2283 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002284 if (_decoded == NULL) \
2285 goto fail; \
2286 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002287 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002288 Py_DECREF(_decoded); \
2289 } while (0)
2290
2291 /* Fast search for an acceptable start point, close to our
2292 current pos */
2293 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2294 skip_back = 1;
2295 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2296 input = PyBytes_AS_STRING(next_input);
2297 while (skip_bytes > 0) {
        /* Decode up to tentative start point */
2299 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2300 goto fail;
2301 DECODER_DECODE(input, skip_bytes, chars_decoded);
2302 if (chars_decoded <= chars_to_skip) {
2303 DECODER_GETSTATE();
2304 if (dec_buffer_len == 0) {
2305 /* Before pos and no bytes buffered in decoder => OK */
2306 cookie.dec_flags = dec_flags;
2307 chars_to_skip -= chars_decoded;
2308 break;
2309 }
2310 /* Skip back by buffered amount and reset heuristic */
2311 skip_bytes -= dec_buffer_len;
2312 skip_back = 1;
2313 }
2314 else {
2315 /* We're too far ahead, skip back a bit */
2316 skip_bytes -= skip_back;
2317 skip_back *= 2;
2318 }
2319 }
2320 if (skip_bytes <= 0) {
2321 skip_bytes = 0;
2322 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2323 goto fail;
2324 }
2325
2326 /* Note our initial start point. */
2327 cookie.start_pos += skip_bytes;
2328 cookie.chars_to_skip = chars_to_skip;
2329 if (chars_to_skip == 0)
2330 goto finally;
2331
2332 /* We should be close to the desired position. Now feed the decoder one
2333 * byte at a time until we reach the `chars_to_skip` target.
2334 * As we go, note the nearest "safe start point" before the current
2335 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002336 * can safely start from there and advance to this location).
2337 */
2338 chars_decoded = 0;
2339 input = PyBytes_AS_STRING(next_input);
2340 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002341 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002342 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002343 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002344
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002345 DECODER_DECODE(input, 1, n);
2346 /* We got n chars for 1 byte */
2347 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002348 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002349 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002350
2351 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2352 /* Decoder buffer is empty, so this is a safe start point. */
2353 cookie.start_pos += cookie.bytes_to_feed;
2354 chars_to_skip -= chars_decoded;
2355 cookie.dec_flags = dec_flags;
2356 cookie.bytes_to_feed = 0;
2357 chars_decoded = 0;
2358 }
2359 if (chars_decoded >= chars_to_skip)
2360 break;
2361 input++;
2362 }
2363 if (input == input_end) {
2364 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002365 PyObject *decoded = _PyObject_CallMethodId(
2366 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002367 if (decoded == NULL)
2368 goto fail;
2369 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002370 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371 Py_DECREF(decoded);
2372 cookie.need_eof = 1;
2373
2374 if (chars_decoded < chars_to_skip) {
2375 PyErr_SetString(PyExc_IOError,
2376 "can't reconstruct logical file position");
2377 goto fail;
2378 }
2379 }
2380
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002381finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002382 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002383 Py_DECREF(saved_state);
2384 if (res == NULL)
2385 return NULL;
2386 Py_DECREF(res);
2387
2388 /* The returned cookie corresponds to the last safe start point. */
2389 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002390 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002392fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002393 if (saved_state) {
2394 PyObject *type, *value, *traceback;
2395 PyErr_Fetch(&type, &value, &traceback);
2396
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002397 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002398 Py_DECREF(saved_state);
2399 if (res == NULL)
2400 return NULL;
2401 Py_DECREF(res);
2402
2403 PyErr_Restore(type, value, traceback);
2404 }
2405 return NULL;
2406}
2407
2408static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002409textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002410{
2411 PyObject *pos = Py_None;
2412 PyObject *res;
2413
2414 CHECK_INITIALIZED(self)
2415 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2416 return NULL;
2417 }
2418
2419 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2420 if (res == NULL)
2421 return NULL;
2422 Py_DECREF(res);
2423
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002424 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002425}
2426
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002427static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002428textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002429{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002430 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002431
2432 CHECK_INITIALIZED(self);
2433
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002434 res = PyUnicode_FromString("<_io.TextIOWrapper");
2435 if (res == NULL)
2436 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002437 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002438 if (nameobj == NULL) {
2439 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2440 PyErr_Clear();
2441 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002442 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002443 }
2444 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002445 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002446 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002447 if (s == NULL)
2448 goto error;
2449 PyUnicode_AppendAndDel(&res, s);
2450 if (res == NULL)
2451 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002452 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002453 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002454 if (modeobj == NULL) {
2455 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2456 PyErr_Clear();
2457 else
2458 goto error;
2459 }
2460 else {
2461 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2462 Py_DECREF(modeobj);
2463 if (s == NULL)
2464 goto error;
2465 PyUnicode_AppendAndDel(&res, s);
2466 if (res == NULL)
2467 return NULL;
2468 }
2469 s = PyUnicode_FromFormat("%U encoding=%R>",
2470 res, self->encoding);
2471 Py_DECREF(res);
2472 return s;
2473error:
2474 Py_XDECREF(res);
2475 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002476}
2477
2478
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002479/* Inquiries */
2480
2481static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002482textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002483{
2484 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002485 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486}
2487
2488static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002489textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002490{
2491 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002492 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002493}
2494
2495static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002496textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497{
2498 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002499 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500}
2501
2502static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002503textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504{
2505 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002506 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507}
2508
2509static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002510textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511{
2512 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002513 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002514}
2515
2516static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002517textiowrapper_getstate(textio *self, PyObject *args)
2518{
2519 PyErr_Format(PyExc_TypeError,
2520 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2521 return NULL;
2522}
2523
2524static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002525textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002526{
2527 CHECK_INITIALIZED(self);
2528 CHECK_CLOSED(self);
2529 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002530 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002531 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002532 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002533}
2534
2535static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002536textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537{
2538 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002539 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002540 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541
Antoine Pitrou6be88762010-05-03 16:48:20 +00002542 res = textiowrapper_closed_get(self, NULL);
2543 if (res == NULL)
2544 return NULL;
2545 r = PyObject_IsTrue(res);
2546 Py_DECREF(res);
2547 if (r < 0)
2548 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002549
Antoine Pitrou6be88762010-05-03 16:48:20 +00002550 if (r > 0) {
2551 Py_RETURN_NONE; /* stream already closed */
2552 }
2553 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002554 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002555 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002556 if (res)
2557 Py_DECREF(res);
2558 else
2559 PyErr_Clear();
2560 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002561 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002562 if (res == NULL) {
2563 return NULL;
2564 }
2565 else
2566 Py_DECREF(res);
2567
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002568 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002569 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002570}
2571
2572static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002573textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002574{
2575 PyObject *line;
2576
2577 CHECK_INITIALIZED(self);
2578
2579 self->telling = 0;
2580 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2581 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002582 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002583 }
2584 else {
2585 line = PyObject_CallMethodObjArgs((PyObject *)self,
2586 _PyIO_str_readline, NULL);
2587 if (line && !PyUnicode_Check(line)) {
2588 PyErr_Format(PyExc_IOError,
2589 "readline() should have returned an str object, "
2590 "not '%.200s'", Py_TYPE(line)->tp_name);
2591 Py_DECREF(line);
2592 return NULL;
2593 }
2594 }
2595
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002596 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002597 return NULL;
2598
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002599 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002600 /* Reached EOF or would have blocked */
2601 Py_DECREF(line);
2602 Py_CLEAR(self->snapshot);
2603 self->telling = self->seekable;
2604 return NULL;
2605 }
2606
2607 return line;
2608}
2609
2610static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002611textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612{
2613 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002614 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002615}
2616
2617static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002618textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619{
2620 CHECK_INITIALIZED(self);
2621 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2622}
2623
2624static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002625textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002626{
2627 PyObject *res;
2628 CHECK_INITIALIZED(self);
2629 if (self->decoder == NULL)
2630 Py_RETURN_NONE;
2631 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2632 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002633 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2634 PyErr_Clear();
2635 Py_RETURN_NONE;
2636 }
2637 else {
2638 return NULL;
2639 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002640 }
2641 return res;
2642}
2643
2644static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002645textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002646{
2647 CHECK_INITIALIZED(self);
2648 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2649}
2650
2651static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002652textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002653{
2654 CHECK_INITIALIZED(self);
2655 return PyLong_FromSsize_t(self->chunk_size);
2656}
2657
2658static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002659textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660{
2661 Py_ssize_t n;
2662 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002663 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002664 if (n == -1 && PyErr_Occurred())
2665 return -1;
2666 if (n <= 0) {
2667 PyErr_SetString(PyExc_ValueError,
2668 "a strictly positive integer is required");
2669 return -1;
2670 }
2671 self->chunk_size = n;
2672 return 0;
2673}
2674
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002675static PyMethodDef textiowrapper_methods[] = {
2676 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2677 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2678 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2679 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2680 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2681 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002683 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2684 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2685 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2686 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2687 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002688 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002689
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002690 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2691 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2692 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002693 {NULL, NULL}
2694};
2695
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002696static PyMemberDef textiowrapper_members[] = {
2697 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2698 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2699 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002700 {NULL}
2701};
2702
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002703static PyGetSetDef textiowrapper_getset[] = {
2704 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2705 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2707*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002708 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2709 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2710 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2711 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002712 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002713};
2714
2715PyTypeObject PyTextIOWrapper_Type = {
2716 PyVarObject_HEAD_INIT(NULL, 0)
2717 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002718 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002720 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002721 0, /*tp_print*/
2722 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002723 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002724 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002725 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002726 0, /*tp_as_number*/
2727 0, /*tp_as_sequence*/
2728 0, /*tp_as_mapping*/
2729 0, /*tp_hash */
2730 0, /*tp_call*/
2731 0, /*tp_str*/
2732 0, /*tp_getattro*/
2733 0, /*tp_setattro*/
2734 0, /*tp_as_buffer*/
2735 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2736 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002737 textiowrapper_doc, /* tp_doc */
2738 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2739 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002740 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002741 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002742 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002743 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2744 textiowrapper_methods, /* tp_methods */
2745 textiowrapper_members, /* tp_members */
2746 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002747 0, /* tp_base */
2748 0, /* tp_dict */
2749 0, /* tp_descr_get */
2750 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002751 offsetof(textio, dict), /*tp_dictoffset*/
2752 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002753 0, /* tp_alloc */
2754 PyType_GenericNew, /* tp_new */
2755};