blob: 2db37d3fbe8e13cdd982b93ed4a46ad3ea8a2c6d [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Attribute and method names looked up by this module, declared once so they
   can be referenced through their PyId_<name> symbols (e.g. PyId_setstate). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
Antoine Pitrou796564c2013-07-30 19:59:21 +0200176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
177 | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000178 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000179 0, /* tp_traverse */
180 0, /* tp_clear */
181 0, /* tp_richcompare */
182 0, /* tp_weaklistoffset */
183 0, /* tp_iter */
184 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000185 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000186 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000187 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000188 &PyIOBase_Type, /* tp_base */
189 0, /* tp_dict */
190 0, /* tp_descr_get */
191 0, /* tp_descr_set */
192 0, /* tp_dictoffset */
193 0, /* tp_init */
194 0, /* tp_alloc */
195 0, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +0200196 0, /* tp_free */
197 0, /* tp_is_gc */
198 0, /* tp_bases */
199 0, /* tp_mro */
200 0, /* tp_cache */
201 0, /* tp_subclasses */
202 0, /* tp_weaklist */
203 0, /* tp_del */
204 0, /* tp_version_tag */
205 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000206};
207
208
/* IncrementalNewlineDecoder */
210
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000211PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000212 "Codec used when reading a file in universal newlines mode. It wraps\n"
213 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
214 "records the types of newlines encountered. When used with\n"
215 "translate=False, it ensures that the newline sequence is returned in\n"
216 "one piece. When used with decoder=None, it expects unicode strings as\n"
217 "decode input and translates newlines without first invoking an external\n"
218 "decoder.\n"
219 );
220
221typedef struct {
222 PyObject_HEAD
223 PyObject *decoder;
224 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000225 signed int pendingcr: 1;
226 signed int translate: 1;
227 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000228} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000229
230static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000231incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000232 PyObject *args, PyObject *kwds)
233{
234 PyObject *decoder;
235 int translate;
236 PyObject *errors = NULL;
237 char *kwlist[] = {"decoder", "translate", "errors", NULL};
238
239 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
240 kwlist, &decoder, &translate, &errors))
241 return -1;
242
243 self->decoder = decoder;
244 Py_INCREF(decoder);
245
246 if (errors == NULL) {
247 self->errors = PyUnicode_FromString("strict");
248 if (self->errors == NULL)
249 return -1;
250 }
251 else {
252 Py_INCREF(errors);
253 self->errors = errors;
254 }
255
256 self->translate = translate;
257 self->seennl = 0;
258 self->pendingcr = 0;
259
260 return 0;
261}
262
263static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000264incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000265{
266 Py_CLEAR(self->decoder);
267 Py_CLEAR(self->errors);
268 Py_TYPE(self)->tp_free((PyObject *)self);
269}
270
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200271static int
272check_decoded(PyObject *decoded)
273{
274 if (decoded == NULL)
275 return -1;
276 if (!PyUnicode_Check(decoded)) {
277 PyErr_Format(PyExc_TypeError,
278 "decoder should return a string result, not '%.200s'",
279 Py_TYPE(decoded)->tp_name);
280 Py_DECREF(decoded);
281 return -1;
282 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200283 if (PyUnicode_READY(decoded) < 0) {
284 Py_DECREF(decoded);
285 return -1;
286 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200287 return 0;
288}
289
/* Bit flags stored in nldecoder_object.seennl, one per newline kind. */
#define SEEN_CR   0x1
#define SEEN_LF   0x2
#define SEEN_CRLF 0x4
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
294
295PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000296_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000297 PyObject *input, int final)
298{
299 PyObject *output;
300 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000301 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000302
303 if (self->decoder == NULL) {
304 PyErr_SetString(PyExc_ValueError,
305 "IncrementalNewlineDecoder.__init__ not called");
306 return NULL;
307 }
308
309 /* decode input (with the eventual \r from a previous pass) */
310 if (self->decoder != Py_None) {
311 output = PyObject_CallMethodObjArgs(self->decoder,
312 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
313 }
314 else {
315 output = input;
316 Py_INCREF(output);
317 }
318
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200319 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000320 return NULL;
321
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000323 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200324 /* Prefix output with CR */
325 int kind;
326 PyObject *modified;
327 char *out;
328
329 modified = PyUnicode_New(output_len + 1,
330 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000331 if (modified == NULL)
332 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200333 kind = PyUnicode_KIND(modified);
334 out = PyUnicode_DATA(modified);
335 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200336 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000337 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200338 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000339 self->pendingcr = 0;
340 output_len++;
341 }
342
343 /* retain last \r even when not translating data:
344 * then readline() is sure to get \r\n in one pass
345 */
346 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000347 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200348 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
349 {
350 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
351 if (modified == NULL)
352 goto error;
353 Py_DECREF(output);
354 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000355 self->pendingcr = 1;
356 }
357 }
358
359 /* Record which newlines are read and do newline translation if desired,
360 all in one pass. */
361 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200362 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000363 Py_ssize_t len;
364 int seennl = self->seennl;
365 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200366 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000367
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200368 in_str = PyUnicode_DATA(output);
369 len = PyUnicode_GET_LENGTH(output);
370 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000371
372 if (len == 0)
373 return output;
374
375 /* If, up to now, newlines are consistently \n, do a quick check
376 for the \r *byte* with the libc's optimized memchr.
377 */
378 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200379 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000380 }
381
Antoine Pitrou66913e22009-03-06 23:40:56 +0000382 if (only_lf) {
383 /* If not already seen, quick scan for a possible "\n" character.
384 (there's nothing else to be done, even when in translation mode)
385 */
386 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200387 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100388 if (kind == PyUnicode_1BYTE_KIND)
389 seennl |= SEEN_LF;
390 else {
391 Py_ssize_t i = 0;
392 for (;;) {
393 Py_UCS4 c;
394 /* Fast loop for non-control characters */
395 while (PyUnicode_READ(kind, in_str, i) > '\n')
396 i++;
397 c = PyUnicode_READ(kind, in_str, i++);
398 if (c == '\n') {
399 seennl |= SEEN_LF;
400 break;
401 }
402 if (i >= len)
403 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000404 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000405 }
406 }
407 /* Finished: we have scanned for newlines, and none of them
408 need translating */
409 }
410 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200411 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000412 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000413 if (seennl == SEEN_ALL)
414 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000415 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200416 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000417 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200418 while (PyUnicode_READ(kind, in_str, i) > '\r')
419 i++;
420 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000421 if (c == '\n')
422 seennl |= SEEN_LF;
423 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200424 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000425 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200426 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000427 }
428 else
429 seennl |= SEEN_CR;
430 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200431 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000432 break;
433 if (seennl == SEEN_ALL)
434 break;
435 }
436 endscan:
437 ;
438 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000439 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 void *translated;
441 int kind = PyUnicode_KIND(output);
442 void *in_str = PyUnicode_DATA(output);
443 Py_ssize_t in, out;
444 /* XXX: Previous in-place translation here is disabled as
445 resizing is not possible anymore */
446 /* We could try to optimize this so that we only do a copy
447 when there is something to translate. On the other hand,
448 we already know there is a \r byte, so chances are high
449 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200450 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 if (translated == NULL) {
452 PyErr_NoMemory();
453 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
460 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000461 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 seennl |= SEEN_LF;
464 continue;
465 }
466 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000468 in++;
469 seennl |= SEEN_CRLF;
470 }
471 else
472 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200473 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000474 continue;
475 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200476 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200478 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000479 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200480 Py_DECREF(output);
481 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100482 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200483 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200484 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000485 }
486 self->seennl |= seennl;
487 }
488
489 return output;
490
491 error:
492 Py_DECREF(output);
493 return NULL;
494}
495
496static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000497incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000498 PyObject *args, PyObject *kwds)
499{
500 char *kwlist[] = {"input", "final", NULL};
501 PyObject *input;
502 int final = 0;
503
504 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
505 kwlist, &input, &final))
506 return NULL;
507 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
508}
509
510static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000511incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000512{
513 PyObject *buffer;
514 unsigned PY_LONG_LONG flag;
515
516 if (self->decoder != Py_None) {
517 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
518 _PyIO_str_getstate, NULL);
519 if (state == NULL)
520 return NULL;
521 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
522 Py_DECREF(state);
523 return NULL;
524 }
525 Py_INCREF(buffer);
526 Py_DECREF(state);
527 }
528 else {
529 buffer = PyBytes_FromString("");
530 flag = 0;
531 }
532 flag <<= 1;
533 if (self->pendingcr)
534 flag |= 1;
535 return Py_BuildValue("NK", buffer, flag);
536}
537
538static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000539incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000540{
541 PyObject *buffer;
542 unsigned PY_LONG_LONG flag;
543
544 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
545 return NULL;
546
547 self->pendingcr = (int) flag & 1;
548 flag >>= 1;
549
550 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200551 return _PyObject_CallMethodId(self->decoder,
552 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000553 else
554 Py_RETURN_NONE;
555}
556
557static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000558incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000559{
560 self->seennl = 0;
561 self->pendingcr = 0;
562 if (self->decoder != Py_None)
563 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
564 else
565 Py_RETURN_NONE;
566}
567
568static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000569incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000570{
571 switch (self->seennl) {
572 case SEEN_CR:
573 return PyUnicode_FromString("\r");
574 case SEEN_LF:
575 return PyUnicode_FromString("\n");
576 case SEEN_CRLF:
577 return PyUnicode_FromString("\r\n");
578 case SEEN_CR | SEEN_LF:
579 return Py_BuildValue("ss", "\r", "\n");
580 case SEEN_CR | SEEN_CRLF:
581 return Py_BuildValue("ss", "\r", "\r\n");
582 case SEEN_LF | SEEN_CRLF:
583 return Py_BuildValue("ss", "\n", "\r\n");
584 case SEEN_CR | SEEN_LF | SEEN_CRLF:
585 return Py_BuildValue("sss", "\r", "\n", "\r\n");
586 default:
587 Py_RETURN_NONE;
588 }
589
590}
591
592
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000593static PyMethodDef incrementalnewlinedecoder_methods[] = {
594 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
595 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
596 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
597 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000598 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000599};
600
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000601static PyGetSetDef incrementalnewlinedecoder_getset[] = {
602 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000603 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000604};
605
606PyTypeObject PyIncrementalNewlineDecoder_Type = {
607 PyVarObject_HEAD_INIT(NULL, 0)
608 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000609 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000610 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000611 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000612 0, /*tp_print*/
613 0, /*tp_getattr*/
614 0, /*tp_setattr*/
615 0, /*tp_compare */
616 0, /*tp_repr*/
617 0, /*tp_as_number*/
618 0, /*tp_as_sequence*/
619 0, /*tp_as_mapping*/
620 0, /*tp_hash */
621 0, /*tp_call*/
622 0, /*tp_str*/
623 0, /*tp_getattro*/
624 0, /*tp_setattro*/
625 0, /*tp_as_buffer*/
626 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000627 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000628 0, /* tp_traverse */
629 0, /* tp_clear */
630 0, /* tp_richcompare */
631 0, /*tp_weaklistoffset*/
632 0, /* tp_iter */
633 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000634 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000635 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000636 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000637 0, /* tp_base */
638 0, /* tp_dict */
639 0, /* tp_descr_get */
640 0, /* tp_descr_set */
641 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000642 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000643 0, /* tp_alloc */
644 PyType_GenericNew, /* tp_new */
645};
646
647
/* TextIOWrapper */
649
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000650PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000651 "Character and line based layer over a BufferedIOBase object, buffer.\n"
652 "\n"
653 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200654 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000655 "\n"
Andrew Kuchlingc7b6c502013-06-16 12:58:48 -0400656 "errors determines the strictness of encoding and decoding (see\n"
657 "help(codecs.Codec) or the documentation for codecs.register) and\n"
658 "defaults to \"strict\".\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000659 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200660 "newline controls how line endings are handled. It can be None, '',\n"
661 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
662 "\n"
663 "* On input, if newline is None, universal newlines mode is\n"
664 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
665 " these are translated into '\\n' before being returned to the\n"
666 " caller. If it is '', universal newline mode is enabled, but line\n"
667 " endings are returned to the caller untranslated. If it has any of\n"
668 " the other legal values, input lines are only terminated by the given\n"
669 " string, and the line ending is returned to the caller untranslated.\n"
670 "\n"
671 "* On output, if newline is None, any '\\n' characters written are\n"
672 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300673 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200674 " of the other legal values, any '\\n' characters written are translated\n"
675 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000676 "\n"
677 "If line_buffering is True, a call to flush is implied when a call to\n"
678 "write contains a newline character."
679 );
680
681typedef PyObject *
682 (*encodefunc_t)(PyObject *, PyObject *);
683
684typedef struct
685{
686 PyObject_HEAD
687 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000688 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000689 Py_ssize_t chunk_size;
690 PyObject *buffer;
691 PyObject *encoding;
692 PyObject *encoder;
693 PyObject *decoder;
694 PyObject *readnl;
695 PyObject *errors;
696 const char *writenl; /* utf-8 encoded, NULL stands for \n */
697 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200698 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000699 char readuniversal;
700 char readtranslate;
701 char writetranslate;
702 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200703 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000704 char telling;
Antoine Pitrou796564c2013-07-30 19:59:21 +0200705 char finalizing;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000706 /* Specialized encoding func (see below) */
707 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000708 /* Whether or not it's the start of the stream */
709 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000710
711 /* Reads and writes are internally buffered in order to speed things up.
712 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000713
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714 Please also note that text to be written is first encoded before being
715 buffered. This is necessary so that encoding errors are immediately
716 reported to the caller, but it unfortunately means that the
717 IncrementalEncoder (whose encode() method is always written in Python)
718 becomes a bottleneck for small writes.
719 */
720 PyObject *decoded_chars; /* buffer for text returned from decoder */
721 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
722 PyObject *pending_bytes; /* list of bytes objects waiting to be
723 written, or NULL */
724 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000725
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000726 /* snapshot is either None, or a tuple (dec_flags, next_input) where
727 * dec_flags is the second (integer) item of the decoder state and
728 * next_input is the chunk of input bytes that comes next after the
729 * snapshot point. We use this to reconstruct decoder states in tell().
730 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000731 PyObject *snapshot;
732 /* Bytes-to-characters ratio for the current chunk. Serves as input for
733 the heuristic in tell(). */
734 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000735
736 /* Cache raw object if it's a FileIO object */
737 PyObject *raw;
738
739 PyObject *weakreflist;
740 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000741} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000742
743
/* A couple of specialized cases in order to bypass the slow incremental
   encoding methods for the most popular encodings. */
746
747static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000748ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000749{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200750 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751}
752
753static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000754utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000755{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100756 return _PyUnicode_EncodeUTF16(text,
757 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758}
759
760static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000761utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000762{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF16(text,
764 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000765}
766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769{
Antoine Pitroue4501852009-05-14 18:55:55 +0000770 if (!self->encoding_start_of_stream) {
771 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200772#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000773 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000774#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000775 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000776#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000777 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100778 return _PyUnicode_EncodeUTF16(text,
779 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000780}
781
Antoine Pitroue4501852009-05-14 18:55:55 +0000782static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000783utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000784{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100785 return _PyUnicode_EncodeUTF32(text,
786 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000787}
788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000791{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100792 return _PyUnicode_EncodeUTF32(text,
793 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000794}
795
796static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000797utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000798{
799 if (!self->encoding_start_of_stream) {
800 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200801#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000802 return utf32be_encode(self, text);
803#else
804 return utf32le_encode(self, text);
805#endif
806 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100807 return _PyUnicode_EncodeUTF32(text,
808 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000809}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000810
811static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000812utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000813{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200814 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000815}
816
817static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000818latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000819{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200820 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000821}
822
823/* Map normalized encoding names onto the specialized encoding funcs */
824
825typedef struct {
826 const char *name;
827 encodefunc_t encodefunc;
828} encodefuncentry;
829
Antoine Pitrou24f36292009-03-28 22:16:42 +0000830static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000831 {"ascii", (encodefunc_t) ascii_encode},
832 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000833 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000834 {"utf-16-be", (encodefunc_t) utf16be_encode},
835 {"utf-16-le", (encodefunc_t) utf16le_encode},
836 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000837 {"utf-32-be", (encodefunc_t) utf32be_encode},
838 {"utf-32-le", (encodefunc_t) utf32le_encode},
839 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000840 {NULL, NULL}
841};
842
843
844static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000845textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000846{
847 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200848 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000849 NULL};
850 PyObject *buffer, *raw;
851 char *encoding = NULL;
852 char *errors = NULL;
853 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200854 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000855 _PyIO_State *state = IO_STATE;
856
857 PyObject *res;
858 int r;
859
860 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000861 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200862 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000863 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200864 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000865 return -1;
866
867 if (newline && newline[0] != '\0'
868 && !(newline[0] == '\n' && newline[1] == '\0')
869 && !(newline[0] == '\r' && newline[1] == '\0')
870 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
871 PyErr_Format(PyExc_ValueError,
872 "illegal newline value: %s", newline);
873 return -1;
874 }
875
876 Py_CLEAR(self->buffer);
877 Py_CLEAR(self->encoding);
878 Py_CLEAR(self->encoder);
879 Py_CLEAR(self->decoder);
880 Py_CLEAR(self->readnl);
881 Py_CLEAR(self->decoded_chars);
882 Py_CLEAR(self->pending_bytes);
883 Py_CLEAR(self->snapshot);
884 Py_CLEAR(self->errors);
885 Py_CLEAR(self->raw);
886 self->decoded_chars_used = 0;
887 self->pending_bytes_count = 0;
888 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000889 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000890
891 if (encoding == NULL) {
892 /* Try os.device_encoding(fileno) */
893 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200894 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000895 /* Ignore only AttributeError and UnsupportedOperation */
896 if (fileno == NULL) {
897 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
898 PyErr_ExceptionMatches(state->unsupported_operation)) {
899 PyErr_Clear();
900 }
901 else {
902 goto error;
903 }
904 }
905 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200906 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500907 Py_DECREF(fileno);
908 if (fd == -1 && PyErr_Occurred()) {
909 goto error;
910 }
911
912 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000913 if (self->encoding == NULL)
914 goto error;
915 else if (!PyUnicode_Check(self->encoding))
916 Py_CLEAR(self->encoding);
917 }
918 }
919 if (encoding == NULL && self->encoding == NULL) {
Antoine Pitrou932ff832013-08-01 21:04:50 +0200920 PyObject *locale_module = _PyIO_get_locale_module(state);
921 if (locale_module == NULL)
922 goto catch_ImportError;
923 self->encoding = _PyObject_CallMethodId(
924 locale_module, &PyId_getpreferredencoding, "O", Py_False);
925 Py_DECREF(locale_module);
926 if (self->encoding == NULL) {
927 catch_ImportError:
928 /*
929 Importing locale can raise a ImportError because of
930 _functools, and locale.getpreferredencoding can raise a
931 ImportError if _locale is not available. These will happen
932 during module building.
933 */
934 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
935 PyErr_Clear();
936 self->encoding = PyUnicode_FromString("ascii");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000937 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200938 else
939 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000940 }
Antoine Pitrou932ff832013-08-01 21:04:50 +0200941 else if (!PyUnicode_Check(self->encoding))
942 Py_CLEAR(self->encoding);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000943 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000944 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000945 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000946 if (encoding == NULL)
947 goto error;
948 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000949 else if (encoding != NULL) {
950 self->encoding = PyUnicode_FromString(encoding);
951 if (self->encoding == NULL)
952 goto error;
953 }
954 else {
955 PyErr_SetString(PyExc_IOError,
956 "could not determine default encoding");
957 }
958
959 if (errors == NULL)
960 errors = "strict";
961 self->errors = PyBytes_FromString(errors);
962 if (self->errors == NULL)
963 goto error;
964
965 self->chunk_size = 8192;
966 self->readuniversal = (newline == NULL || newline[0] == '\0');
967 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200968 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000969 self->readtranslate = (newline == NULL);
970 if (newline) {
971 self->readnl = PyUnicode_FromString(newline);
972 if (self->readnl == NULL)
973 return -1;
974 }
975 self->writetranslate = (newline == NULL || newline[0] != '\0');
976 if (!self->readuniversal && self->readnl) {
977 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000978 if (self->writenl == NULL)
979 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000980 if (!strcmp(self->writenl, "\n"))
981 self->writenl = NULL;
982 }
983#ifdef MS_WINDOWS
984 else
985 self->writenl = "\r\n";
986#endif
987
988 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200989 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000990 if (res == NULL)
991 goto error;
992 r = PyObject_IsTrue(res);
993 Py_DECREF(res);
994 if (r == -1)
995 goto error;
996 if (r == 1) {
997 self->decoder = PyCodec_IncrementalDecoder(
998 encoding, errors);
999 if (self->decoder == NULL)
1000 goto error;
1001
1002 if (self->readuniversal) {
1003 PyObject *incrementalDecoder = PyObject_CallFunction(
1004 (PyObject *)&PyIncrementalNewlineDecoder_Type,
1005 "Oi", self->decoder, (int)self->readtranslate);
1006 if (incrementalDecoder == NULL)
1007 goto error;
1008 Py_CLEAR(self->decoder);
1009 self->decoder = incrementalDecoder;
1010 }
1011 }
1012
1013 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001014 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001015 if (res == NULL)
1016 goto error;
1017 r = PyObject_IsTrue(res);
1018 Py_DECREF(res);
1019 if (r == -1)
1020 goto error;
1021 if (r == 1) {
1022 PyObject *ci;
1023 self->encoder = PyCodec_IncrementalEncoder(
1024 encoding, errors);
1025 if (self->encoder == NULL)
1026 goto error;
1027 /* Get the normalized named of the codec */
1028 ci = _PyCodec_Lookup(encoding);
1029 if (ci == NULL)
1030 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001031 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001032 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001033 if (res == NULL) {
1034 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1035 PyErr_Clear();
1036 else
1037 goto error;
1038 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 else if (PyUnicode_Check(res)) {
1040 encodefuncentry *e = encodefuncs;
1041 while (e->name != NULL) {
1042 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1043 self->encodefunc = e->encodefunc;
1044 break;
1045 }
1046 e++;
1047 }
1048 }
1049 Py_XDECREF(res);
1050 }
1051
1052 self->buffer = buffer;
1053 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001054
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001055 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1056 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1057 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001058 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001059 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001060 if (raw == NULL) {
1061 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1062 PyErr_Clear();
1063 else
1064 goto error;
1065 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001066 else if (Py_TYPE(raw) == &PyFileIO_Type)
1067 self->raw = raw;
1068 else
1069 Py_DECREF(raw);
1070 }
1071
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001072 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001073 if (res == NULL)
1074 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001075 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001076 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001077 if (r < 0)
1078 goto error;
1079 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001080
Martin v. Löwis767046a2011-10-14 15:35:36 +02001081 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001082
Antoine Pitroue4501852009-05-14 18:55:55 +00001083 self->encoding_start_of_stream = 0;
1084 if (self->seekable && self->encoder) {
1085 PyObject *cookieObj;
1086 int cmp;
1087
1088 self->encoding_start_of_stream = 1;
1089
1090 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1091 if (cookieObj == NULL)
1092 goto error;
1093
1094 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1095 Py_DECREF(cookieObj);
1096 if (cmp < 0) {
1097 goto error;
1098 }
1099
1100 if (cmp == 0) {
1101 self->encoding_start_of_stream = 0;
1102 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1103 _PyIO_zero, NULL);
1104 if (res == NULL)
1105 goto error;
1106 Py_DECREF(res);
1107 }
1108 }
1109
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001110 self->ok = 1;
1111 return 0;
1112
1113 error:
1114 return -1;
1115}
1116
1117static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001118_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001119{
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001120 self->ok = 0;
1121 Py_CLEAR(self->buffer);
1122 Py_CLEAR(self->encoding);
1123 Py_CLEAR(self->encoder);
1124 Py_CLEAR(self->decoder);
1125 Py_CLEAR(self->readnl);
1126 Py_CLEAR(self->decoded_chars);
1127 Py_CLEAR(self->pending_bytes);
1128 Py_CLEAR(self->snapshot);
1129 Py_CLEAR(self->errors);
1130 Py_CLEAR(self->raw);
1131 return 0;
1132}
1133
1134static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001135textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001136{
Antoine Pitrou796564c2013-07-30 19:59:21 +02001137 self->finalizing = 1;
1138 if (_PyIOBase_finalize((PyObject *) self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001139 return;
Antoine Pitrou796564c2013-07-30 19:59:21 +02001140 _textiowrapper_clear(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001141 _PyObject_GC_UNTRACK(self);
1142 if (self->weakreflist != NULL)
1143 PyObject_ClearWeakRefs((PyObject *)self);
1144 Py_CLEAR(self->dict);
1145 Py_TYPE(self)->tp_free((PyObject *)self);
1146}
1147
1148static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001149textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001150{
1151 Py_VISIT(self->buffer);
1152 Py_VISIT(self->encoding);
1153 Py_VISIT(self->encoder);
1154 Py_VISIT(self->decoder);
1155 Py_VISIT(self->readnl);
1156 Py_VISIT(self->decoded_chars);
1157 Py_VISIT(self->pending_bytes);
1158 Py_VISIT(self->snapshot);
1159 Py_VISIT(self->errors);
1160 Py_VISIT(self->raw);
1161
1162 Py_VISIT(self->dict);
1163 return 0;
1164}
1165
1166static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001167textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001168{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001169 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001170 return -1;
1171 Py_CLEAR(self->dict);
1172 return 0;
1173}
1174
1175static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001176textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001177
/* Return NULL from the enclosing function, with an exception set, if the
   stream is closed.

   Subclasses go through the generic _PyIOBase_check_closed().  For an exact
   TextIOWrapper the macro takes a shortcut: it asks the cached raw FileIO
   directly when there is one, and otherwise evaluates the `closed` getter
   itself. */
#define CHECK_CLOSED(self) \
    do { \
        if (Py_TYPE(self) != &PyTextIOWrapper_Type) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            int _closed; \
            if (self->raw != NULL) \
                _closed = _PyFileIO_closed(self->raw); \
            else { \
                PyObject *_closed_obj = textiowrapper_closed_get(self, NULL); \
                if (_closed_obj == NULL) \
                    return NULL; \
                _closed = PyObject_IsTrue(_closed_obj); \
                Py_DECREF(_closed_obj); \
                if (_closed < 0) \
                    return NULL; \
            } \
            if (_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1204
/* Return NULL from the enclosing function, with ValueError set, unless the
   wrapper is fully initialized (self->ok > 0).  The message distinguishes a
   detached wrapper from one that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1216
/* Same check as CHECK_INITIALIZED, for enclosing functions that signal
   errors by returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1228
1229
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001230static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001231textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001232{
1233 PyObject *buffer, *res;
1234 CHECK_INITIALIZED(self);
1235 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1236 if (res == NULL)
1237 return NULL;
1238 Py_DECREF(res);
1239 buffer = self->buffer;
1240 self->buffer = NULL;
1241 self->detached = 1;
1242 self->ok = 0;
1243 return buffer;
1244}
1245
Antoine Pitrou24f36292009-03-28 22:16:42 +00001246/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001247 underlying buffered object, though. */
1248static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001249_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001250{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001251 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001252
1253 if (self->pending_bytes == NULL)
1254 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001255
1256 pending = self->pending_bytes;
1257 Py_INCREF(pending);
1258 self->pending_bytes_count = 0;
1259 Py_CLEAR(self->pending_bytes);
1260
1261 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1262 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001263 if (b == NULL)
1264 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001265 ret = NULL;
1266 do {
1267 ret = PyObject_CallMethodObjArgs(self->buffer,
1268 _PyIO_str_write, b, NULL);
1269 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270 Py_DECREF(b);
1271 if (ret == NULL)
1272 return -1;
1273 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001274 return 0;
1275}
1276
1277static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001278textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001279{
1280 PyObject *ret;
1281 PyObject *text; /* owned reference */
1282 PyObject *b;
1283 Py_ssize_t textlen;
1284 int haslf = 0;
1285 int needflush = 0;
1286
1287 CHECK_INITIALIZED(self);
1288
1289 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1290 return NULL;
1291 }
1292
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001293 if (PyUnicode_READY(text) == -1)
1294 return NULL;
1295
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 CHECK_CLOSED(self);
1297
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001298 if (self->encoder == NULL)
1299 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001300
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001301 Py_INCREF(text);
1302
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001303 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001304
1305 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001306 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001307 haslf = 1;
1308
1309 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001310 PyObject *newtext = _PyObject_CallMethodId(
1311 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 Py_DECREF(text);
1313 if (newtext == NULL)
1314 return NULL;
1315 text = newtext;
1316 }
1317
Antoine Pitroue96ec682011-07-23 21:46:35 +02001318 if (self->write_through)
1319 needflush = 1;
1320 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001321 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001322 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 needflush = 1;
1324
1325 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001326 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001327 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001328 self->encoding_start_of_stream = 0;
1329 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001330 else
1331 b = PyObject_CallMethodObjArgs(self->encoder,
1332 _PyIO_str_encode, text, NULL);
1333 Py_DECREF(text);
1334 if (b == NULL)
1335 return NULL;
1336
1337 if (self->pending_bytes == NULL) {
1338 self->pending_bytes = PyList_New(0);
1339 if (self->pending_bytes == NULL) {
1340 Py_DECREF(b);
1341 return NULL;
1342 }
1343 self->pending_bytes_count = 0;
1344 }
1345 if (PyList_Append(self->pending_bytes, b) < 0) {
1346 Py_DECREF(b);
1347 return NULL;
1348 }
1349 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1350 Py_DECREF(b);
1351 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001352 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001353 return NULL;
1354 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001355
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001356 if (needflush) {
1357 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1358 if (ret == NULL)
1359 return NULL;
1360 Py_DECREF(ret);
1361 }
1362
1363 Py_CLEAR(self->snapshot);
1364
1365 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001366 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001367 if (ret == NULL)
1368 return NULL;
1369 Py_DECREF(ret);
1370 }
1371
1372 return PyLong_FromSsize_t(textlen);
1373}
1374
1375/* Steal a reference to chars and store it in the decoded_char buffer;
1376 */
1377static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001378textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001379{
1380 Py_CLEAR(self->decoded_chars);
1381 self->decoded_chars = chars;
1382 self->decoded_chars_used = 0;
1383}
1384
1385static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001386textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001387{
1388 PyObject *chars;
1389 Py_ssize_t avail;
1390
1391 if (self->decoded_chars == NULL)
1392 return PyUnicode_FromStringAndSize(NULL, 0);
1393
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001394 /* decoded_chars is guaranteed to be "ready". */
1395 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001396 - self->decoded_chars_used);
1397
1398 assert(avail >= 0);
1399
1400 if (n < 0 || n > avail)
1401 n = avail;
1402
1403 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001404 chars = PyUnicode_Substring(self->decoded_chars,
1405 self->decoded_chars_used,
1406 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001407 if (chars == NULL)
1408 return NULL;
1409 }
1410 else {
1411 chars = self->decoded_chars;
1412 Py_INCREF(chars);
1413 }
1414
1415 self->decoded_chars_used += n;
1416 return chars;
1417}
1418
1419/* Read and decode the next chunk of data from the BufferedReader.
1420 */
1421static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001422textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001423{
1424 PyObject *dec_buffer = NULL;
1425 PyObject *dec_flags = NULL;
1426 PyObject *input_chunk = NULL;
1427 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001428 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001429 int eof;
1430
1431 /* The return value is True unless EOF was reached. The decoded string is
1432 * placed in self._decoded_chars (replacing its previous value). The
1433 * entire input chunk is sent to the decoder, though some of it may remain
1434 * buffered in the decoder, yet to be converted.
1435 */
1436
1437 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001438 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001439 return -1;
1440 }
1441
1442 if (self->telling) {
1443 /* To prepare for tell(), we need to snapshot a point in the file
1444 * where the decoder's input buffer is empty.
1445 */
1446
1447 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1448 _PyIO_str_getstate, NULL);
1449 if (state == NULL)
1450 return -1;
1451 /* Given this, we know there was a valid snapshot point
1452 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1453 */
1454 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1455 Py_DECREF(state);
1456 return -1;
1457 }
1458 Py_INCREF(dec_buffer);
1459 Py_INCREF(dec_flags);
1460 Py_DECREF(state);
1461 }
1462
1463 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001464 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001465 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001466 }
1467 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001468 if (chunk_size == NULL)
1469 goto fail;
1470 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001471 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1472 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001473 Py_DECREF(chunk_size);
1474 if (input_chunk == NULL)
1475 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001476 if (!PyBytes_Check(input_chunk)) {
1477 PyErr_Format(PyExc_TypeError,
1478 "underlying %s() should have returned a bytes object, "
1479 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1480 Py_TYPE(input_chunk)->tp_name);
1481 goto fail;
1482 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001483
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001484 nbytes = PyBytes_Size(input_chunk);
1485 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001486
1487 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1488 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1489 self->decoder, input_chunk, eof);
1490 }
1491 else {
1492 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1493 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1494 }
1495
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001496 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001497 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001498 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001499 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001500 if (nchars > 0)
1501 self->b2cratio = (double) nbytes / nchars;
1502 else
1503 self->b2cratio = 0.0;
1504 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001505 eof = 0;
1506
1507 if (self->telling) {
1508 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1509 * next input to be decoded is dec_buffer + input_chunk.
1510 */
1511 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1512 if (next_input == NULL)
1513 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001514 if (!PyBytes_Check(next_input)) {
1515 PyErr_Format(PyExc_TypeError,
1516 "decoder getstate() should have returned a bytes "
1517 "object, not '%.200s'",
1518 Py_TYPE(next_input)->tp_name);
1519 Py_DECREF(next_input);
1520 goto fail;
1521 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001522 Py_DECREF(dec_buffer);
1523 Py_CLEAR(self->snapshot);
1524 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1525 }
1526 Py_DECREF(input_chunk);
1527
1528 return (eof == 0);
1529
1530 fail:
1531 Py_XDECREF(dec_buffer);
1532 Py_XDECREF(dec_flags);
1533 Py_XDECREF(input_chunk);
1534 return -1;
1535}
1536
1537static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001538textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001539{
1540 Py_ssize_t n = -1;
1541 PyObject *result = NULL, *chunks = NULL;
1542
1543 CHECK_INITIALIZED(self);
1544
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001545 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546 return NULL;
1547
1548 CHECK_CLOSED(self);
1549
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001550 if (self->decoder == NULL)
1551 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001552
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001553 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 return NULL;
1555
1556 if (n < 0) {
1557 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001558 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001559 PyObject *decoded;
1560 if (bytes == NULL)
1561 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001562
1563 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1564 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1565 bytes, 1);
1566 else
1567 decoded = PyObject_CallMethodObjArgs(
1568 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001570 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001571 goto fail;
1572
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001573 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001574
1575 if (result == NULL) {
1576 Py_DECREF(decoded);
1577 return NULL;
1578 }
1579
1580 PyUnicode_AppendAndDel(&result, decoded);
1581 if (result == NULL)
1582 goto fail;
1583
1584 Py_CLEAR(self->snapshot);
1585 return result;
1586 }
1587 else {
1588 int res = 1;
1589 Py_ssize_t remaining = n;
1590
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001591 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592 if (result == NULL)
1593 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001594 if (PyUnicode_READY(result) == -1)
1595 goto fail;
1596 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001597
1598 /* Keep reading chunks until we have n characters to return */
1599 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001600 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001601 if (res < 0) {
1602 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1603 when EINTR occurs so we needn't do it ourselves. */
1604 if (_PyIO_trap_eintr()) {
1605 continue;
1606 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001607 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001608 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001609 if (res == 0) /* EOF */
1610 break;
1611 if (chunks == NULL) {
1612 chunks = PyList_New(0);
1613 if (chunks == NULL)
1614 goto fail;
1615 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001616 if (PyUnicode_GET_LENGTH(result) > 0 &&
1617 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001618 goto fail;
1619 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001620 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001621 if (result == NULL)
1622 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001623 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001624 }
1625 if (chunks != NULL) {
1626 if (result != NULL && PyList_Append(chunks, result) < 0)
1627 goto fail;
1628 Py_CLEAR(result);
1629 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1630 if (result == NULL)
1631 goto fail;
1632 Py_CLEAR(chunks);
1633 }
1634 return result;
1635 }
1636 fail:
1637 Py_XDECREF(result);
1638 Py_XDECREF(chunks);
1639 return NULL;
1640}
1641
1642
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001643/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001644 that is to the NUL character. Otherwise the function will produce
1645 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001646static char *
1647find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001649 if (kind == PyUnicode_1BYTE_KIND) {
1650 assert(ch < 256);
1651 return (char *) memchr((void *) s, (char) ch, end - s);
1652 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001653 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001654 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001655 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001656 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 return s;
1658 if (s == end)
1659 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001660 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001661 }
1662}
1663
1664Py_ssize_t
1665_PyIO_find_line_ending(
1666 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001667 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001668{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001669 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670
1671 if (translated) {
1672 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001673 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001674 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001675 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001676 else {
1677 *consumed = len;
1678 return -1;
1679 }
1680 }
1681 else if (universal) {
1682 /* Universal newline search. Find any of \r, \r\n, \n
1683 * The decoder ensures that \r\n are not split in two pieces
1684 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001685 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001686 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001687 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001689 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001690 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001691 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001692 if (s >= end) {
1693 *consumed = len;
1694 return -1;
1695 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001696 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001697 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001698 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001699 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001700 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001701 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001702 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001703 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001704 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705 }
1706 }
1707 }
1708 else {
1709 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001710 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1711 char *nl = PyUnicode_DATA(readnl);
1712 /* Assume that readnl is an ASCII character. */
1713 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001715 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001716 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001717 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001718 *consumed = len;
1719 return -1;
1720 }
1721 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001722 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001723 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001724 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001725 if (e < s)
1726 e = s;
1727 while (s < e) {
1728 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001729 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 if (pos == NULL || pos >= e)
1731 break;
1732 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001733 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001734 break;
1735 }
1736 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001737 return (pos - start)/kind + readnl_len;
1738 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001739 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001740 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001741 if (pos == NULL)
1742 *consumed = len;
1743 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001744 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001745 return -1;
1746 }
1747 }
1748}
1749
1750static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001751_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001752{
1753 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1754 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1755 int res;
1756
1757 CHECK_CLOSED(self);
1758
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001759 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001760 return NULL;
1761
1762 chunked = 0;
1763
1764 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001765 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001766 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001767 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001768 Py_ssize_t consumed = 0;
1769
1770 /* First, get some data if necessary */
1771 res = 1;
1772 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001773 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001774 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001775 if (res < 0) {
1776 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1777 when EINTR occurs so we needn't do it ourselves. */
1778 if (_PyIO_trap_eintr()) {
1779 continue;
1780 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001781 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001782 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001783 if (res == 0)
1784 break;
1785 }
1786 if (res == 0) {
1787 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001788 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001789 Py_CLEAR(self->snapshot);
1790 start = endpos = offset_to_buffer = 0;
1791 break;
1792 }
1793
1794 if (remaining == NULL) {
1795 line = self->decoded_chars;
1796 start = self->decoded_chars_used;
1797 offset_to_buffer = 0;
1798 Py_INCREF(line);
1799 }
1800 else {
1801 assert(self->decoded_chars_used == 0);
1802 line = PyUnicode_Concat(remaining, self->decoded_chars);
1803 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001804 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805 Py_CLEAR(remaining);
1806 if (line == NULL)
1807 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001808 if (PyUnicode_READY(line) == -1)
1809 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001810 }
1811
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001812 ptr = PyUnicode_DATA(line);
1813 line_len = PyUnicode_GET_LENGTH(line);
1814 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001815
1816 endpos = _PyIO_find_line_ending(
1817 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001818 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001819 ptr + kind * start,
1820 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001821 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001822 if (endpos >= 0) {
1823 endpos += start;
1824 if (limit >= 0 && (endpos - start) + chunked >= limit)
1825 endpos = start + limit - chunked;
1826 break;
1827 }
1828
1829 /* We can put aside up to `endpos` */
1830 endpos = consumed + start;
1831 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1832 /* Didn't find line ending, but reached length limit */
1833 endpos = start + limit - chunked;
1834 break;
1835 }
1836
1837 if (endpos > start) {
1838 /* No line ending seen yet - put aside current data */
1839 PyObject *s;
1840 if (chunks == NULL) {
1841 chunks = PyList_New(0);
1842 if (chunks == NULL)
1843 goto error;
1844 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001845 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001846 if (s == NULL)
1847 goto error;
1848 if (PyList_Append(chunks, s) < 0) {
1849 Py_DECREF(s);
1850 goto error;
1851 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001852 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001853 Py_DECREF(s);
1854 }
1855 /* There may be some remaining bytes we'll have to prepend to the
1856 next chunk of data */
1857 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001858 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001859 if (remaining == NULL)
1860 goto error;
1861 }
1862 Py_CLEAR(line);
1863 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001864 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001865 }
1866
1867 if (line != NULL) {
1868 /* Our line ends in the current buffer */
1869 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001870 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1871 PyObject *s = PyUnicode_Substring(line, start, endpos);
1872 Py_CLEAR(line);
1873 if (s == NULL)
1874 goto error;
1875 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001876 }
1877 }
1878 if (remaining != NULL) {
1879 if (chunks == NULL) {
1880 chunks = PyList_New(0);
1881 if (chunks == NULL)
1882 goto error;
1883 }
1884 if (PyList_Append(chunks, remaining) < 0)
1885 goto error;
1886 Py_CLEAR(remaining);
1887 }
1888 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001889 if (line != NULL) {
1890 if (PyList_Append(chunks, line) < 0)
1891 goto error;
1892 Py_DECREF(line);
1893 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001894 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1895 if (line == NULL)
1896 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001897 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001898 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001899 if (line == NULL) {
1900 Py_INCREF(_PyIO_empty_str);
1901 line = _PyIO_empty_str;
1902 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001903
1904 return line;
1905
1906 error:
1907 Py_XDECREF(chunks);
1908 Py_XDECREF(remaining);
1909 Py_XDECREF(line);
1910 return NULL;
1911}
1912
1913static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001914textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001915{
1916 Py_ssize_t limit = -1;
1917
1918 CHECK_INITIALIZED(self);
1919 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1920 return NULL;
1921 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001922 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001923}
1924
1925/* Seek and Tell */
1926
1927typedef struct {
1928 Py_off_t start_pos;
1929 int dec_flags;
1930 int bytes_to_feed;
1931 int chars_to_skip;
1932 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001933} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001934
/*
   To speed up cookie packing/unpacking, we store the cookie_type fields in
   a temporary byte buffer and call _PyLong_FromByteArray() (packing) or
   _PyLong_AsByteArray() (unpacking).
   The following macros define the size of that buffer and the offset at
   which each cookie_type field is stored in it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1966
1967static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001968textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001969{
1970 unsigned char buffer[COOKIE_BUF_LEN];
1971 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1972 if (cookieLong == NULL)
1973 return -1;
1974
1975 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001976 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977 Py_DECREF(cookieLong);
1978 return -1;
1979 }
1980 Py_DECREF(cookieLong);
1981
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001982 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1983 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1984 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1985 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1986 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001987
1988 return 0;
1989}
1990
1991static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001992textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001993{
1994 unsigned char buffer[COOKIE_BUF_LEN];
1995
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001996 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1997 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1998 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1999 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2000 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002001
Christian Heimes743e0cd2012-10-17 23:52:17 +02002002 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2003 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002005
2006static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002007_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002008{
2009 PyObject *res;
2010 /* When seeking to the start of the stream, we call decoder.reset()
2011 rather than decoder.getstate().
2012 This is for a few decoders such as utf-16 for which the state value
2013 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2014 utf-16, that we are expecting a BOM).
2015 */
2016 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2017 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2018 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002019 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2020 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021 if (res == NULL)
2022 return -1;
2023 Py_DECREF(res);
2024 return 0;
2025}
2026
Antoine Pitroue4501852009-05-14 18:55:55 +00002027static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002028_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002029{
2030 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002031 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002032 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2033 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2034 self->encoding_start_of_stream = 1;
2035 }
2036 else {
2037 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2038 _PyIO_zero, NULL);
2039 self->encoding_start_of_stream = 0;
2040 }
2041 if (res == NULL)
2042 return -1;
2043 Py_DECREF(res);
2044 return 0;
2045}
2046
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002048textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049{
2050 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002051 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002052 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 PyObject *res;
2054 int cmp;
2055
2056 CHECK_INITIALIZED(self);
2057
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002058 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2059 return NULL;
2060 CHECK_CLOSED(self);
2061
2062 Py_INCREF(cookieObj);
2063
2064 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002065 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002066 goto fail;
2067 }
2068
2069 if (whence == 1) {
2070 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002071 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002072 if (cmp < 0)
2073 goto fail;
2074
2075 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002076 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 goto fail;
2078 }
2079
2080 /* Seeking to the current position should attempt to
2081 * sync the underlying buffer with the current position.
2082 */
2083 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002084 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002085 if (cookieObj == NULL)
2086 goto fail;
2087 }
2088 else if (whence == 2) {
2089 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002090 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002091 if (cmp < 0)
2092 goto fail;
2093
2094 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002095 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002096 goto fail;
2097 }
2098
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002099 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002100 if (res == NULL)
2101 goto fail;
2102 Py_DECREF(res);
2103
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002104 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 Py_CLEAR(self->snapshot);
2106 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002107 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002108 if (res == NULL)
2109 goto fail;
2110 Py_DECREF(res);
2111 }
2112
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002113 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 Py_XDECREF(cookieObj);
2115 return res;
2116 }
2117 else if (whence != 0) {
2118 PyErr_Format(PyExc_ValueError,
2119 "invalid whence (%d, should be 0, 1 or 2)", whence);
2120 goto fail;
2121 }
2122
Antoine Pitroue4501852009-05-14 18:55:55 +00002123 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002124 if (cmp < 0)
2125 goto fail;
2126
2127 if (cmp == 1) {
2128 PyErr_Format(PyExc_ValueError,
2129 "negative seek position %R", cookieObj);
2130 goto fail;
2131 }
2132
2133 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2134 if (res == NULL)
2135 goto fail;
2136 Py_DECREF(res);
2137
2138 /* The strategy of seek() is to go back to the safe start point
2139 * and replay the effect of read(chars_to_skip) from there.
2140 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002141 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002142 goto fail;
2143
2144 /* Seek back to the safe start point. */
2145 posobj = PyLong_FromOff_t(cookie.start_pos);
2146 if (posobj == NULL)
2147 goto fail;
2148 res = PyObject_CallMethodObjArgs(self->buffer,
2149 _PyIO_str_seek, posobj, NULL);
2150 Py_DECREF(posobj);
2151 if (res == NULL)
2152 goto fail;
2153 Py_DECREF(res);
2154
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002155 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002156 Py_CLEAR(self->snapshot);
2157
2158 /* Restore the decoder to its state from the safe start point. */
2159 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002160 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002161 goto fail;
2162 }
2163
2164 if (cookie.chars_to_skip) {
2165 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002166 PyObject *input_chunk = _PyObject_CallMethodId(
2167 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002168 PyObject *decoded;
2169
2170 if (input_chunk == NULL)
2171 goto fail;
2172
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002173 if (!PyBytes_Check(input_chunk)) {
2174 PyErr_Format(PyExc_TypeError,
2175 "underlying read() should have returned a bytes "
2176 "object, not '%.200s'",
2177 Py_TYPE(input_chunk)->tp_name);
2178 Py_DECREF(input_chunk);
2179 goto fail;
2180 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002181
2182 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2183 if (self->snapshot == NULL) {
2184 Py_DECREF(input_chunk);
2185 goto fail;
2186 }
2187
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002188 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2189 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002190
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002191 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002192 goto fail;
2193
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002194 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002195
2196 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002197 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002198 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2199 goto fail;
2200 }
2201 self->decoded_chars_used = cookie.chars_to_skip;
2202 }
2203 else {
2204 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2205 if (self->snapshot == NULL)
2206 goto fail;
2207 }
2208
Antoine Pitroue4501852009-05-14 18:55:55 +00002209 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2210 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002211 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002212 goto fail;
2213 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002214 return cookieObj;
2215 fail:
2216 Py_XDECREF(cookieObj);
2217 return NULL;
2218
2219}
2220
2221static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002222textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002223{
2224 PyObject *res;
2225 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002226 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002227 PyObject *next_input;
2228 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002229 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002230 PyObject *saved_state = NULL;
2231 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002232 char *dec_buffer;
2233 Py_ssize_t dec_buffer_len;
2234 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235
2236 CHECK_INITIALIZED(self);
2237 CHECK_CLOSED(self);
2238
2239 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002240 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002241 goto fail;
2242 }
2243 if (!self->telling) {
2244 PyErr_SetString(PyExc_IOError,
2245 "telling position disabled by next() call");
2246 goto fail;
2247 }
2248
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002249 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002250 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002251 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252 if (res == NULL)
2253 goto fail;
2254 Py_DECREF(res);
2255
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002256 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002257 if (posobj == NULL)
2258 goto fail;
2259
2260 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002261 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002262 return posobj;
2263 }
2264
2265#if defined(HAVE_LARGEFILE_SUPPORT)
2266 cookie.start_pos = PyLong_AsLongLong(posobj);
2267#else
2268 cookie.start_pos = PyLong_AsLong(posobj);
2269#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002270 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002271 if (PyErr_Occurred())
2272 goto fail;
2273
2274 /* Skip backward to the snapshot point (see _read_chunk). */
2275 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2276 goto fail;
2277
2278 assert (PyBytes_Check(next_input));
2279
2280 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2281
2282 /* How many decoded characters have been used up since the snapshot? */
2283 if (self->decoded_chars_used == 0) {
2284 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002285 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002286 }
2287
2288 chars_to_skip = self->decoded_chars_used;
2289
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002290 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002291 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2292 _PyIO_str_getstate, NULL);
2293 if (saved_state == NULL)
2294 goto fail;
2295
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002296#define DECODER_GETSTATE() do { \
2297 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2298 _PyIO_str_getstate, NULL); \
2299 if (_state == NULL) \
2300 goto fail; \
2301 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2302 Py_DECREF(_state); \
2303 goto fail; \
2304 } \
2305 Py_DECREF(_state); \
2306 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002307
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002308#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002309 PyObject *_decoded = _PyObject_CallMethodId( \
2310 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002311 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002312 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002313 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002314 Py_DECREF(_decoded); \
2315 } while (0)
2316
2317 /* Fast search for an acceptable start point, close to our
2318 current pos */
2319 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2320 skip_back = 1;
2321 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2322 input = PyBytes_AS_STRING(next_input);
2323 while (skip_bytes > 0) {
2324 /* Decode up to tentative start point */
2325 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2326 goto fail;
2327 DECODER_DECODE(input, skip_bytes, chars_decoded);
2328 if (chars_decoded <= chars_to_skip) {
2329 DECODER_GETSTATE();
2330 if (dec_buffer_len == 0) {
2331 /* Before pos and no bytes buffered in decoder => OK */
2332 cookie.dec_flags = dec_flags;
2333 chars_to_skip -= chars_decoded;
2334 break;
2335 }
2336 /* Skip back by buffered amount and reset heuristic */
2337 skip_bytes -= dec_buffer_len;
2338 skip_back = 1;
2339 }
2340 else {
2341 /* We're too far ahead, skip back a bit */
2342 skip_bytes -= skip_back;
2343 skip_back *= 2;
2344 }
2345 }
2346 if (skip_bytes <= 0) {
2347 skip_bytes = 0;
2348 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2349 goto fail;
2350 }
2351
2352 /* Note our initial start point. */
2353 cookie.start_pos += skip_bytes;
Victor Stinner9a282972013-06-24 23:01:33 +02002354 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002355 if (chars_to_skip == 0)
2356 goto finally;
2357
2358 /* We should be close to the desired position. Now feed the decoder one
2359 * byte at a time until we reach the `chars_to_skip` target.
2360 * As we go, note the nearest "safe start point" before the current
2361 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002362 * can safely start from there and advance to this location).
2363 */
2364 chars_decoded = 0;
2365 input = PyBytes_AS_STRING(next_input);
2366 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002367 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002368 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002369 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002370
Serhiy Storchakaec67d182013-08-20 20:04:47 +03002371 DECODER_DECODE(input, (Py_ssize_t)1, n);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002372 /* We got n chars for 1 byte */
2373 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002374 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002375 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002376
2377 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2378 /* Decoder buffer is empty, so this is a safe start point. */
2379 cookie.start_pos += cookie.bytes_to_feed;
2380 chars_to_skip -= chars_decoded;
2381 cookie.dec_flags = dec_flags;
2382 cookie.bytes_to_feed = 0;
2383 chars_decoded = 0;
2384 }
2385 if (chars_decoded >= chars_to_skip)
2386 break;
2387 input++;
2388 }
2389 if (input == input_end) {
2390 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002391 PyObject *decoded = _PyObject_CallMethodId(
2392 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002393 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002394 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002395 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396 Py_DECREF(decoded);
2397 cookie.need_eof = 1;
2398
2399 if (chars_decoded < chars_to_skip) {
2400 PyErr_SetString(PyExc_IOError,
2401 "can't reconstruct logical file position");
2402 goto fail;
2403 }
2404 }
2405
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002406finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002407 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002408 Py_DECREF(saved_state);
2409 if (res == NULL)
2410 return NULL;
2411 Py_DECREF(res);
2412
2413 /* The returned cookie corresponds to the last safe start point. */
2414 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002415 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002416
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002417fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418 if (saved_state) {
2419 PyObject *type, *value, *traceback;
2420 PyErr_Fetch(&type, &value, &traceback);
2421
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002422 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002423 Py_DECREF(saved_state);
2424 if (res == NULL)
2425 return NULL;
2426 Py_DECREF(res);
2427
2428 PyErr_Restore(type, value, traceback);
2429 }
2430 return NULL;
2431}
2432
2433static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002434textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002435{
2436 PyObject *pos = Py_None;
2437 PyObject *res;
2438
2439 CHECK_INITIALIZED(self)
2440 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2441 return NULL;
2442 }
2443
2444 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2445 if (res == NULL)
2446 return NULL;
2447 Py_DECREF(res);
2448
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002449 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002450}
2451
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002452static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002453textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002454{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002455 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002456
2457 CHECK_INITIALIZED(self);
2458
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002459 res = PyUnicode_FromString("<_io.TextIOWrapper");
2460 if (res == NULL)
2461 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002462 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002463 if (nameobj == NULL) {
2464 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2465 PyErr_Clear();
2466 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002467 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002468 }
2469 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002470 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002471 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002472 if (s == NULL)
2473 goto error;
2474 PyUnicode_AppendAndDel(&res, s);
2475 if (res == NULL)
2476 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002477 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002478 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002479 if (modeobj == NULL) {
2480 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2481 PyErr_Clear();
2482 else
2483 goto error;
2484 }
2485 else {
2486 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2487 Py_DECREF(modeobj);
2488 if (s == NULL)
2489 goto error;
2490 PyUnicode_AppendAndDel(&res, s);
2491 if (res == NULL)
2492 return NULL;
2493 }
2494 s = PyUnicode_FromFormat("%U encoding=%R>",
2495 res, self->encoding);
2496 Py_DECREF(res);
2497 return s;
2498error:
2499 Py_XDECREF(res);
2500 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002501}
2502
2503
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504/* Inquiries */
2505
2506static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002507textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002508{
2509 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002510 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511}
2512
2513static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002514textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002515{
2516 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002517 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002518}
2519
2520static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002521textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002522{
2523 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002524 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002525}
2526
2527static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002528textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002529{
2530 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002531 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532}
2533
2534static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002535textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536{
2537 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002538 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002539}
2540
2541static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002542textiowrapper_getstate(textio *self, PyObject *args)
2543{
2544 PyErr_Format(PyExc_TypeError,
2545 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2546 return NULL;
2547}
2548
2549static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002550textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551{
2552 CHECK_INITIALIZED(self);
2553 CHECK_CLOSED(self);
2554 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002555 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002556 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002557 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558}
2559
2560static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002561textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002562{
2563 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002564 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002565 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002566
Antoine Pitrou6be88762010-05-03 16:48:20 +00002567 res = textiowrapper_closed_get(self, NULL);
2568 if (res == NULL)
2569 return NULL;
2570 r = PyObject_IsTrue(res);
2571 Py_DECREF(res);
2572 if (r < 0)
2573 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002574
Antoine Pitrou6be88762010-05-03 16:48:20 +00002575 if (r > 0) {
2576 Py_RETURN_NONE; /* stream already closed */
2577 }
2578 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002579 PyObject *exc = NULL, *val, *tb;
Antoine Pitrou796564c2013-07-30 19:59:21 +02002580 if (self->finalizing) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002581 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002582 if (res)
2583 Py_DECREF(res);
2584 else
2585 PyErr_Clear();
2586 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002587 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002588 if (res == NULL)
2589 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002590 else
2591 Py_DECREF(res);
2592
Benjamin Peterson68623612012-12-20 11:53:11 -06002593 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2594 if (exc != NULL) {
2595 if (res != NULL) {
2596 Py_CLEAR(res);
2597 PyErr_Restore(exc, val, tb);
2598 }
2599 else {
2600 PyObject *val2;
2601 Py_DECREF(exc);
2602 Py_XDECREF(tb);
2603 PyErr_Fetch(&exc, &val2, &tb);
2604 PyErr_NormalizeException(&exc, &val2, &tb);
2605 PyException_SetContext(val2, val);
2606 PyErr_Restore(exc, val2, tb);
2607 }
2608 }
2609 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002610 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002611}
2612
2613static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002614textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002615{
2616 PyObject *line;
2617
2618 CHECK_INITIALIZED(self);
2619
2620 self->telling = 0;
2621 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2622 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002623 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002624 }
2625 else {
2626 line = PyObject_CallMethodObjArgs((PyObject *)self,
2627 _PyIO_str_readline, NULL);
2628 if (line && !PyUnicode_Check(line)) {
2629 PyErr_Format(PyExc_IOError,
2630 "readline() should have returned an str object, "
2631 "not '%.200s'", Py_TYPE(line)->tp_name);
2632 Py_DECREF(line);
2633 return NULL;
2634 }
2635 }
2636
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002637 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002638 return NULL;
2639
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002640 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002641 /* Reached EOF or would have blocked */
2642 Py_DECREF(line);
2643 Py_CLEAR(self->snapshot);
2644 self->telling = self->seekable;
2645 return NULL;
2646 }
2647
2648 return line;
2649}
2650
2651static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002652textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002653{
2654 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002655 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002656}
2657
2658static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002659textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002660{
2661 CHECK_INITIALIZED(self);
2662 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2663}
2664
2665static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002666textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002667{
2668 PyObject *res;
2669 CHECK_INITIALIZED(self);
2670 if (self->decoder == NULL)
2671 Py_RETURN_NONE;
2672 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2673 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002674 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2675 PyErr_Clear();
2676 Py_RETURN_NONE;
2677 }
2678 else {
2679 return NULL;
2680 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002681 }
2682 return res;
2683}
2684
2685static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002686textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002687{
2688 CHECK_INITIALIZED(self);
2689 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2690}
2691
2692static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002693textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002694{
2695 CHECK_INITIALIZED(self);
2696 return PyLong_FromSsize_t(self->chunk_size);
2697}
2698
2699static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002700textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002701{
2702 Py_ssize_t n;
2703 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002704 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002705 if (n == -1 && PyErr_Occurred())
2706 return -1;
2707 if (n <= 0) {
2708 PyErr_SetString(PyExc_ValueError,
2709 "a strictly positive integer is required");
2710 return -1;
2711 }
2712 self->chunk_size = n;
2713 return 0;
2714}
2715
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002716static PyMethodDef textiowrapper_methods[] = {
2717 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2718 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2719 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2720 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2721 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2722 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002723
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002724 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2725 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2726 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2727 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2728 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002729 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002730
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002731 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2732 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2733 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002734 {NULL, NULL}
2735};
2736
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002737static PyMemberDef textiowrapper_members[] = {
2738 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2739 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2740 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Antoine Pitrou796564c2013-07-30 19:59:21 +02002741 {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002742 {NULL}
2743};
2744
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002745static PyGetSetDef textiowrapper_getset[] = {
2746 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2747 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002748/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2749*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002750 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2751 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2752 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2753 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002754 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002755};
2756
2757PyTypeObject PyTextIOWrapper_Type = {
2758 PyVarObject_HEAD_INIT(NULL, 0)
2759 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002760 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002761 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002762 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002763 0, /*tp_print*/
2764 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002765 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002766 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002767 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002768 0, /*tp_as_number*/
2769 0, /*tp_as_sequence*/
2770 0, /*tp_as_mapping*/
2771 0, /*tp_hash */
2772 0, /*tp_call*/
2773 0, /*tp_str*/
2774 0, /*tp_getattro*/
2775 0, /*tp_setattro*/
2776 0, /*tp_as_buffer*/
2777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
Antoine Pitrou796564c2013-07-30 19:59:21 +02002778 | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_HAVE_FINALIZE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002779 textiowrapper_doc, /* tp_doc */
2780 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2781 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002782 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002783 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002784 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002785 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2786 textiowrapper_methods, /* tp_methods */
2787 textiowrapper_members, /* tp_members */
2788 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002789 0, /* tp_base */
2790 0, /* tp_dict */
2791 0, /* tp_descr_get */
2792 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002793 offsetof(textio, dict), /*tp_dictoffset*/
2794 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002795 0, /* tp_alloc */
2796 PyType_GenericNew, /* tp_new */
Antoine Pitrou796564c2013-07-30 19:59:21 +02002797 0, /* tp_free */
2798 0, /* tp_is_gc */
2799 0, /* tp_bases */
2800 0, /* tp_mro */
2801 0, /* tp_cache */
2802 0, /* tp_subclasses */
2803 0, /* tp_weaklist */
2804 0, /* tp_del */
2805 0, /* tp_version_tag */
2806 0, /* tp_finalize */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002807};