blob: cff9c6e9372d025bf4f6d3ec079f4186ecd9277b [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020014_Py_IDENTIFIER(close);
15_Py_IDENTIFIER(_dealloc_warn);
16_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020017_Py_IDENTIFIER(fileno);
18_Py_IDENTIFIER(flush);
19_Py_IDENTIFIER(getpreferredencoding);
20_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020021_Py_IDENTIFIER(mode);
22_Py_IDENTIFIER(name);
23_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020024_Py_IDENTIFIER(read);
Martin v. Löwis767046a2011-10-14 15:35:36 +020025_Py_IDENTIFIER(read1);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020026_Py_IDENTIFIER(readable);
27_Py_IDENTIFIER(replace);
28_Py_IDENTIFIER(reset);
29_Py_IDENTIFIER(seek);
30_Py_IDENTIFIER(seekable);
31_Py_IDENTIFIER(setstate);
32_Py_IDENTIFIER(tell);
33_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
198/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200260static int
261check_decoded(PyObject *decoded)
262{
263 if (decoded == NULL)
264 return -1;
265 if (!PyUnicode_Check(decoded)) {
266 PyErr_Format(PyExc_TypeError,
267 "decoder should return a string result, not '%.200s'",
268 Py_TYPE(decoded)->tp_name);
269 Py_DECREF(decoded);
270 return -1;
271 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200272 if (PyUnicode_READY(decoded) < 0) {
273 Py_DECREF(decoded);
274 return -1;
275 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200276 return 0;
277}
278
/* Bit flags recording which newline styles have been encountered;
   they are OR-ed together in the decoder's seennl field. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
283
284PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000285_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000286 PyObject *input, int final)
287{
288 PyObject *output;
289 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000290 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000291
292 if (self->decoder == NULL) {
293 PyErr_SetString(PyExc_ValueError,
294 "IncrementalNewlineDecoder.__init__ not called");
295 return NULL;
296 }
297
298 /* decode input (with the eventual \r from a previous pass) */
299 if (self->decoder != Py_None) {
300 output = PyObject_CallMethodObjArgs(self->decoder,
301 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
302 }
303 else {
304 output = input;
305 Py_INCREF(output);
306 }
307
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200308 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000309 return NULL;
310
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200311 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000312 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200313 /* Prefix output with CR */
314 int kind;
315 PyObject *modified;
316 char *out;
317
318 modified = PyUnicode_New(output_len + 1,
319 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000320 if (modified == NULL)
321 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 kind = PyUnicode_KIND(modified);
323 out = PyUnicode_DATA(modified);
324 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200325 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000326 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000328 self->pendingcr = 0;
329 output_len++;
330 }
331
332 /* retain last \r even when not translating data:
333 * then readline() is sure to get \r\n in one pass
334 */
335 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000336 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200337 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
338 {
339 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
340 if (modified == NULL)
341 goto error;
342 Py_DECREF(output);
343 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000344 self->pendingcr = 1;
345 }
346 }
347
348 /* Record which newlines are read and do newline translation if desired,
349 all in one pass. */
350 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200351 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000352 Py_ssize_t len;
353 int seennl = self->seennl;
354 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200355 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000356
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 in_str = PyUnicode_DATA(output);
358 len = PyUnicode_GET_LENGTH(output);
359 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360
361 if (len == 0)
362 return output;
363
364 /* If, up to now, newlines are consistently \n, do a quick check
365 for the \r *byte* with the libc's optimized memchr.
366 */
367 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200368 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 }
370
Antoine Pitrou66913e22009-03-06 23:40:56 +0000371 if (only_lf) {
372 /* If not already seen, quick scan for a possible "\n" character.
373 (there's nothing else to be done, even when in translation mode)
374 */
375 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200376 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100377 if (kind == PyUnicode_1BYTE_KIND)
378 seennl |= SEEN_LF;
379 else {
380 Py_ssize_t i = 0;
381 for (;;) {
382 Py_UCS4 c;
383 /* Fast loop for non-control characters */
384 while (PyUnicode_READ(kind, in_str, i) > '\n')
385 i++;
386 c = PyUnicode_READ(kind, in_str, i++);
387 if (c == '\n') {
388 seennl |= SEEN_LF;
389 break;
390 }
391 if (i >= len)
392 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000393 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000394 }
395 }
396 /* Finished: we have scanned for newlines, and none of them
397 need translating */
398 }
399 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000401 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000402 if (seennl == SEEN_ALL)
403 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 while (PyUnicode_READ(kind, in_str, i) > '\r')
408 i++;
409 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000410 if (c == '\n')
411 seennl |= SEEN_LF;
412 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000414 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000416 }
417 else
418 seennl |= SEEN_CR;
419 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000421 break;
422 if (seennl == SEEN_ALL)
423 break;
424 }
425 endscan:
426 ;
427 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000428 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 void *translated;
430 int kind = PyUnicode_KIND(output);
431 void *in_str = PyUnicode_DATA(output);
432 Py_ssize_t in, out;
433 /* XXX: Previous in-place translation here is disabled as
434 resizing is not possible anymore */
435 /* We could try to optimize this so that we only do a copy
436 when there is something to translate. On the other hand,
437 we already know there is a \r byte, so chances are high
438 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200439 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 if (translated == NULL) {
441 PyErr_NoMemory();
442 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000443 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000445 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
449 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000450 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000452 seennl |= SEEN_LF;
453 continue;
454 }
455 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000457 in++;
458 seennl |= SEEN_CRLF;
459 }
460 else
461 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 continue;
464 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200465 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000466 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000468 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 Py_DECREF(output);
470 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100471 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200473 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000474 }
475 self->seennl |= seennl;
476 }
477
478 return output;
479
480 error:
481 Py_DECREF(output);
482 return NULL;
483}
484
485static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000486incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 PyObject *args, PyObject *kwds)
488{
489 char *kwlist[] = {"input", "final", NULL};
490 PyObject *input;
491 int final = 0;
492
493 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
494 kwlist, &input, &final))
495 return NULL;
496 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
497}
498
499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000500incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000501{
502 PyObject *buffer;
503 unsigned PY_LONG_LONG flag;
504
505 if (self->decoder != Py_None) {
506 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
507 _PyIO_str_getstate, NULL);
508 if (state == NULL)
509 return NULL;
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
511 Py_DECREF(state);
512 return NULL;
513 }
514 Py_INCREF(buffer);
515 Py_DECREF(state);
516 }
517 else {
518 buffer = PyBytes_FromString("");
519 flag = 0;
520 }
521 flag <<= 1;
522 if (self->pendingcr)
523 flag |= 1;
524 return Py_BuildValue("NK", buffer, flag);
525}
526
527static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000528incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529{
530 PyObject *buffer;
531 unsigned PY_LONG_LONG flag;
532
533 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
534 return NULL;
535
536 self->pendingcr = (int) flag & 1;
537 flag >>= 1;
538
539 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200540 return _PyObject_CallMethodId(self->decoder,
541 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542 else
543 Py_RETURN_NONE;
544}
545
546static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000547incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000548{
549 self->seennl = 0;
550 self->pendingcr = 0;
551 if (self->decoder != Py_None)
552 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
553 else
554 Py_RETURN_NONE;
555}
556
557static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000558incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000559{
560 switch (self->seennl) {
561 case SEEN_CR:
562 return PyUnicode_FromString("\r");
563 case SEEN_LF:
564 return PyUnicode_FromString("\n");
565 case SEEN_CRLF:
566 return PyUnicode_FromString("\r\n");
567 case SEEN_CR | SEEN_LF:
568 return Py_BuildValue("ss", "\r", "\n");
569 case SEEN_CR | SEEN_CRLF:
570 return Py_BuildValue("ss", "\r", "\r\n");
571 case SEEN_LF | SEEN_CRLF:
572 return Py_BuildValue("ss", "\n", "\r\n");
573 case SEEN_CR | SEEN_LF | SEEN_CRLF:
574 return Py_BuildValue("sss", "\r", "\n", "\r\n");
575 default:
576 Py_RETURN_NONE;
577 }
578
579}
580
581
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000582static PyMethodDef incrementalnewlinedecoder_methods[] = {
583 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
584 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
585 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
586 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000587 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000588};
589
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590static PyGetSetDef incrementalnewlinedecoder_getset[] = {
591 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000592 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000593};
594
595PyTypeObject PyIncrementalNewlineDecoder_Type = {
596 PyVarObject_HEAD_INIT(NULL, 0)
597 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000598 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000599 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000600 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601 0, /*tp_print*/
602 0, /*tp_getattr*/
603 0, /*tp_setattr*/
604 0, /*tp_compare */
605 0, /*tp_repr*/
606 0, /*tp_as_number*/
607 0, /*tp_as_sequence*/
608 0, /*tp_as_mapping*/
609 0, /*tp_hash */
610 0, /*tp_call*/
611 0, /*tp_str*/
612 0, /*tp_getattro*/
613 0, /*tp_setattro*/
614 0, /*tp_as_buffer*/
615 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 0, /* tp_traverse */
618 0, /* tp_clear */
619 0, /* tp_richcompare */
620 0, /*tp_weaklistoffset*/
621 0, /* tp_iter */
622 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000623 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000624 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000625 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000626 0, /* tp_base */
627 0, /* tp_dict */
628 0, /* tp_descr_get */
629 0, /* tp_descr_set */
630 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000631 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000632 0, /* tp_alloc */
633 PyType_GenericNew, /* tp_new */
634};
635
636
637/* TextIOWrapper */
638
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000639PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000640 "Character and line based layer over a BufferedIOBase object, buffer.\n"
641 "\n"
642 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200643 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 "\n"
645 "errors determines the strictness of encoding and decoding (see the\n"
646 "codecs.register) and defaults to \"strict\".\n"
647 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200648 "newline controls how line endings are handled. It can be None, '',\n"
649 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
650 "\n"
651 "* On input, if newline is None, universal newlines mode is\n"
652 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
653 " these are translated into '\\n' before being returned to the\n"
654 " caller. If it is '', universal newline mode is enabled, but line\n"
655 " endings are returned to the caller untranslated. If it has any of\n"
656 " the other legal values, input lines are only terminated by the given\n"
657 " string, and the line ending is returned to the caller untranslated.\n"
658 "\n"
659 "* On output, if newline is None, any '\\n' characters written are\n"
660 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300661 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200662 " of the other legal values, any '\\n' characters written are translated\n"
663 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000664 "\n"
665 "If line_buffering is True, a call to flush is implied when a call to\n"
666 "write contains a newline character."
667 );
668
669typedef PyObject *
670 (*encodefunc_t)(PyObject *, PyObject *);
671
672typedef struct
673{
674 PyObject_HEAD
675 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000676 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 Py_ssize_t chunk_size;
678 PyObject *buffer;
679 PyObject *encoding;
680 PyObject *encoder;
681 PyObject *decoder;
682 PyObject *readnl;
683 PyObject *errors;
684 const char *writenl; /* utf-8 encoded, NULL stands for \n */
685 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200686 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000687 char readuniversal;
688 char readtranslate;
689 char writetranslate;
690 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200691 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000692 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000693 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000694 /* Specialized encoding func (see below) */
695 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000696 /* Whether or not it's the start of the stream */
697 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698
699 /* Reads and writes are internally buffered in order to speed things up.
700 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000701
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000702 Please also note that text to be written is first encoded before being
703 buffered. This is necessary so that encoding errors are immediately
704 reported to the caller, but it unfortunately means that the
705 IncrementalEncoder (whose encode() method is always written in Python)
706 becomes a bottleneck for small writes.
707 */
708 PyObject *decoded_chars; /* buffer for text returned from decoder */
709 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
710 PyObject *pending_bytes; /* list of bytes objects waiting to be
711 written, or NULL */
712 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000713
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714 /* snapshot is either None, or a tuple (dec_flags, next_input) where
715 * dec_flags is the second (integer) item of the decoder state and
716 * next_input is the chunk of input bytes that comes next after the
717 * snapshot point. We use this to reconstruct decoder states in tell().
718 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000719 PyObject *snapshot;
720 /* Bytes-to-characters ratio for the current chunk. Serves as input for
721 the heuristic in tell(). */
722 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723
724 /* Cache raw object if it's a FileIO object */
725 PyObject *raw;
726
727 PyObject *weakreflist;
728 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000729} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730
731
732/* A couple of specialized cases in order to bypass the slow incremental
733 encoding methods for the most popular encodings. */
734
735static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000736ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200738 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739}
740
741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100744 return _PyUnicode_EncodeUTF16(text,
745 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746}
747
748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100751 return _PyUnicode_EncodeUTF16(text,
752 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000757{
Antoine Pitroue4501852009-05-14 18:55:55 +0000758 if (!self->encoding_start_of_stream) {
759 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000760#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000761 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000762#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000763 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000765 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100766 return _PyUnicode_EncodeUTF16(text,
767 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000768}
769
Antoine Pitroue4501852009-05-14 18:55:55 +0000770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000772{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100773 return _PyUnicode_EncodeUTF32(text,
774 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000775}
776
777static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000778utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000779{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100780 return _PyUnicode_EncodeUTF32(text,
781 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000782}
783
784static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000785utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000786{
787 if (!self->encoding_start_of_stream) {
788 /* Skip the BOM and use native byte ordering */
789#if defined(WORDS_BIGENDIAN)
790 return utf32be_encode(self, text);
791#else
792 return utf32le_encode(self, text);
793#endif
794 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100795 return _PyUnicode_EncodeUTF32(text,
796 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000797}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000798
799static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000800utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000801{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200802 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803}
804
805static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000806latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000807{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200808 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809}
810
811/* Map normalized encoding names onto the specialized encoding funcs */
812
813typedef struct {
814 const char *name;
815 encodefunc_t encodefunc;
816} encodefuncentry;
817
Antoine Pitrou24f36292009-03-28 22:16:42 +0000818static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000819 {"ascii", (encodefunc_t) ascii_encode},
820 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000821 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000822 {"utf-16-be", (encodefunc_t) utf16be_encode},
823 {"utf-16-le", (encodefunc_t) utf16le_encode},
824 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000825 {"utf-32-be", (encodefunc_t) utf32be_encode},
826 {"utf-32-le", (encodefunc_t) utf32le_encode},
827 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000828 {NULL, NULL}
829};
830
831
832static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000833textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000834{
835 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200836 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 NULL};
838 PyObject *buffer, *raw;
839 char *encoding = NULL;
840 char *errors = NULL;
841 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200842 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000843 _PyIO_State *state = IO_STATE;
844
845 PyObject *res;
846 int r;
847
848 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000849 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200850 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000851 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200852 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000853 return -1;
854
855 if (newline && newline[0] != '\0'
856 && !(newline[0] == '\n' && newline[1] == '\0')
857 && !(newline[0] == '\r' && newline[1] == '\0')
858 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
859 PyErr_Format(PyExc_ValueError,
860 "illegal newline value: %s", newline);
861 return -1;
862 }
863
864 Py_CLEAR(self->buffer);
865 Py_CLEAR(self->encoding);
866 Py_CLEAR(self->encoder);
867 Py_CLEAR(self->decoder);
868 Py_CLEAR(self->readnl);
869 Py_CLEAR(self->decoded_chars);
870 Py_CLEAR(self->pending_bytes);
871 Py_CLEAR(self->snapshot);
872 Py_CLEAR(self->errors);
873 Py_CLEAR(self->raw);
874 self->decoded_chars_used = 0;
875 self->pending_bytes_count = 0;
876 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000877 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000878
879 if (encoding == NULL) {
880 /* Try os.device_encoding(fileno) */
881 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200882 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000883 /* Ignore only AttributeError and UnsupportedOperation */
884 if (fileno == NULL) {
885 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
886 PyErr_ExceptionMatches(state->unsupported_operation)) {
887 PyErr_Clear();
888 }
889 else {
890 goto error;
891 }
892 }
893 else {
Serhiy Storchaka9101e232013-01-19 12:41:45 +0200894 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500895 Py_DECREF(fileno);
896 if (fd == -1 && PyErr_Occurred()) {
897 goto error;
898 }
899
900 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000901 if (self->encoding == NULL)
902 goto error;
903 else if (!PyUnicode_Check(self->encoding))
904 Py_CLEAR(self->encoding);
905 }
906 }
907 if (encoding == NULL && self->encoding == NULL) {
908 if (state->locale_module == NULL) {
909 state->locale_module = PyImport_ImportModule("locale");
910 if (state->locale_module == NULL)
911 goto catch_ImportError;
912 else
913 goto use_locale;
914 }
915 else {
916 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200917 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200918 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000919 if (self->encoding == NULL) {
920 catch_ImportError:
921 /*
922 Importing locale can raise a ImportError because of
923 _functools, and locale.getpreferredencoding can raise a
924 ImportError if _locale is not available. These will happen
925 during module building.
926 */
927 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
928 PyErr_Clear();
929 self->encoding = PyUnicode_FromString("ascii");
930 }
931 else
932 goto error;
933 }
934 else if (!PyUnicode_Check(self->encoding))
935 Py_CLEAR(self->encoding);
936 }
937 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000938 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000939 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000940 if (encoding == NULL)
941 goto error;
942 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000943 else if (encoding != NULL) {
944 self->encoding = PyUnicode_FromString(encoding);
945 if (self->encoding == NULL)
946 goto error;
947 }
948 else {
949 PyErr_SetString(PyExc_IOError,
950 "could not determine default encoding");
951 }
952
953 if (errors == NULL)
954 errors = "strict";
955 self->errors = PyBytes_FromString(errors);
956 if (self->errors == NULL)
957 goto error;
958
959 self->chunk_size = 8192;
960 self->readuniversal = (newline == NULL || newline[0] == '\0');
961 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200962 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000963 self->readtranslate = (newline == NULL);
964 if (newline) {
965 self->readnl = PyUnicode_FromString(newline);
966 if (self->readnl == NULL)
967 return -1;
968 }
969 self->writetranslate = (newline == NULL || newline[0] != '\0');
970 if (!self->readuniversal && self->readnl) {
971 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000972 if (self->writenl == NULL)
973 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000974 if (!strcmp(self->writenl, "\n"))
975 self->writenl = NULL;
976 }
977#ifdef MS_WINDOWS
978 else
979 self->writenl = "\r\n";
980#endif
981
982 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200983 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000984 if (res == NULL)
985 goto error;
986 r = PyObject_IsTrue(res);
987 Py_DECREF(res);
988 if (r == -1)
989 goto error;
990 if (r == 1) {
991 self->decoder = PyCodec_IncrementalDecoder(
992 encoding, errors);
993 if (self->decoder == NULL)
994 goto error;
995
996 if (self->readuniversal) {
997 PyObject *incrementalDecoder = PyObject_CallFunction(
998 (PyObject *)&PyIncrementalNewlineDecoder_Type,
999 "Oi", self->decoder, (int)self->readtranslate);
1000 if (incrementalDecoder == NULL)
1001 goto error;
1002 Py_CLEAR(self->decoder);
1003 self->decoder = incrementalDecoder;
1004 }
1005 }
1006
1007 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001008 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001009 if (res == NULL)
1010 goto error;
1011 r = PyObject_IsTrue(res);
1012 Py_DECREF(res);
1013 if (r == -1)
1014 goto error;
1015 if (r == 1) {
1016 PyObject *ci;
1017 self->encoder = PyCodec_IncrementalEncoder(
1018 encoding, errors);
1019 if (self->encoder == NULL)
1020 goto error;
1021 /* Get the normalized named of the codec */
1022 ci = _PyCodec_Lookup(encoding);
1023 if (ci == NULL)
1024 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001025 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001026 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001027 if (res == NULL) {
1028 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1029 PyErr_Clear();
1030 else
1031 goto error;
1032 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001033 else if (PyUnicode_Check(res)) {
1034 encodefuncentry *e = encodefuncs;
1035 while (e->name != NULL) {
1036 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1037 self->encodefunc = e->encodefunc;
1038 break;
1039 }
1040 e++;
1041 }
1042 }
1043 Py_XDECREF(res);
1044 }
1045
1046 self->buffer = buffer;
1047 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001048
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001049 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1050 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1051 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001052 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001053 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001054 if (raw == NULL) {
1055 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1056 PyErr_Clear();
1057 else
1058 goto error;
1059 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 else if (Py_TYPE(raw) == &PyFileIO_Type)
1061 self->raw = raw;
1062 else
1063 Py_DECREF(raw);
1064 }
1065
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001066 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001067 if (res == NULL)
1068 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001069 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001070 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001071 if (r < 0)
1072 goto error;
1073 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001074
Martin v. Löwis767046a2011-10-14 15:35:36 +02001075 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001076
Antoine Pitroue4501852009-05-14 18:55:55 +00001077 self->encoding_start_of_stream = 0;
1078 if (self->seekable && self->encoder) {
1079 PyObject *cookieObj;
1080 int cmp;
1081
1082 self->encoding_start_of_stream = 1;
1083
1084 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1085 if (cookieObj == NULL)
1086 goto error;
1087
1088 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1089 Py_DECREF(cookieObj);
1090 if (cmp < 0) {
1091 goto error;
1092 }
1093
1094 if (cmp == 0) {
1095 self->encoding_start_of_stream = 0;
1096 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1097 _PyIO_zero, NULL);
1098 if (res == NULL)
1099 goto error;
1100 Py_DECREF(res);
1101 }
1102 }
1103
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001104 self->ok = 1;
1105 return 0;
1106
1107 error:
1108 return -1;
1109}
1110
1111static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001112_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001113{
1114 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1115 return -1;
1116 self->ok = 0;
1117 Py_CLEAR(self->buffer);
1118 Py_CLEAR(self->encoding);
1119 Py_CLEAR(self->encoder);
1120 Py_CLEAR(self->decoder);
1121 Py_CLEAR(self->readnl);
1122 Py_CLEAR(self->decoded_chars);
1123 Py_CLEAR(self->pending_bytes);
1124 Py_CLEAR(self->snapshot);
1125 Py_CLEAR(self->errors);
1126 Py_CLEAR(self->raw);
1127 return 0;
1128}
1129
1130static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001131textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001132{
Antoine Pitroue033e062010-10-29 10:38:18 +00001133 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001134 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 return;
1136 _PyObject_GC_UNTRACK(self);
1137 if (self->weakreflist != NULL)
1138 PyObject_ClearWeakRefs((PyObject *)self);
1139 Py_CLEAR(self->dict);
1140 Py_TYPE(self)->tp_free((PyObject *)self);
1141}
1142
1143static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001144textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001145{
1146 Py_VISIT(self->buffer);
1147 Py_VISIT(self->encoding);
1148 Py_VISIT(self->encoder);
1149 Py_VISIT(self->decoder);
1150 Py_VISIT(self->readnl);
1151 Py_VISIT(self->decoded_chars);
1152 Py_VISIT(self->pending_bytes);
1153 Py_VISIT(self->snapshot);
1154 Py_VISIT(self->errors);
1155 Py_VISIT(self->raw);
1156
1157 Py_VISIT(self->dict);
1158 return 0;
1159}
1160
1161static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001162textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001163{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001164 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001165 return -1;
1166 Py_CLEAR(self->dict);
1167 return 0;
1168}
1169
1170static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001171textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001172
1173/* This macro takes some shortcuts to make the common case faster. */
1174#define CHECK_CLOSED(self) \
1175 do { \
1176 int r; \
1177 PyObject *_res; \
1178 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1179 if (self->raw != NULL) \
1180 r = _PyFileIO_closed(self->raw); \
1181 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001182 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183 if (_res == NULL) \
1184 return NULL; \
1185 r = PyObject_IsTrue(_res); \
1186 Py_DECREF(_res); \
1187 if (r < 0) \
1188 return NULL; \
1189 } \
1190 if (r > 0) { \
1191 PyErr_SetString(PyExc_ValueError, \
1192 "I/O operation on closed file."); \
1193 return NULL; \
1194 } \
1195 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001196 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001197 return NULL; \
1198 } while (0)
1199
/* Make the calling function return NULL with ValueError set when the
   wrapper is not usable: either its buffer was detached or it was never
   (successfully) initialized. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1211
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        (self)->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1223
1224
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001225static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001226textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001227{
1228 PyObject *buffer, *res;
1229 CHECK_INITIALIZED(self);
1230 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1231 if (res == NULL)
1232 return NULL;
1233 Py_DECREF(res);
1234 buffer = self->buffer;
1235 self->buffer = NULL;
1236 self->detached = 1;
1237 self->ok = 0;
1238 return buffer;
1239}
1240
Antoine Pitrou24f36292009-03-28 22:16:42 +00001241/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001242 underlying buffered object, though. */
1243static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001244_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001246 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001247
1248 if (self->pending_bytes == NULL)
1249 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001250
1251 pending = self->pending_bytes;
1252 Py_INCREF(pending);
1253 self->pending_bytes_count = 0;
1254 Py_CLEAR(self->pending_bytes);
1255
1256 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1257 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001258 if (b == NULL)
1259 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001260 ret = NULL;
1261 do {
1262 ret = PyObject_CallMethodObjArgs(self->buffer,
1263 _PyIO_str_write, b, NULL);
1264 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265 Py_DECREF(b);
1266 if (ret == NULL)
1267 return -1;
1268 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001269 return 0;
1270}
1271
1272static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001273textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001274{
1275 PyObject *ret;
1276 PyObject *text; /* owned reference */
1277 PyObject *b;
1278 Py_ssize_t textlen;
1279 int haslf = 0;
1280 int needflush = 0;
1281
1282 CHECK_INITIALIZED(self);
1283
1284 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1285 return NULL;
1286 }
1287
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 if (PyUnicode_READY(text) == -1)
1289 return NULL;
1290
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001291 CHECK_CLOSED(self);
1292
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001293 if (self->encoder == NULL)
1294 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001295
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 Py_INCREF(text);
1297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001299
1300 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302 haslf = 1;
1303
1304 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001305 PyObject *newtext = _PyObject_CallMethodId(
1306 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001307 Py_DECREF(text);
1308 if (newtext == NULL)
1309 return NULL;
1310 text = newtext;
1311 }
1312
Antoine Pitroue96ec682011-07-23 21:46:35 +02001313 if (self->write_through)
1314 needflush = 1;
1315 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001316 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001318 needflush = 1;
1319
1320 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001321 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001323 self->encoding_start_of_stream = 0;
1324 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001325 else
1326 b = PyObject_CallMethodObjArgs(self->encoder,
1327 _PyIO_str_encode, text, NULL);
1328 Py_DECREF(text);
1329 if (b == NULL)
1330 return NULL;
1331
1332 if (self->pending_bytes == NULL) {
1333 self->pending_bytes = PyList_New(0);
1334 if (self->pending_bytes == NULL) {
1335 Py_DECREF(b);
1336 return NULL;
1337 }
1338 self->pending_bytes_count = 0;
1339 }
1340 if (PyList_Append(self->pending_bytes, b) < 0) {
1341 Py_DECREF(b);
1342 return NULL;
1343 }
1344 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1345 Py_DECREF(b);
1346 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001347 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001348 return NULL;
1349 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001350
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001351 if (needflush) {
1352 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1353 if (ret == NULL)
1354 return NULL;
1355 Py_DECREF(ret);
1356 }
1357
1358 Py_CLEAR(self->snapshot);
1359
1360 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001361 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001362 if (ret == NULL)
1363 return NULL;
1364 Py_DECREF(ret);
1365 }
1366
1367 return PyLong_FromSsize_t(textlen);
1368}
1369
1370/* Steal a reference to chars and store it in the decoded_char buffer;
1371 */
1372static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001373textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001374{
1375 Py_CLEAR(self->decoded_chars);
1376 self->decoded_chars = chars;
1377 self->decoded_chars_used = 0;
1378}
1379
1380static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001381textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001382{
1383 PyObject *chars;
1384 Py_ssize_t avail;
1385
1386 if (self->decoded_chars == NULL)
1387 return PyUnicode_FromStringAndSize(NULL, 0);
1388
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001389 /* decoded_chars is guaranteed to be "ready". */
1390 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001391 - self->decoded_chars_used);
1392
1393 assert(avail >= 0);
1394
1395 if (n < 0 || n > avail)
1396 n = avail;
1397
1398 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001399 chars = PyUnicode_Substring(self->decoded_chars,
1400 self->decoded_chars_used,
1401 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001402 if (chars == NULL)
1403 return NULL;
1404 }
1405 else {
1406 chars = self->decoded_chars;
1407 Py_INCREF(chars);
1408 }
1409
1410 self->decoded_chars_used += n;
1411 return chars;
1412}
1413
1414/* Read and decode the next chunk of data from the BufferedReader.
1415 */
1416static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001417textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001418{
1419 PyObject *dec_buffer = NULL;
1420 PyObject *dec_flags = NULL;
1421 PyObject *input_chunk = NULL;
1422 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001423 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001424 int eof;
1425
1426 /* The return value is True unless EOF was reached. The decoded string is
1427 * placed in self._decoded_chars (replacing its previous value). The
1428 * entire input chunk is sent to the decoder, though some of it may remain
1429 * buffered in the decoder, yet to be converted.
1430 */
1431
1432 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001433 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001434 return -1;
1435 }
1436
1437 if (self->telling) {
1438 /* To prepare for tell(), we need to snapshot a point in the file
1439 * where the decoder's input buffer is empty.
1440 */
1441
1442 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1443 _PyIO_str_getstate, NULL);
1444 if (state == NULL)
1445 return -1;
1446 /* Given this, we know there was a valid snapshot point
1447 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1448 */
1449 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1450 Py_DECREF(state);
1451 return -1;
1452 }
1453 Py_INCREF(dec_buffer);
1454 Py_INCREF(dec_flags);
1455 Py_DECREF(state);
1456 }
1457
1458 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001459 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001460 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001461 }
1462 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001463 if (chunk_size == NULL)
1464 goto fail;
1465 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001466 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1467 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001468 Py_DECREF(chunk_size);
1469 if (input_chunk == NULL)
1470 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001471 if (!PyBytes_Check(input_chunk)) {
1472 PyErr_Format(PyExc_TypeError,
1473 "underlying %s() should have returned a bytes object, "
1474 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1475 Py_TYPE(input_chunk)->tp_name);
1476 goto fail;
1477 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001478
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001479 nbytes = PyBytes_Size(input_chunk);
1480 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001481
1482 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1483 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1484 self->decoder, input_chunk, eof);
1485 }
1486 else {
1487 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1488 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1489 }
1490
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001491 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001492 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001493 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001494 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001495 if (nchars > 0)
1496 self->b2cratio = (double) nbytes / nchars;
1497 else
1498 self->b2cratio = 0.0;
1499 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001500 eof = 0;
1501
1502 if (self->telling) {
1503 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1504 * next input to be decoded is dec_buffer + input_chunk.
1505 */
1506 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1507 if (next_input == NULL)
1508 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001509 if (!PyBytes_Check(next_input)) {
1510 PyErr_Format(PyExc_TypeError,
1511 "decoder getstate() should have returned a bytes "
1512 "object, not '%.200s'",
1513 Py_TYPE(next_input)->tp_name);
1514 Py_DECREF(next_input);
1515 goto fail;
1516 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001517 Py_DECREF(dec_buffer);
1518 Py_CLEAR(self->snapshot);
1519 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1520 }
1521 Py_DECREF(input_chunk);
1522
1523 return (eof == 0);
1524
1525 fail:
1526 Py_XDECREF(dec_buffer);
1527 Py_XDECREF(dec_flags);
1528 Py_XDECREF(input_chunk);
1529 return -1;
1530}
1531
1532static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001533textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534{
1535 Py_ssize_t n = -1;
1536 PyObject *result = NULL, *chunks = NULL;
1537
1538 CHECK_INITIALIZED(self);
1539
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001540 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001541 return NULL;
1542
1543 CHECK_CLOSED(self);
1544
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001545 if (self->decoder == NULL)
1546 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001547
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001548 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549 return NULL;
1550
1551 if (n < 0) {
1552 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001553 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 PyObject *decoded;
1555 if (bytes == NULL)
1556 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001557
1558 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1559 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1560 bytes, 1);
1561 else
1562 decoded = PyObject_CallMethodObjArgs(
1563 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001565 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001566 goto fail;
1567
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001568 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569
1570 if (result == NULL) {
1571 Py_DECREF(decoded);
1572 return NULL;
1573 }
1574
1575 PyUnicode_AppendAndDel(&result, decoded);
1576 if (result == NULL)
1577 goto fail;
1578
1579 Py_CLEAR(self->snapshot);
1580 return result;
1581 }
1582 else {
1583 int res = 1;
1584 Py_ssize_t remaining = n;
1585
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001586 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001587 if (result == NULL)
1588 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001589 if (PyUnicode_READY(result) == -1)
1590 goto fail;
1591 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592
1593 /* Keep reading chunks until we have n characters to return */
1594 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001595 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001596 if (res < 0) {
1597 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1598 when EINTR occurs so we needn't do it ourselves. */
1599 if (_PyIO_trap_eintr()) {
1600 continue;
1601 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001603 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604 if (res == 0) /* EOF */
1605 break;
1606 if (chunks == NULL) {
1607 chunks = PyList_New(0);
1608 if (chunks == NULL)
1609 goto fail;
1610 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001611 if (PyUnicode_GET_LENGTH(result) > 0 &&
1612 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613 goto fail;
1614 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001615 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 if (result == NULL)
1617 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001618 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001619 }
1620 if (chunks != NULL) {
1621 if (result != NULL && PyList_Append(chunks, result) < 0)
1622 goto fail;
1623 Py_CLEAR(result);
1624 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1625 if (result == NULL)
1626 goto fail;
1627 Py_CLEAR(chunks);
1628 }
1629 return result;
1630 }
1631 fail:
1632 Py_XDECREF(result);
1633 Py_XDECREF(chunks);
1634 return NULL;
1635}
1636
1637
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001638/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001639 that is to the NUL character. Otherwise the function will produce
1640 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001641static char *
1642find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001644 if (kind == PyUnicode_1BYTE_KIND) {
1645 assert(ch < 256);
1646 return (char *) memchr((void *) s, (char) ch, end - s);
1647 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001649 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001650 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001651 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001652 return s;
1653 if (s == end)
1654 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001655 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 }
1657}
1658
1659Py_ssize_t
1660_PyIO_find_line_ending(
1661 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001662 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001664 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001665
1666 if (translated) {
1667 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001670 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671 else {
1672 *consumed = len;
1673 return -1;
1674 }
1675 }
1676 else if (universal) {
1677 /* Universal newline search. Find any of \r, \r\n, \n
1678 * The decoder ensures that \r\n are not split in two pieces
1679 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001680 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001681 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001682 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001683 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001684 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001685 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001686 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 if (s >= end) {
1688 *consumed = len;
1689 return -1;
1690 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001691 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001694 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001695 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001696 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001697 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001698 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001699 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001700 }
1701 }
1702 }
1703 else {
1704 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001705 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1706 char *nl = PyUnicode_DATA(readnl);
1707 /* Assume that readnl is an ASCII character. */
1708 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001710 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001712 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 *consumed = len;
1714 return -1;
1715 }
1716 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001717 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001718 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001719 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 if (e < s)
1721 e = s;
1722 while (s < e) {
1723 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001724 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001725 if (pos == NULL || pos >= e)
1726 break;
1727 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001728 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001729 break;
1730 }
1731 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001732 return (pos - start)/kind + readnl_len;
1733 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001734 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001735 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001736 if (pos == NULL)
1737 *consumed = len;
1738 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001739 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 return -1;
1741 }
1742 }
1743}
1744
1745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001746_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747{
1748 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1749 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1750 int res;
1751
1752 CHECK_CLOSED(self);
1753
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001754 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001755 return NULL;
1756
1757 chunked = 0;
1758
1759 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001760 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001762 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001763 Py_ssize_t consumed = 0;
1764
1765 /* First, get some data if necessary */
1766 res = 1;
1767 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001768 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001769 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001770 if (res < 0) {
1771 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1772 when EINTR occurs so we needn't do it ourselves. */
1773 if (_PyIO_trap_eintr()) {
1774 continue;
1775 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001776 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001777 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001778 if (res == 0)
1779 break;
1780 }
1781 if (res == 0) {
1782 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001783 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001784 Py_CLEAR(self->snapshot);
1785 start = endpos = offset_to_buffer = 0;
1786 break;
1787 }
1788
1789 if (remaining == NULL) {
1790 line = self->decoded_chars;
1791 start = self->decoded_chars_used;
1792 offset_to_buffer = 0;
1793 Py_INCREF(line);
1794 }
1795 else {
1796 assert(self->decoded_chars_used == 0);
1797 line = PyUnicode_Concat(remaining, self->decoded_chars);
1798 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001799 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001800 Py_CLEAR(remaining);
1801 if (line == NULL)
1802 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001803 if (PyUnicode_READY(line) == -1)
1804 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805 }
1806
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001807 ptr = PyUnicode_DATA(line);
1808 line_len = PyUnicode_GET_LENGTH(line);
1809 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001810
1811 endpos = _PyIO_find_line_ending(
1812 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001813 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001814 ptr + kind * start,
1815 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001816 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001817 if (endpos >= 0) {
1818 endpos += start;
1819 if (limit >= 0 && (endpos - start) + chunked >= limit)
1820 endpos = start + limit - chunked;
1821 break;
1822 }
1823
1824 /* We can put aside up to `endpos` */
1825 endpos = consumed + start;
1826 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1827 /* Didn't find line ending, but reached length limit */
1828 endpos = start + limit - chunked;
1829 break;
1830 }
1831
1832 if (endpos > start) {
1833 /* No line ending seen yet - put aside current data */
1834 PyObject *s;
1835 if (chunks == NULL) {
1836 chunks = PyList_New(0);
1837 if (chunks == NULL)
1838 goto error;
1839 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001840 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001841 if (s == NULL)
1842 goto error;
1843 if (PyList_Append(chunks, s) < 0) {
1844 Py_DECREF(s);
1845 goto error;
1846 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001847 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001848 Py_DECREF(s);
1849 }
1850 /* There may be some remaining bytes we'll have to prepend to the
1851 next chunk of data */
1852 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001853 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001854 if (remaining == NULL)
1855 goto error;
1856 }
1857 Py_CLEAR(line);
1858 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001859 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001860 }
1861
1862 if (line != NULL) {
1863 /* Our line ends in the current buffer */
1864 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001865 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1866 PyObject *s = PyUnicode_Substring(line, start, endpos);
1867 Py_CLEAR(line);
1868 if (s == NULL)
1869 goto error;
1870 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001871 }
1872 }
1873 if (remaining != NULL) {
1874 if (chunks == NULL) {
1875 chunks = PyList_New(0);
1876 if (chunks == NULL)
1877 goto error;
1878 }
1879 if (PyList_Append(chunks, remaining) < 0)
1880 goto error;
1881 Py_CLEAR(remaining);
1882 }
1883 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001884 if (line != NULL) {
1885 if (PyList_Append(chunks, line) < 0)
1886 goto error;
1887 Py_DECREF(line);
1888 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001889 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1890 if (line == NULL)
1891 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001892 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001893 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001894 if (line == NULL) {
1895 Py_INCREF(_PyIO_empty_str);
1896 line = _PyIO_empty_str;
1897 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001898
1899 return line;
1900
1901 error:
1902 Py_XDECREF(chunks);
1903 Py_XDECREF(remaining);
1904 Py_XDECREF(line);
1905 return NULL;
1906}
1907
1908static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001909textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001910{
1911 Py_ssize_t limit = -1;
1912
1913 CHECK_INITIALIZED(self);
1914 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1915 return NULL;
1916 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001917 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001918}
1919
1920/* Seek and Tell */
1921
1922typedef struct {
1923 Py_off_t start_pos;
1924 int dec_flags;
1925 int bytes_to_feed;
1926 int chars_to_skip;
1927 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001928} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001929
/*
   To speed up cookie packing/unpacking, the cookie_type fields are stored
   in a temporary byte string which is converted with
   _PyLong_FromByteArray() or _PyLong_AsByteArray() (resp.).
   The following macros define at which offsets in that intermediary byte
   string the various cookie_type fields will be stored.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#ifndef WORDS_BIGENDIAN

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie, so the
   fields are stored in declaration order. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#endif
1967
1968static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001969textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001970{
1971 unsigned char buffer[COOKIE_BUF_LEN];
1972 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1973 if (cookieLong == NULL)
1974 return -1;
1975
1976 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1977 IS_LITTLE_ENDIAN, 0) < 0) {
1978 Py_DECREF(cookieLong);
1979 return -1;
1980 }
1981 Py_DECREF(cookieLong);
1982
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001983 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1984 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1985 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1986 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1987 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001988
1989 return 0;
1990}
1991
1992static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001993textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001994{
1995 unsigned char buffer[COOKIE_BUF_LEN];
1996
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001997 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1998 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1999 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2000 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2001 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002
2003 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
2004}
2005#undef IS_LITTLE_ENDIAN
2006
2007static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002008_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002009{
2010 PyObject *res;
2011 /* When seeking to the start of the stream, we call decoder.reset()
2012 rather than decoder.getstate().
2013 This is for a few decoders such as utf-16 for which the state value
2014 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2015 utf-16, that we are expecting a BOM).
2016 */
2017 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2018 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2019 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002020 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2021 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002022 if (res == NULL)
2023 return -1;
2024 Py_DECREF(res);
2025 return 0;
2026}
2027
Antoine Pitroue4501852009-05-14 18:55:55 +00002028static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002029_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002030{
2031 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002032 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002033 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2034 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2035 self->encoding_start_of_stream = 1;
2036 }
2037 else {
2038 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2039 _PyIO_zero, NULL);
2040 self->encoding_start_of_stream = 0;
2041 }
2042 if (res == NULL)
2043 return -1;
2044 Py_DECREF(res);
2045 return 0;
2046}
2047
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002048static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002049textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002050{
2051 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002052 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054 PyObject *res;
2055 int cmp;
2056
2057 CHECK_INITIALIZED(self);
2058
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002059 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2060 return NULL;
2061 CHECK_CLOSED(self);
2062
2063 Py_INCREF(cookieObj);
2064
2065 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002066 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 goto fail;
2068 }
2069
2070 if (whence == 1) {
2071 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002072 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002073 if (cmp < 0)
2074 goto fail;
2075
2076 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002077 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002078 goto fail;
2079 }
2080
2081 /* Seeking to the current position should attempt to
2082 * sync the underlying buffer with the current position.
2083 */
2084 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002085 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 if (cookieObj == NULL)
2087 goto fail;
2088 }
2089 else if (whence == 2) {
2090 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002091 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 if (cmp < 0)
2093 goto fail;
2094
2095 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002096 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002097 goto fail;
2098 }
2099
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002100 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002101 if (res == NULL)
2102 goto fail;
2103 Py_DECREF(res);
2104
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002105 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002106 Py_CLEAR(self->snapshot);
2107 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002108 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002109 if (res == NULL)
2110 goto fail;
2111 Py_DECREF(res);
2112 }
2113
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002114 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002115 Py_XDECREF(cookieObj);
2116 return res;
2117 }
2118 else if (whence != 0) {
2119 PyErr_Format(PyExc_ValueError,
2120 "invalid whence (%d, should be 0, 1 or 2)", whence);
2121 goto fail;
2122 }
2123
Antoine Pitroue4501852009-05-14 18:55:55 +00002124 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002125 if (cmp < 0)
2126 goto fail;
2127
2128 if (cmp == 1) {
2129 PyErr_Format(PyExc_ValueError,
2130 "negative seek position %R", cookieObj);
2131 goto fail;
2132 }
2133
2134 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2135 if (res == NULL)
2136 goto fail;
2137 Py_DECREF(res);
2138
2139 /* The strategy of seek() is to go back to the safe start point
2140 * and replay the effect of read(chars_to_skip) from there.
2141 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002142 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002143 goto fail;
2144
2145 /* Seek back to the safe start point. */
2146 posobj = PyLong_FromOff_t(cookie.start_pos);
2147 if (posobj == NULL)
2148 goto fail;
2149 res = PyObject_CallMethodObjArgs(self->buffer,
2150 _PyIO_str_seek, posobj, NULL);
2151 Py_DECREF(posobj);
2152 if (res == NULL)
2153 goto fail;
2154 Py_DECREF(res);
2155
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002156 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157 Py_CLEAR(self->snapshot);
2158
2159 /* Restore the decoder to its state from the safe start point. */
2160 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002161 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002162 goto fail;
2163 }
2164
2165 if (cookie.chars_to_skip) {
2166 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002167 PyObject *input_chunk = _PyObject_CallMethodId(
2168 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002169 PyObject *decoded;
2170
2171 if (input_chunk == NULL)
2172 goto fail;
2173
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002174 if (!PyBytes_Check(input_chunk)) {
2175 PyErr_Format(PyExc_TypeError,
2176 "underlying read() should have returned a bytes "
2177 "object, not '%.200s'",
2178 Py_TYPE(input_chunk)->tp_name);
2179 Py_DECREF(input_chunk);
2180 goto fail;
2181 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002182
2183 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2184 if (self->snapshot == NULL) {
2185 Py_DECREF(input_chunk);
2186 goto fail;
2187 }
2188
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002189 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2190 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002191
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002192 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002193 goto fail;
2194
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002195 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196
2197 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002198 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002199 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2200 goto fail;
2201 }
2202 self->decoded_chars_used = cookie.chars_to_skip;
2203 }
2204 else {
2205 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2206 if (self->snapshot == NULL)
2207 goto fail;
2208 }
2209
Antoine Pitroue4501852009-05-14 18:55:55 +00002210 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2211 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002212 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002213 goto fail;
2214 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002215 return cookieObj;
2216 fail:
2217 Py_XDECREF(cookieObj);
2218 return NULL;
2219
2220}
2221
2222static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002223textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002224{
2225 PyObject *res;
2226 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002227 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002228 PyObject *next_input;
2229 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002230 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002231 PyObject *saved_state = NULL;
2232 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002233 char *dec_buffer;
2234 Py_ssize_t dec_buffer_len;
2235 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002236
2237 CHECK_INITIALIZED(self);
2238 CHECK_CLOSED(self);
2239
2240 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002241 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002242 goto fail;
2243 }
2244 if (!self->telling) {
2245 PyErr_SetString(PyExc_IOError,
2246 "telling position disabled by next() call");
2247 goto fail;
2248 }
2249
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002250 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002251 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002252 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002253 if (res == NULL)
2254 goto fail;
2255 Py_DECREF(res);
2256
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002257 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002258 if (posobj == NULL)
2259 goto fail;
2260
2261 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002262 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002263 return posobj;
2264 }
2265
2266#if defined(HAVE_LARGEFILE_SUPPORT)
2267 cookie.start_pos = PyLong_AsLongLong(posobj);
2268#else
2269 cookie.start_pos = PyLong_AsLong(posobj);
2270#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002271 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002272 if (PyErr_Occurred())
2273 goto fail;
2274
2275 /* Skip backward to the snapshot point (see _read_chunk). */
2276 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2277 goto fail;
2278
2279 assert (PyBytes_Check(next_input));
2280
2281 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2282
2283 /* How many decoded characters have been used up since the snapshot? */
2284 if (self->decoded_chars_used == 0) {
2285 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002286 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002287 }
2288
2289 chars_to_skip = self->decoded_chars_used;
2290
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002291 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002292 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2293 _PyIO_str_getstate, NULL);
2294 if (saved_state == NULL)
2295 goto fail;
2296
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002297#define DECODER_GETSTATE() do { \
2298 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2299 _PyIO_str_getstate, NULL); \
2300 if (_state == NULL) \
2301 goto fail; \
2302 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2303 Py_DECREF(_state); \
2304 goto fail; \
2305 } \
2306 Py_DECREF(_state); \
2307 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002308
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002309#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002310 PyObject *_decoded = _PyObject_CallMethodId( \
2311 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002312 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002313 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002314 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002315 Py_DECREF(_decoded); \
2316 } while (0)
2317
2318 /* Fast search for an acceptable start point, close to our
2319 current pos */
2320 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2321 skip_back = 1;
2322 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2323 input = PyBytes_AS_STRING(next_input);
2324 while (skip_bytes > 0) {
2325 /* Decode up to temptative start point */
2326 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2327 goto fail;
2328 DECODER_DECODE(input, skip_bytes, chars_decoded);
2329 if (chars_decoded <= chars_to_skip) {
2330 DECODER_GETSTATE();
2331 if (dec_buffer_len == 0) {
2332 /* Before pos and no bytes buffered in decoder => OK */
2333 cookie.dec_flags = dec_flags;
2334 chars_to_skip -= chars_decoded;
2335 break;
2336 }
2337 /* Skip back by buffered amount and reset heuristic */
2338 skip_bytes -= dec_buffer_len;
2339 skip_back = 1;
2340 }
2341 else {
2342 /* We're too far ahead, skip back a bit */
2343 skip_bytes -= skip_back;
2344 skip_back *= 2;
2345 }
2346 }
2347 if (skip_bytes <= 0) {
2348 skip_bytes = 0;
2349 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2350 goto fail;
2351 }
2352
2353 /* Note our initial start point. */
2354 cookie.start_pos += skip_bytes;
2355 cookie.chars_to_skip = chars_to_skip;
2356 if (chars_to_skip == 0)
2357 goto finally;
2358
2359 /* We should be close to the desired position. Now feed the decoder one
2360 * byte at a time until we reach the `chars_to_skip` target.
2361 * As we go, note the nearest "safe start point" before the current
2362 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002363 * can safely start from there and advance to this location).
2364 */
2365 chars_decoded = 0;
2366 input = PyBytes_AS_STRING(next_input);
2367 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002368 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002369 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002370 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002372 DECODER_DECODE(input, 1, n);
2373 /* We got n chars for 1 byte */
2374 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002375 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002376 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002377
2378 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2379 /* Decoder buffer is empty, so this is a safe start point. */
2380 cookie.start_pos += cookie.bytes_to_feed;
2381 chars_to_skip -= chars_decoded;
2382 cookie.dec_flags = dec_flags;
2383 cookie.bytes_to_feed = 0;
2384 chars_decoded = 0;
2385 }
2386 if (chars_decoded >= chars_to_skip)
2387 break;
2388 input++;
2389 }
2390 if (input == input_end) {
2391 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002392 PyObject *decoded = _PyObject_CallMethodId(
2393 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002394 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002395 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002396 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002397 Py_DECREF(decoded);
2398 cookie.need_eof = 1;
2399
2400 if (chars_decoded < chars_to_skip) {
2401 PyErr_SetString(PyExc_IOError,
2402 "can't reconstruct logical file position");
2403 goto fail;
2404 }
2405 }
2406
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002407finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002408 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002409 Py_DECREF(saved_state);
2410 if (res == NULL)
2411 return NULL;
2412 Py_DECREF(res);
2413
2414 /* The returned cookie corresponds to the last safe start point. */
2415 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002416 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002417
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002418fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002419 if (saved_state) {
2420 PyObject *type, *value, *traceback;
2421 PyErr_Fetch(&type, &value, &traceback);
2422
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002423 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002424 Py_DECREF(saved_state);
2425 if (res == NULL)
2426 return NULL;
2427 Py_DECREF(res);
2428
2429 PyErr_Restore(type, value, traceback);
2430 }
2431 return NULL;
2432}
2433
2434static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002435textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002436{
2437 PyObject *pos = Py_None;
2438 PyObject *res;
2439
2440 CHECK_INITIALIZED(self)
2441 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2442 return NULL;
2443 }
2444
2445 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2446 if (res == NULL)
2447 return NULL;
2448 Py_DECREF(res);
2449
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002450 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002451}
2452
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002453static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002454textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002455{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002456 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002457
2458 CHECK_INITIALIZED(self);
2459
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002460 res = PyUnicode_FromString("<_io.TextIOWrapper");
2461 if (res == NULL)
2462 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002463 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002464 if (nameobj == NULL) {
2465 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2466 PyErr_Clear();
2467 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002468 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002469 }
2470 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002471 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002472 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002473 if (s == NULL)
2474 goto error;
2475 PyUnicode_AppendAndDel(&res, s);
2476 if (res == NULL)
2477 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002478 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002479 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002480 if (modeobj == NULL) {
2481 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2482 PyErr_Clear();
2483 else
2484 goto error;
2485 }
2486 else {
2487 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2488 Py_DECREF(modeobj);
2489 if (s == NULL)
2490 goto error;
2491 PyUnicode_AppendAndDel(&res, s);
2492 if (res == NULL)
2493 return NULL;
2494 }
2495 s = PyUnicode_FromFormat("%U encoding=%R>",
2496 res, self->encoding);
2497 Py_DECREF(res);
2498 return s;
2499error:
2500 Py_XDECREF(res);
2501 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002502}
2503
2504
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002505/* Inquiries */
2506
2507static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002508textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002509{
2510 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002511 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002512}
2513
2514static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002515textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002516{
2517 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002518 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002519}
2520
2521static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002522textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002523{
2524 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002525 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002526}
2527
2528static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002529textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530{
2531 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002532 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002533}
2534
2535static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002536textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537{
2538 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002539 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002540}
2541
2542static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002543textiowrapper_getstate(textio *self, PyObject *args)
2544{
2545 PyErr_Format(PyExc_TypeError,
2546 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2547 return NULL;
2548}
2549
2550static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002551textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002552{
2553 CHECK_INITIALIZED(self);
2554 CHECK_CLOSED(self);
2555 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002556 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002557 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002558 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002559}
2560
2561static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002562textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002563{
2564 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002565 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002566 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567
Antoine Pitrou6be88762010-05-03 16:48:20 +00002568 res = textiowrapper_closed_get(self, NULL);
2569 if (res == NULL)
2570 return NULL;
2571 r = PyObject_IsTrue(res);
2572 Py_DECREF(res);
2573 if (r < 0)
2574 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002575
Antoine Pitrou6be88762010-05-03 16:48:20 +00002576 if (r > 0) {
2577 Py_RETURN_NONE; /* stream already closed */
2578 }
2579 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002580 PyObject *exc = NULL, *val, *tb;
Antoine Pitroue033e062010-10-29 10:38:18 +00002581 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002582 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002583 if (res)
2584 Py_DECREF(res);
2585 else
2586 PyErr_Clear();
2587 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002588 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002589 if (res == NULL)
2590 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002591 else
2592 Py_DECREF(res);
2593
Benjamin Peterson68623612012-12-20 11:53:11 -06002594 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2595 if (exc != NULL) {
2596 if (res != NULL) {
2597 Py_CLEAR(res);
2598 PyErr_Restore(exc, val, tb);
2599 }
2600 else {
2601 PyObject *val2;
2602 Py_DECREF(exc);
2603 Py_XDECREF(tb);
2604 PyErr_Fetch(&exc, &val2, &tb);
2605 PyErr_NormalizeException(&exc, &val2, &tb);
2606 PyException_SetContext(val2, val);
2607 PyErr_Restore(exc, val2, tb);
2608 }
2609 }
2610 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002611 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612}
2613
2614static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002615textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002616{
2617 PyObject *line;
2618
2619 CHECK_INITIALIZED(self);
2620
2621 self->telling = 0;
2622 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2623 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002624 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002625 }
2626 else {
2627 line = PyObject_CallMethodObjArgs((PyObject *)self,
2628 _PyIO_str_readline, NULL);
2629 if (line && !PyUnicode_Check(line)) {
2630 PyErr_Format(PyExc_IOError,
2631 "readline() should have returned an str object, "
2632 "not '%.200s'", Py_TYPE(line)->tp_name);
2633 Py_DECREF(line);
2634 return NULL;
2635 }
2636 }
2637
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002638 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002639 return NULL;
2640
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002641 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002642 /* Reached EOF or would have blocked */
2643 Py_DECREF(line);
2644 Py_CLEAR(self->snapshot);
2645 self->telling = self->seekable;
2646 return NULL;
2647 }
2648
2649 return line;
2650}
2651
2652static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002653textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002654{
2655 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002656 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002657}
2658
2659static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002660textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661{
2662 CHECK_INITIALIZED(self);
2663 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2664}
2665
2666static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002667textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002668{
2669 PyObject *res;
2670 CHECK_INITIALIZED(self);
2671 if (self->decoder == NULL)
2672 Py_RETURN_NONE;
2673 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2674 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002675 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2676 PyErr_Clear();
2677 Py_RETURN_NONE;
2678 }
2679 else {
2680 return NULL;
2681 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682 }
2683 return res;
2684}
2685
2686static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002687textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002688{
2689 CHECK_INITIALIZED(self);
2690 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2691}
2692
2693static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002694textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002695{
2696 CHECK_INITIALIZED(self);
2697 return PyLong_FromSsize_t(self->chunk_size);
2698}
2699
2700static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002701textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002702{
2703 Py_ssize_t n;
2704 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002705 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002706 if (n == -1 && PyErr_Occurred())
2707 return -1;
2708 if (n <= 0) {
2709 PyErr_SetString(PyExc_ValueError,
2710 "a strictly positive integer is required");
2711 return -1;
2712 }
2713 self->chunk_size = n;
2714 return 0;
2715}
2716
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002717static PyMethodDef textiowrapper_methods[] = {
2718 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2719 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2720 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2721 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2722 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2723 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002724
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002725 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2726 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2727 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2728 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2729 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002730 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002731
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002732 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2733 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2734 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002735 {NULL, NULL}
2736};
2737
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002738static PyMemberDef textiowrapper_members[] = {
2739 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2740 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2741 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002742 {NULL}
2743};
2744
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002745static PyGetSetDef textiowrapper_getset[] = {
2746 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2747 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002748/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2749*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002750 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2751 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2752 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2753 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002754 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002755};
2756
2757PyTypeObject PyTextIOWrapper_Type = {
2758 PyVarObject_HEAD_INIT(NULL, 0)
2759 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002760 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002761 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002762 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002763 0, /*tp_print*/
2764 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002765 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002766 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002767 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002768 0, /*tp_as_number*/
2769 0, /*tp_as_sequence*/
2770 0, /*tp_as_mapping*/
2771 0, /*tp_hash */
2772 0, /*tp_call*/
2773 0, /*tp_str*/
2774 0, /*tp_getattro*/
2775 0, /*tp_setattro*/
2776 0, /*tp_as_buffer*/
2777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2778 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002779 textiowrapper_doc, /* tp_doc */
2780 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2781 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002782 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002783 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002784 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002785 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2786 textiowrapper_methods, /* tp_methods */
2787 textiowrapper_members, /* tp_members */
2788 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002789 0, /* tp_base */
2790 0, /* tp_dict */
2791 0, /* tp_descr_get */
2792 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002793 offsetof(textio, dict), /*tp_dictoffset*/
2794 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002795 0, /* tp_alloc */
2796 PyType_GenericNew, /* tp_new */
2797};