blob: 8bd9ba112289d6598f5d6a61a978a646095d78bf [file] [log] [blame]
/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifiers for the attribute and method names declared for use
   in this file (referenced elsewhere as &PyId_<name>). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
198/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200260static int
261check_decoded(PyObject *decoded)
262{
263 if (decoded == NULL)
264 return -1;
265 if (!PyUnicode_Check(decoded)) {
266 PyErr_Format(PyExc_TypeError,
267 "decoder should return a string result, not '%.200s'",
268 Py_TYPE(decoded)->tp_name);
269 Py_DECREF(decoded);
270 return -1;
271 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200272 if (PyUnicode_READY(decoded) < 0) {
273 Py_DECREF(decoded);
274 return -1;
275 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200276 return 0;
277}
278
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together into the `seennl` field. */
#define SEEN_CR   1
#define SEEN_LF   2
#define SEEN_CRLF 4
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
283
284PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000285_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000286 PyObject *input, int final)
287{
288 PyObject *output;
289 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000290 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000291
292 if (self->decoder == NULL) {
293 PyErr_SetString(PyExc_ValueError,
294 "IncrementalNewlineDecoder.__init__ not called");
295 return NULL;
296 }
297
298 /* decode input (with the eventual \r from a previous pass) */
299 if (self->decoder != Py_None) {
300 output = PyObject_CallMethodObjArgs(self->decoder,
301 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
302 }
303 else {
304 output = input;
305 Py_INCREF(output);
306 }
307
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200308 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000309 return NULL;
310
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200311 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000312 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200313 /* Prefix output with CR */
314 int kind;
315 PyObject *modified;
316 char *out;
317
318 modified = PyUnicode_New(output_len + 1,
319 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000320 if (modified == NULL)
321 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 kind = PyUnicode_KIND(modified);
323 out = PyUnicode_DATA(modified);
324 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200325 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000326 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000328 self->pendingcr = 0;
329 output_len++;
330 }
331
332 /* retain last \r even when not translating data:
333 * then readline() is sure to get \r\n in one pass
334 */
335 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000336 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200337 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
338 {
339 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
340 if (modified == NULL)
341 goto error;
342 Py_DECREF(output);
343 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000344 self->pendingcr = 1;
345 }
346 }
347
348 /* Record which newlines are read and do newline translation if desired,
349 all in one pass. */
350 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200351 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000352 Py_ssize_t len;
353 int seennl = self->seennl;
354 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200355 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000356
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 in_str = PyUnicode_DATA(output);
358 len = PyUnicode_GET_LENGTH(output);
359 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360
361 if (len == 0)
362 return output;
363
364 /* If, up to now, newlines are consistently \n, do a quick check
365 for the \r *byte* with the libc's optimized memchr.
366 */
367 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200368 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 }
370
Antoine Pitrou66913e22009-03-06 23:40:56 +0000371 if (only_lf) {
372 /* If not already seen, quick scan for a possible "\n" character.
373 (there's nothing else to be done, even when in translation mode)
374 */
375 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200376 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100377 if (kind == PyUnicode_1BYTE_KIND)
378 seennl |= SEEN_LF;
379 else {
380 Py_ssize_t i = 0;
381 for (;;) {
382 Py_UCS4 c;
383 /* Fast loop for non-control characters */
384 while (PyUnicode_READ(kind, in_str, i) > '\n')
385 i++;
386 c = PyUnicode_READ(kind, in_str, i++);
387 if (c == '\n') {
388 seennl |= SEEN_LF;
389 break;
390 }
391 if (i >= len)
392 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000393 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000394 }
395 }
396 /* Finished: we have scanned for newlines, and none of them
397 need translating */
398 }
399 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000401 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000402 if (seennl == SEEN_ALL)
403 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 while (PyUnicode_READ(kind, in_str, i) > '\r')
408 i++;
409 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000410 if (c == '\n')
411 seennl |= SEEN_LF;
412 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000414 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000416 }
417 else
418 seennl |= SEEN_CR;
419 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000421 break;
422 if (seennl == SEEN_ALL)
423 break;
424 }
425 endscan:
426 ;
427 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000428 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 void *translated;
430 int kind = PyUnicode_KIND(output);
431 void *in_str = PyUnicode_DATA(output);
432 Py_ssize_t in, out;
433 /* XXX: Previous in-place translation here is disabled as
434 resizing is not possible anymore */
435 /* We could try to optimize this so that we only do a copy
436 when there is something to translate. On the other hand,
437 we already know there is a \r byte, so chances are high
438 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200439 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 if (translated == NULL) {
441 PyErr_NoMemory();
442 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000443 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000445 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
449 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000450 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000452 seennl |= SEEN_LF;
453 continue;
454 }
455 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000457 in++;
458 seennl |= SEEN_CRLF;
459 }
460 else
461 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 continue;
464 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200465 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000466 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000468 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 Py_DECREF(output);
470 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100471 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200473 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000474 }
475 self->seennl |= seennl;
476 }
477
478 return output;
479
480 error:
481 Py_DECREF(output);
482 return NULL;
483}
484
485static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000486incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 PyObject *args, PyObject *kwds)
488{
489 char *kwlist[] = {"input", "final", NULL};
490 PyObject *input;
491 int final = 0;
492
493 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
494 kwlist, &input, &final))
495 return NULL;
496 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
497}
498
499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000500incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000501{
502 PyObject *buffer;
503 unsigned PY_LONG_LONG flag;
504
505 if (self->decoder != Py_None) {
506 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
507 _PyIO_str_getstate, NULL);
508 if (state == NULL)
509 return NULL;
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
511 Py_DECREF(state);
512 return NULL;
513 }
514 Py_INCREF(buffer);
515 Py_DECREF(state);
516 }
517 else {
518 buffer = PyBytes_FromString("");
519 flag = 0;
520 }
521 flag <<= 1;
522 if (self->pendingcr)
523 flag |= 1;
524 return Py_BuildValue("NK", buffer, flag);
525}
526
527static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000528incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529{
530 PyObject *buffer;
531 unsigned PY_LONG_LONG flag;
532
533 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
534 return NULL;
535
536 self->pendingcr = (int) flag & 1;
537 flag >>= 1;
538
539 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200540 return _PyObject_CallMethodId(self->decoder,
541 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542 else
543 Py_RETURN_NONE;
544}
545
546static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000547incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000548{
549 self->seennl = 0;
550 self->pendingcr = 0;
551 if (self->decoder != Py_None)
552 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
553 else
554 Py_RETURN_NONE;
555}
556
557static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000558incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000559{
560 switch (self->seennl) {
561 case SEEN_CR:
562 return PyUnicode_FromString("\r");
563 case SEEN_LF:
564 return PyUnicode_FromString("\n");
565 case SEEN_CRLF:
566 return PyUnicode_FromString("\r\n");
567 case SEEN_CR | SEEN_LF:
568 return Py_BuildValue("ss", "\r", "\n");
569 case SEEN_CR | SEEN_CRLF:
570 return Py_BuildValue("ss", "\r", "\r\n");
571 case SEEN_LF | SEEN_CRLF:
572 return Py_BuildValue("ss", "\n", "\r\n");
573 case SEEN_CR | SEEN_LF | SEEN_CRLF:
574 return Py_BuildValue("sss", "\r", "\n", "\r\n");
575 default:
576 Py_RETURN_NONE;
577 }
578
579}
580
581
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000582static PyMethodDef incrementalnewlinedecoder_methods[] = {
583 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
584 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
585 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
586 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000587 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000588};
589
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590static PyGetSetDef incrementalnewlinedecoder_getset[] = {
591 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000592 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000593};
594
595PyTypeObject PyIncrementalNewlineDecoder_Type = {
596 PyVarObject_HEAD_INIT(NULL, 0)
597 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000598 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000599 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000600 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601 0, /*tp_print*/
602 0, /*tp_getattr*/
603 0, /*tp_setattr*/
604 0, /*tp_compare */
605 0, /*tp_repr*/
606 0, /*tp_as_number*/
607 0, /*tp_as_sequence*/
608 0, /*tp_as_mapping*/
609 0, /*tp_hash */
610 0, /*tp_call*/
611 0, /*tp_str*/
612 0, /*tp_getattro*/
613 0, /*tp_setattro*/
614 0, /*tp_as_buffer*/
615 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 0, /* tp_traverse */
618 0, /* tp_clear */
619 0, /* tp_richcompare */
620 0, /*tp_weaklistoffset*/
621 0, /* tp_iter */
622 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000623 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000624 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000625 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000626 0, /* tp_base */
627 0, /* tp_dict */
628 0, /* tp_descr_get */
629 0, /* tp_descr_set */
630 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000631 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000632 0, /* tp_alloc */
633 PyType_GenericNew, /* tp_new */
634};
635
636
637/* TextIOWrapper */
638
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000639PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000640 "Character and line based layer over a BufferedIOBase object, buffer.\n"
641 "\n"
642 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200643 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 "\n"
645 "errors determines the strictness of encoding and decoding (see the\n"
646 "codecs.register) and defaults to \"strict\".\n"
647 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200648 "newline controls how line endings are handled. It can be None, '',\n"
649 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
650 "\n"
651 "* On input, if newline is None, universal newlines mode is\n"
652 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
653 " these are translated into '\\n' before being returned to the\n"
654 " caller. If it is '', universal newline mode is enabled, but line\n"
655 " endings are returned to the caller untranslated. If it has any of\n"
656 " the other legal values, input lines are only terminated by the given\n"
657 " string, and the line ending is returned to the caller untranslated.\n"
658 "\n"
659 "* On output, if newline is None, any '\\n' characters written are\n"
660 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300661 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200662 " of the other legal values, any '\\n' characters written are translated\n"
663 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000664 "\n"
665 "If line_buffering is True, a call to flush is implied when a call to\n"
666 "write contains a newline character."
667 );
668
669typedef PyObject *
670 (*encodefunc_t)(PyObject *, PyObject *);
671
672typedef struct
673{
674 PyObject_HEAD
675 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000676 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 Py_ssize_t chunk_size;
678 PyObject *buffer;
679 PyObject *encoding;
680 PyObject *encoder;
681 PyObject *decoder;
682 PyObject *readnl;
683 PyObject *errors;
684 const char *writenl; /* utf-8 encoded, NULL stands for \n */
685 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200686 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000687 char readuniversal;
688 char readtranslate;
689 char writetranslate;
690 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200691 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000692 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000693 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000694 /* Specialized encoding func (see below) */
695 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000696 /* Whether or not it's the start of the stream */
697 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000698
699 /* Reads and writes are internally buffered in order to speed things up.
700 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000701
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000702 Please also note that text to be written is first encoded before being
703 buffered. This is necessary so that encoding errors are immediately
704 reported to the caller, but it unfortunately means that the
705 IncrementalEncoder (whose encode() method is always written in Python)
706 becomes a bottleneck for small writes.
707 */
708 PyObject *decoded_chars; /* buffer for text returned from decoder */
709 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
710 PyObject *pending_bytes; /* list of bytes objects waiting to be
711 written, or NULL */
712 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000713
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000714 /* snapshot is either None, or a tuple (dec_flags, next_input) where
715 * dec_flags is the second (integer) item of the decoder state and
716 * next_input is the chunk of input bytes that comes next after the
717 * snapshot point. We use this to reconstruct decoder states in tell().
718 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000719 PyObject *snapshot;
720 /* Bytes-to-characters ratio for the current chunk. Serves as input for
721 the heuristic in tell(). */
722 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000723
724 /* Cache raw object if it's a FileIO object */
725 PyObject *raw;
726
727 PyObject *weakreflist;
728 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000729} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000730
731
732/* A couple of specialized cases in order to bypass the slow incremental
733 encoding methods for the most popular encodings. */
734
735static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000736ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000737{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200738 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000739}
740
741static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000742utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100744 return _PyUnicode_EncodeUTF16(text,
745 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000746}
747
748static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000749utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100751 return _PyUnicode_EncodeUTF16(text,
752 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000753}
754
755static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000756utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000757{
Antoine Pitroue4501852009-05-14 18:55:55 +0000758 if (!self->encoding_start_of_stream) {
759 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200760#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000761 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000762#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000763 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000764#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000765 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100766 return _PyUnicode_EncodeUTF16(text,
767 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000768}
769
Antoine Pitroue4501852009-05-14 18:55:55 +0000770static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000771utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000772{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100773 return _PyUnicode_EncodeUTF32(text,
774 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000775}
776
777static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000778utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000779{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100780 return _PyUnicode_EncodeUTF32(text,
781 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000782}
783
784static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000785utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000786{
787 if (!self->encoding_start_of_stream) {
788 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200789#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000790 return utf32be_encode(self, text);
791#else
792 return utf32le_encode(self, text);
793#endif
794 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100795 return _PyUnicode_EncodeUTF32(text,
796 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000797}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000798
799static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000800utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000801{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200802 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000803}
804
805static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000806latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000807{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200808 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809}
810
/* Map normalized encoding names onto the specialized encoding funcs */

/* One row of the lookup table: a codec name and the specialized encode
   function to use for it. */
typedef struct {
    const char *name;           /* codec name to match; NULL ends the table */
    encodefunc_t encodefunc;    /* specialized encoder for that codec */
} encodefuncentry;
817
/* Table of codecs that have a specialized encoder in this file.  It is
   scanned linearly and is terminated by the {NULL, NULL} sentinel. */
static encodefuncentry encodefuncs[] = {
    {"ascii",       (encodefunc_t) ascii_encode},
    {"iso8859-1",   (encodefunc_t) latin1_encode},
    {"utf-8",       (encodefunc_t) utf8_encode},
    {"utf-16-be",   (encodefunc_t) utf16be_encode},
    {"utf-16-le",   (encodefunc_t) utf16le_encode},
    {"utf-16",      (encodefunc_t) utf16_encode},
    {"utf-32-be",   (encodefunc_t) utf32be_encode},
    {"utf-32-le",   (encodefunc_t) utf32le_encode},
    {"utf-32",      (encodefunc_t) utf32_encode},
    {NULL, NULL}    /* sentinel */
};
830
831
832static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000833textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000834{
835 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200836 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000837 NULL};
838 PyObject *buffer, *raw;
839 char *encoding = NULL;
840 char *errors = NULL;
841 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200842 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000843 _PyIO_State *state = IO_STATE;
844
845 PyObject *res;
846 int r;
847
848 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000849 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200850 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000851 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200852 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000853 return -1;
854
855 if (newline && newline[0] != '\0'
856 && !(newline[0] == '\n' && newline[1] == '\0')
857 && !(newline[0] == '\r' && newline[1] == '\0')
858 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
859 PyErr_Format(PyExc_ValueError,
860 "illegal newline value: %s", newline);
861 return -1;
862 }
863
864 Py_CLEAR(self->buffer);
865 Py_CLEAR(self->encoding);
866 Py_CLEAR(self->encoder);
867 Py_CLEAR(self->decoder);
868 Py_CLEAR(self->readnl);
869 Py_CLEAR(self->decoded_chars);
870 Py_CLEAR(self->pending_bytes);
871 Py_CLEAR(self->snapshot);
872 Py_CLEAR(self->errors);
873 Py_CLEAR(self->raw);
874 self->decoded_chars_used = 0;
875 self->pending_bytes_count = 0;
876 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000877 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000878
879 if (encoding == NULL) {
880 /* Try os.device_encoding(fileno) */
881 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200882 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000883 /* Ignore only AttributeError and UnsupportedOperation */
884 if (fileno == NULL) {
885 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
886 PyErr_ExceptionMatches(state->unsupported_operation)) {
887 PyErr_Clear();
888 }
889 else {
890 goto error;
891 }
892 }
893 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200894 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500895 Py_DECREF(fileno);
896 if (fd == -1 && PyErr_Occurred()) {
897 goto error;
898 }
899
900 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000901 if (self->encoding == NULL)
902 goto error;
903 else if (!PyUnicode_Check(self->encoding))
904 Py_CLEAR(self->encoding);
905 }
906 }
907 if (encoding == NULL && self->encoding == NULL) {
908 if (state->locale_module == NULL) {
909 state->locale_module = PyImport_ImportModule("locale");
910 if (state->locale_module == NULL)
911 goto catch_ImportError;
912 else
913 goto use_locale;
914 }
915 else {
916 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200917 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200918 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000919 if (self->encoding == NULL) {
920 catch_ImportError:
921 /*
922 Importing locale can raise a ImportError because of
923 _functools, and locale.getpreferredencoding can raise a
924 ImportError if _locale is not available. These will happen
925 during module building.
926 */
927 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
928 PyErr_Clear();
929 self->encoding = PyUnicode_FromString("ascii");
930 }
931 else
932 goto error;
933 }
934 else if (!PyUnicode_Check(self->encoding))
935 Py_CLEAR(self->encoding);
936 }
937 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000938 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000939 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000940 if (encoding == NULL)
941 goto error;
942 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000943 else if (encoding != NULL) {
944 self->encoding = PyUnicode_FromString(encoding);
945 if (self->encoding == NULL)
946 goto error;
947 }
948 else {
949 PyErr_SetString(PyExc_IOError,
950 "could not determine default encoding");
951 }
952
953 if (errors == NULL)
954 errors = "strict";
955 self->errors = PyBytes_FromString(errors);
956 if (self->errors == NULL)
957 goto error;
958
959 self->chunk_size = 8192;
960 self->readuniversal = (newline == NULL || newline[0] == '\0');
961 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200962 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000963 self->readtranslate = (newline == NULL);
964 if (newline) {
965 self->readnl = PyUnicode_FromString(newline);
966 if (self->readnl == NULL)
967 return -1;
968 }
969 self->writetranslate = (newline == NULL || newline[0] != '\0');
970 if (!self->readuniversal && self->readnl) {
971 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000972 if (self->writenl == NULL)
973 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000974 if (!strcmp(self->writenl, "\n"))
975 self->writenl = NULL;
976 }
977#ifdef MS_WINDOWS
978 else
979 self->writenl = "\r\n";
980#endif
981
982 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200983 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000984 if (res == NULL)
985 goto error;
986 r = PyObject_IsTrue(res);
987 Py_DECREF(res);
988 if (r == -1)
989 goto error;
990 if (r == 1) {
991 self->decoder = PyCodec_IncrementalDecoder(
992 encoding, errors);
993 if (self->decoder == NULL)
994 goto error;
995
996 if (self->readuniversal) {
997 PyObject *incrementalDecoder = PyObject_CallFunction(
998 (PyObject *)&PyIncrementalNewlineDecoder_Type,
999 "Oi", self->decoder, (int)self->readtranslate);
1000 if (incrementalDecoder == NULL)
1001 goto error;
1002 Py_CLEAR(self->decoder);
1003 self->decoder = incrementalDecoder;
1004 }
1005 }
1006
1007 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001008 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001009 if (res == NULL)
1010 goto error;
1011 r = PyObject_IsTrue(res);
1012 Py_DECREF(res);
1013 if (r == -1)
1014 goto error;
1015 if (r == 1) {
1016 PyObject *ci;
1017 self->encoder = PyCodec_IncrementalEncoder(
1018 encoding, errors);
1019 if (self->encoder == NULL)
1020 goto error;
1021 /* Get the normalized named of the codec */
1022 ci = _PyCodec_Lookup(encoding);
1023 if (ci == NULL)
1024 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001025 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001026 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001027 if (res == NULL) {
1028 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1029 PyErr_Clear();
1030 else
1031 goto error;
1032 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001033 else if (PyUnicode_Check(res)) {
1034 encodefuncentry *e = encodefuncs;
1035 while (e->name != NULL) {
1036 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1037 self->encodefunc = e->encodefunc;
1038 break;
1039 }
1040 e++;
1041 }
1042 }
1043 Py_XDECREF(res);
1044 }
1045
1046 self->buffer = buffer;
1047 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001048
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001049 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1050 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1051 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001052 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001053 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001054 if (raw == NULL) {
1055 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1056 PyErr_Clear();
1057 else
1058 goto error;
1059 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 else if (Py_TYPE(raw) == &PyFileIO_Type)
1061 self->raw = raw;
1062 else
1063 Py_DECREF(raw);
1064 }
1065
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001066 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001067 if (res == NULL)
1068 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001069 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001070 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001071 if (r < 0)
1072 goto error;
1073 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001074
Martin v. Löwis767046a2011-10-14 15:35:36 +02001075 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001076
Antoine Pitroue4501852009-05-14 18:55:55 +00001077 self->encoding_start_of_stream = 0;
1078 if (self->seekable && self->encoder) {
1079 PyObject *cookieObj;
1080 int cmp;
1081
1082 self->encoding_start_of_stream = 1;
1083
1084 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1085 if (cookieObj == NULL)
1086 goto error;
1087
1088 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1089 Py_DECREF(cookieObj);
1090 if (cmp < 0) {
1091 goto error;
1092 }
1093
1094 if (cmp == 0) {
1095 self->encoding_start_of_stream = 0;
1096 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1097 _PyIO_zero, NULL);
1098 if (res == NULL)
1099 goto error;
1100 Py_DECREF(res);
1101 }
1102 }
1103
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001104 self->ok = 1;
1105 return 0;
1106
1107 error:
1108 return -1;
1109}
1110
1111static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001112_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001113{
1114 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1115 return -1;
1116 self->ok = 0;
1117 Py_CLEAR(self->buffer);
1118 Py_CLEAR(self->encoding);
1119 Py_CLEAR(self->encoder);
1120 Py_CLEAR(self->decoder);
1121 Py_CLEAR(self->readnl);
1122 Py_CLEAR(self->decoded_chars);
1123 Py_CLEAR(self->pending_bytes);
1124 Py_CLEAR(self->snapshot);
1125 Py_CLEAR(self->errors);
1126 Py_CLEAR(self->raw);
1127 return 0;
1128}
1129
1130static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001131textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001132{
Antoine Pitroue033e062010-10-29 10:38:18 +00001133 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001134 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135 return;
1136 _PyObject_GC_UNTRACK(self);
1137 if (self->weakreflist != NULL)
1138 PyObject_ClearWeakRefs((PyObject *)self);
1139 Py_CLEAR(self->dict);
1140 Py_TYPE(self)->tp_free((PyObject *)self);
1141}
1142
/* tp_traverse: report every object reference held by the wrapper to the
   cyclic garbage collector.  Py_VISIT returns early from this function if
   the visit callback returns a non-zero value. */
static int
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
{
    /* Same set of members that _textiowrapper_clear() releases */
    Py_VISIT(self->buffer);
    Py_VISIT(self->encoding);
    Py_VISIT(self->encoder);
    Py_VISIT(self->decoder);
    Py_VISIT(self->readnl);
    Py_VISIT(self->decoded_chars);
    Py_VISIT(self->pending_bytes);
    Py_VISIT(self->snapshot);
    Py_VISIT(self->errors);
    Py_VISIT(self->raw);

    /* Instance __dict__, released separately by textiowrapper_clear() */
    Py_VISIT(self->dict);
    return 0;
}
1160
1161static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001162textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001163{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001164 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001165 return -1;
1166 Py_CLEAR(self->dict);
1167 return 0;
1168}
1169
1170static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001171textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001172
1173/* This macro takes some shortcuts to make the common case faster. */
1174#define CHECK_CLOSED(self) \
1175 do { \
1176 int r; \
1177 PyObject *_res; \
1178 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1179 if (self->raw != NULL) \
1180 r = _PyFileIO_closed(self->raw); \
1181 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001182 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001183 if (_res == NULL) \
1184 return NULL; \
1185 r = PyObject_IsTrue(_res); \
1186 Py_DECREF(_res); \
1187 if (r < 0) \
1188 return NULL; \
1189 } \
1190 if (r > 0) { \
1191 PyErr_SetString(PyExc_ValueError, \
1192 "I/O operation on closed file."); \
1193 return NULL; \
1194 } \
1195 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001196 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001197 return NULL; \
1198 } while (0)
1199
/* Make the enclosing function `return NULL` with ValueError set when the
   wrapper is not usable: either its buffer was detached or __init__ never
   completed.  For functions returning a pointer.

   Wrapped in do { } while (0) so that `CHECK_INITIALIZED(x);` is a single
   statement and cannot swallow a following `else`. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return NULL; \
        } \
    } while (0)
1211
/* Same check as for pointer-returning functions, but for functions that
   return an int: makes the enclosing function `return -1` with ValueError
   set when the buffer was detached or __init__ never completed.

   Wrapped in do { } while (0) so that `CHECK_INITIALIZED_INT(x);` is a
   single statement and cannot swallow a following `else`. */
#define CHECK_INITIALIZED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return -1; \
        } \
    } while (0)
1223
1224
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001225static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001226textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001227{
1228 PyObject *buffer, *res;
1229 CHECK_INITIALIZED(self);
1230 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1231 if (res == NULL)
1232 return NULL;
1233 Py_DECREF(res);
1234 buffer = self->buffer;
1235 self->buffer = NULL;
1236 self->detached = 1;
1237 self->ok = 0;
1238 return buffer;
1239}
1240
Antoine Pitrou24f36292009-03-28 22:16:42 +00001241/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001242 underlying buffered object, though. */
1243static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001244_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001245{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001246 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001247
1248 if (self->pending_bytes == NULL)
1249 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001250
1251 pending = self->pending_bytes;
1252 Py_INCREF(pending);
1253 self->pending_bytes_count = 0;
1254 Py_CLEAR(self->pending_bytes);
1255
1256 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1257 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001258 if (b == NULL)
1259 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001260 ret = NULL;
1261 do {
1262 ret = PyObject_CallMethodObjArgs(self->buffer,
1263 _PyIO_str_write, b, NULL);
1264 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001265 Py_DECREF(b);
1266 if (ret == NULL)
1267 return -1;
1268 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001269 return 0;
1270}
1271
1272static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001273textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001274{
1275 PyObject *ret;
1276 PyObject *text; /* owned reference */
1277 PyObject *b;
1278 Py_ssize_t textlen;
1279 int haslf = 0;
1280 int needflush = 0;
1281
1282 CHECK_INITIALIZED(self);
1283
1284 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1285 return NULL;
1286 }
1287
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 if (PyUnicode_READY(text) == -1)
1289 return NULL;
1290
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001291 CHECK_CLOSED(self);
1292
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001293 if (self->encoder == NULL)
1294 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001295
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001296 Py_INCREF(text);
1297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001298 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001299
1300 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001301 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001302 haslf = 1;
1303
1304 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001305 PyObject *newtext = _PyObject_CallMethodId(
1306 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001307 Py_DECREF(text);
1308 if (newtext == NULL)
1309 return NULL;
1310 text = newtext;
1311 }
1312
Antoine Pitroue96ec682011-07-23 21:46:35 +02001313 if (self->write_through)
1314 needflush = 1;
1315 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001316 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001317 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001318 needflush = 1;
1319
1320 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001321 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001322 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001323 self->encoding_start_of_stream = 0;
1324 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001325 else
1326 b = PyObject_CallMethodObjArgs(self->encoder,
1327 _PyIO_str_encode, text, NULL);
1328 Py_DECREF(text);
1329 if (b == NULL)
1330 return NULL;
1331
1332 if (self->pending_bytes == NULL) {
1333 self->pending_bytes = PyList_New(0);
1334 if (self->pending_bytes == NULL) {
1335 Py_DECREF(b);
1336 return NULL;
1337 }
1338 self->pending_bytes_count = 0;
1339 }
1340 if (PyList_Append(self->pending_bytes, b) < 0) {
1341 Py_DECREF(b);
1342 return NULL;
1343 }
1344 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1345 Py_DECREF(b);
1346 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001347 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001348 return NULL;
1349 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001350
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001351 if (needflush) {
1352 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1353 if (ret == NULL)
1354 return NULL;
1355 Py_DECREF(ret);
1356 }
1357
1358 Py_CLEAR(self->snapshot);
1359
1360 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001361 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001362 if (ret == NULL)
1363 return NULL;
1364 Py_DECREF(ret);
1365 }
1366
1367 return PyLong_FromSsize_t(textlen);
1368}
1369
1370/* Steal a reference to chars and store it in the decoded_char buffer;
1371 */
1372static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001373textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001374{
1375 Py_CLEAR(self->decoded_chars);
1376 self->decoded_chars = chars;
1377 self->decoded_chars_used = 0;
1378}
1379
1380static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001381textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001382{
1383 PyObject *chars;
1384 Py_ssize_t avail;
1385
1386 if (self->decoded_chars == NULL)
1387 return PyUnicode_FromStringAndSize(NULL, 0);
1388
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001389 /* decoded_chars is guaranteed to be "ready". */
1390 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001391 - self->decoded_chars_used);
1392
1393 assert(avail >= 0);
1394
1395 if (n < 0 || n > avail)
1396 n = avail;
1397
1398 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001399 chars = PyUnicode_Substring(self->decoded_chars,
1400 self->decoded_chars_used,
1401 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001402 if (chars == NULL)
1403 return NULL;
1404 }
1405 else {
1406 chars = self->decoded_chars;
1407 Py_INCREF(chars);
1408 }
1409
1410 self->decoded_chars_used += n;
1411 return chars;
1412}
1413
1414/* Read and decode the next chunk of data from the BufferedReader.
1415 */
1416static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001417textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001418{
1419 PyObject *dec_buffer = NULL;
1420 PyObject *dec_flags = NULL;
1421 PyObject *input_chunk = NULL;
1422 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001423 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001424 int eof;
1425
1426 /* The return value is True unless EOF was reached. The decoded string is
1427 * placed in self._decoded_chars (replacing its previous value). The
1428 * entire input chunk is sent to the decoder, though some of it may remain
1429 * buffered in the decoder, yet to be converted.
1430 */
1431
1432 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001433 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001434 return -1;
1435 }
1436
1437 if (self->telling) {
1438 /* To prepare for tell(), we need to snapshot a point in the file
1439 * where the decoder's input buffer is empty.
1440 */
1441
1442 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1443 _PyIO_str_getstate, NULL);
1444 if (state == NULL)
1445 return -1;
1446 /* Given this, we know there was a valid snapshot point
1447 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1448 */
1449 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1450 Py_DECREF(state);
1451 return -1;
1452 }
1453 Py_INCREF(dec_buffer);
1454 Py_INCREF(dec_flags);
1455 Py_DECREF(state);
1456 }
1457
1458 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001459 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001460 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001461 }
1462 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001463 if (chunk_size == NULL)
1464 goto fail;
1465 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001466 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1467 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001468 Py_DECREF(chunk_size);
1469 if (input_chunk == NULL)
1470 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001471 if (!PyBytes_Check(input_chunk)) {
1472 PyErr_Format(PyExc_TypeError,
1473 "underlying %s() should have returned a bytes object, "
1474 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1475 Py_TYPE(input_chunk)->tp_name);
1476 goto fail;
1477 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001478
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001479 nbytes = PyBytes_Size(input_chunk);
1480 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001481
1482 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1483 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1484 self->decoder, input_chunk, eof);
1485 }
1486 else {
1487 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1488 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1489 }
1490
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001491 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001492 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001493 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001494 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001495 if (nchars > 0)
1496 self->b2cratio = (double) nbytes / nchars;
1497 else
1498 self->b2cratio = 0.0;
1499 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001500 eof = 0;
1501
1502 if (self->telling) {
1503 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1504 * next input to be decoded is dec_buffer + input_chunk.
1505 */
1506 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1507 if (next_input == NULL)
1508 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001509 if (!PyBytes_Check(next_input)) {
1510 PyErr_Format(PyExc_TypeError,
1511 "decoder getstate() should have returned a bytes "
1512 "object, not '%.200s'",
1513 Py_TYPE(next_input)->tp_name);
1514 Py_DECREF(next_input);
1515 goto fail;
1516 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001517 Py_DECREF(dec_buffer);
1518 Py_CLEAR(self->snapshot);
1519 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1520 }
1521 Py_DECREF(input_chunk);
1522
1523 return (eof == 0);
1524
1525 fail:
1526 Py_XDECREF(dec_buffer);
1527 Py_XDECREF(dec_flags);
1528 Py_XDECREF(input_chunk);
1529 return -1;
1530}
1531
1532static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001533textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534{
1535 Py_ssize_t n = -1;
1536 PyObject *result = NULL, *chunks = NULL;
1537
1538 CHECK_INITIALIZED(self);
1539
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001540 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001541 return NULL;
1542
1543 CHECK_CLOSED(self);
1544
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001545 if (self->decoder == NULL)
1546 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001547
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001548 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549 return NULL;
1550
1551 if (n < 0) {
1552 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001553 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001554 PyObject *decoded;
1555 if (bytes == NULL)
1556 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001557
1558 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1559 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1560 bytes, 1);
1561 else
1562 decoded = PyObject_CallMethodObjArgs(
1563 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001565 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001566 goto fail;
1567
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001568 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569
1570 if (result == NULL) {
1571 Py_DECREF(decoded);
1572 return NULL;
1573 }
1574
1575 PyUnicode_AppendAndDel(&result, decoded);
1576 if (result == NULL)
1577 goto fail;
1578
1579 Py_CLEAR(self->snapshot);
1580 return result;
1581 }
1582 else {
1583 int res = 1;
1584 Py_ssize_t remaining = n;
1585
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001586 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001587 if (result == NULL)
1588 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001589 if (PyUnicode_READY(result) == -1)
1590 goto fail;
1591 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001592
1593 /* Keep reading chunks until we have n characters to return */
1594 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001595 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001596 if (res < 0) {
1597 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1598 when EINTR occurs so we needn't do it ourselves. */
1599 if (_PyIO_trap_eintr()) {
1600 continue;
1601 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001602 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001603 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001604 if (res == 0) /* EOF */
1605 break;
1606 if (chunks == NULL) {
1607 chunks = PyList_New(0);
1608 if (chunks == NULL)
1609 goto fail;
1610 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001611 if (PyUnicode_GET_LENGTH(result) > 0 &&
1612 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001613 goto fail;
1614 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001615 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 if (result == NULL)
1617 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001618 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001619 }
1620 if (chunks != NULL) {
1621 if (result != NULL && PyList_Append(chunks, result) < 0)
1622 goto fail;
1623 Py_CLEAR(result);
1624 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1625 if (result == NULL)
1626 goto fail;
1627 Py_CLEAR(chunks);
1628 }
1629 return result;
1630 }
1631 fail:
1632 Py_XDECREF(result);
1633 Py_XDECREF(chunks);
1634 return NULL;
1635}
1636
1637
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001638/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001639 that is to the NUL character. Otherwise the function will produce
1640 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001641static char *
1642find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001644 if (kind == PyUnicode_1BYTE_KIND) {
1645 assert(ch < 256);
1646 return (char *) memchr((void *) s, (char) ch, end - s);
1647 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001649 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001650 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001651 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001652 return s;
1653 if (s == end)
1654 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001655 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001656 }
1657}
1658
1659Py_ssize_t
1660_PyIO_find_line_ending(
1661 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001662 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001664 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001665
1666 if (translated) {
1667 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001669 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001670 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001671 else {
1672 *consumed = len;
1673 return -1;
1674 }
1675 }
1676 else if (universal) {
1677 /* Universal newline search. Find any of \r, \r\n, \n
1678 * The decoder ensures that \r\n are not split in two pieces
1679 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001680 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001681 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001682 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001683 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001684 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001685 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001686 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001687 if (s >= end) {
1688 *consumed = len;
1689 return -1;
1690 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001691 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001694 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001695 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001696 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001697 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001698 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001699 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001700 }
1701 }
1702 }
1703 else {
1704 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001705 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1706 char *nl = PyUnicode_DATA(readnl);
1707 /* Assume that readnl is an ASCII character. */
1708 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001710 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001712 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 *consumed = len;
1714 return -1;
1715 }
1716 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001717 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001718 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001719 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 if (e < s)
1721 e = s;
1722 while (s < e) {
1723 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001724 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001725 if (pos == NULL || pos >= e)
1726 break;
1727 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001728 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001729 break;
1730 }
1731 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001732 return (pos - start)/kind + readnl_len;
1733 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001734 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001735 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001736 if (pos == NULL)
1737 *consumed = len;
1738 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001739 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 return -1;
1741 }
1742 }
1743}
1744
1745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001746_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001747{
1748 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1749 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1750 int res;
1751
1752 CHECK_CLOSED(self);
1753
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001754 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001755 return NULL;
1756
1757 chunked = 0;
1758
1759 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001760 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001762 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001763 Py_ssize_t consumed = 0;
1764
1765 /* First, get some data if necessary */
1766 res = 1;
1767 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001768 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001769 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001770 if (res < 0) {
1771 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1772 when EINTR occurs so we needn't do it ourselves. */
1773 if (_PyIO_trap_eintr()) {
1774 continue;
1775 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001776 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001777 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001778 if (res == 0)
1779 break;
1780 }
1781 if (res == 0) {
1782 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001783 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001784 Py_CLEAR(self->snapshot);
1785 start = endpos = offset_to_buffer = 0;
1786 break;
1787 }
1788
1789 if (remaining == NULL) {
1790 line = self->decoded_chars;
1791 start = self->decoded_chars_used;
1792 offset_to_buffer = 0;
1793 Py_INCREF(line);
1794 }
1795 else {
1796 assert(self->decoded_chars_used == 0);
1797 line = PyUnicode_Concat(remaining, self->decoded_chars);
1798 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001799 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001800 Py_CLEAR(remaining);
1801 if (line == NULL)
1802 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001803 if (PyUnicode_READY(line) == -1)
1804 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001805 }
1806
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001807 ptr = PyUnicode_DATA(line);
1808 line_len = PyUnicode_GET_LENGTH(line);
1809 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001810
1811 endpos = _PyIO_find_line_ending(
1812 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001813 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001814 ptr + kind * start,
1815 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001816 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001817 if (endpos >= 0) {
1818 endpos += start;
1819 if (limit >= 0 && (endpos - start) + chunked >= limit)
1820 endpos = start + limit - chunked;
1821 break;
1822 }
1823
1824 /* We can put aside up to `endpos` */
1825 endpos = consumed + start;
1826 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1827 /* Didn't find line ending, but reached length limit */
1828 endpos = start + limit - chunked;
1829 break;
1830 }
1831
1832 if (endpos > start) {
1833 /* No line ending seen yet - put aside current data */
1834 PyObject *s;
1835 if (chunks == NULL) {
1836 chunks = PyList_New(0);
1837 if (chunks == NULL)
1838 goto error;
1839 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001840 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001841 if (s == NULL)
1842 goto error;
1843 if (PyList_Append(chunks, s) < 0) {
1844 Py_DECREF(s);
1845 goto error;
1846 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001847 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001848 Py_DECREF(s);
1849 }
1850 /* There may be some remaining bytes we'll have to prepend to the
1851 next chunk of data */
1852 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001853 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001854 if (remaining == NULL)
1855 goto error;
1856 }
1857 Py_CLEAR(line);
1858 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001859 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001860 }
1861
1862 if (line != NULL) {
1863 /* Our line ends in the current buffer */
1864 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001865 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1866 PyObject *s = PyUnicode_Substring(line, start, endpos);
1867 Py_CLEAR(line);
1868 if (s == NULL)
1869 goto error;
1870 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001871 }
1872 }
1873 if (remaining != NULL) {
1874 if (chunks == NULL) {
1875 chunks = PyList_New(0);
1876 if (chunks == NULL)
1877 goto error;
1878 }
1879 if (PyList_Append(chunks, remaining) < 0)
1880 goto error;
1881 Py_CLEAR(remaining);
1882 }
1883 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001884 if (line != NULL) {
1885 if (PyList_Append(chunks, line) < 0)
1886 goto error;
1887 Py_DECREF(line);
1888 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001889 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1890 if (line == NULL)
1891 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001892 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001893 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001894 if (line == NULL) {
1895 Py_INCREF(_PyIO_empty_str);
1896 line = _PyIO_empty_str;
1897 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001898
1899 return line;
1900
1901 error:
1902 Py_XDECREF(chunks);
1903 Py_XDECREF(remaining);
1904 Py_XDECREF(line);
1905 return NULL;
1906}
1907
1908static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001909textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001910{
1911 Py_ssize_t limit = -1;
1912
1913 CHECK_INITIALIZED(self);
1914 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1915 return NULL;
1916 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001917 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001918}
1919
1920/* Seek and Tell */
1921
1922typedef struct {
1923 Py_off_t start_pos;
1924 int dec_flags;
1925 int bytes_to_feed;
1926 int chars_to_skip;
1927 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001928} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001929
/*
   To speed up cookie packing/unpacking, the cookie_type fields go through
   an intermediate byte string that is handed to _PyLong_FromByteArray()
   (packing) or filled by _PyLong_AsByteArray() (unpacking).
   The OFF_* macros below give the offset of each field inside that byte
   string; each offset is expressed relative to the field stored before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* The least significant byte of start_pos must also be the least
   significant byte of the cookie, so in big-endian mode the fields are
   laid out in reverse order (need_eof first, start_pos last). */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: storing start_pos first naturally makes its least
   significant byte the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1961
1962static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001963textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001964{
1965 unsigned char buffer[COOKIE_BUF_LEN];
1966 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1967 if (cookieLong == NULL)
1968 return -1;
1969
1970 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001971 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001972 Py_DECREF(cookieLong);
1973 return -1;
1974 }
1975 Py_DECREF(cookieLong);
1976
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001977 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1978 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1979 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1980 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1981 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001982
1983 return 0;
1984}
1985
1986static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001987textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001988{
1989 unsigned char buffer[COOKIE_BUF_LEN];
1990
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001991 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1992 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1993 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1994 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1995 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001996
Christian Heimes743e0cd2012-10-17 23:52:17 +02001997 return _PyLong_FromByteArray(buffer, sizeof(buffer),
1998 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000
2001static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002002_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002003{
2004 PyObject *res;
2005 /* When seeking to the start of the stream, we call decoder.reset()
2006 rather than decoder.getstate().
2007 This is for a few decoders such as utf-16 for which the state value
2008 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2009 utf-16, that we are expecting a BOM).
2010 */
2011 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2012 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2013 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002014 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2015 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002016 if (res == NULL)
2017 return -1;
2018 Py_DECREF(res);
2019 return 0;
2020}
2021
Antoine Pitroue4501852009-05-14 18:55:55 +00002022static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002023_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002024{
2025 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002026 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002027 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2028 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2029 self->encoding_start_of_stream = 1;
2030 }
2031 else {
2032 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2033 _PyIO_zero, NULL);
2034 self->encoding_start_of_stream = 0;
2035 }
2036 if (res == NULL)
2037 return -1;
2038 Py_DECREF(res);
2039 return 0;
2040}
2041
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002042static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002043textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044{
2045 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002046 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002048 PyObject *res;
2049 int cmp;
2050
2051 CHECK_INITIALIZED(self);
2052
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2054 return NULL;
2055 CHECK_CLOSED(self);
2056
2057 Py_INCREF(cookieObj);
2058
2059 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002060 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002061 goto fail;
2062 }
2063
2064 if (whence == 1) {
2065 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002066 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002067 if (cmp < 0)
2068 goto fail;
2069
2070 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002071 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002072 goto fail;
2073 }
2074
2075 /* Seeking to the current position should attempt to
2076 * sync the underlying buffer with the current position.
2077 */
2078 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002079 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080 if (cookieObj == NULL)
2081 goto fail;
2082 }
2083 else if (whence == 2) {
2084 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002085 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 if (cmp < 0)
2087 goto fail;
2088
2089 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002090 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002091 goto fail;
2092 }
2093
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002094 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002095 if (res == NULL)
2096 goto fail;
2097 Py_DECREF(res);
2098
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002099 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002100 Py_CLEAR(self->snapshot);
2101 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002102 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002103 if (res == NULL)
2104 goto fail;
2105 Py_DECREF(res);
2106 }
2107
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002108 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002109 Py_XDECREF(cookieObj);
2110 return res;
2111 }
2112 else if (whence != 0) {
2113 PyErr_Format(PyExc_ValueError,
2114 "invalid whence (%d, should be 0, 1 or 2)", whence);
2115 goto fail;
2116 }
2117
Antoine Pitroue4501852009-05-14 18:55:55 +00002118 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002119 if (cmp < 0)
2120 goto fail;
2121
2122 if (cmp == 1) {
2123 PyErr_Format(PyExc_ValueError,
2124 "negative seek position %R", cookieObj);
2125 goto fail;
2126 }
2127
2128 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2129 if (res == NULL)
2130 goto fail;
2131 Py_DECREF(res);
2132
2133 /* The strategy of seek() is to go back to the safe start point
2134 * and replay the effect of read(chars_to_skip) from there.
2135 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002136 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002137 goto fail;
2138
2139 /* Seek back to the safe start point. */
2140 posobj = PyLong_FromOff_t(cookie.start_pos);
2141 if (posobj == NULL)
2142 goto fail;
2143 res = PyObject_CallMethodObjArgs(self->buffer,
2144 _PyIO_str_seek, posobj, NULL);
2145 Py_DECREF(posobj);
2146 if (res == NULL)
2147 goto fail;
2148 Py_DECREF(res);
2149
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002150 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002151 Py_CLEAR(self->snapshot);
2152
2153 /* Restore the decoder to its state from the safe start point. */
2154 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002155 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002156 goto fail;
2157 }
2158
2159 if (cookie.chars_to_skip) {
2160 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002161 PyObject *input_chunk = _PyObject_CallMethodId(
2162 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002163 PyObject *decoded;
2164
2165 if (input_chunk == NULL)
2166 goto fail;
2167
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002168 if (!PyBytes_Check(input_chunk)) {
2169 PyErr_Format(PyExc_TypeError,
2170 "underlying read() should have returned a bytes "
2171 "object, not '%.200s'",
2172 Py_TYPE(input_chunk)->tp_name);
2173 Py_DECREF(input_chunk);
2174 goto fail;
2175 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002176
2177 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2178 if (self->snapshot == NULL) {
2179 Py_DECREF(input_chunk);
2180 goto fail;
2181 }
2182
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002183 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2184 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002185
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002186 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002187 goto fail;
2188
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002189 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002190
2191 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002192 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002193 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2194 goto fail;
2195 }
2196 self->decoded_chars_used = cookie.chars_to_skip;
2197 }
2198 else {
2199 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2200 if (self->snapshot == NULL)
2201 goto fail;
2202 }
2203
Antoine Pitroue4501852009-05-14 18:55:55 +00002204 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2205 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002206 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002207 goto fail;
2208 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002209 return cookieObj;
2210 fail:
2211 Py_XDECREF(cookieObj);
2212 return NULL;
2213
2214}
2215
2216static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002217textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002218{
2219 PyObject *res;
2220 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002221 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002222 PyObject *next_input;
2223 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002224 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002225 PyObject *saved_state = NULL;
2226 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002227 char *dec_buffer;
2228 Py_ssize_t dec_buffer_len;
2229 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002230
2231 CHECK_INITIALIZED(self);
2232 CHECK_CLOSED(self);
2233
2234 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002235 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002236 goto fail;
2237 }
2238 if (!self->telling) {
2239 PyErr_SetString(PyExc_IOError,
2240 "telling position disabled by next() call");
2241 goto fail;
2242 }
2243
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002244 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002245 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002246 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002247 if (res == NULL)
2248 goto fail;
2249 Py_DECREF(res);
2250
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002251 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252 if (posobj == NULL)
2253 goto fail;
2254
2255 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002256 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002257 return posobj;
2258 }
2259
2260#if defined(HAVE_LARGEFILE_SUPPORT)
2261 cookie.start_pos = PyLong_AsLongLong(posobj);
2262#else
2263 cookie.start_pos = PyLong_AsLong(posobj);
2264#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002265 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002266 if (PyErr_Occurred())
2267 goto fail;
2268
2269 /* Skip backward to the snapshot point (see _read_chunk). */
2270 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2271 goto fail;
2272
2273 assert (PyBytes_Check(next_input));
2274
2275 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2276
2277 /* How many decoded characters have been used up since the snapshot? */
2278 if (self->decoded_chars_used == 0) {
2279 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002280 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002281 }
2282
2283 chars_to_skip = self->decoded_chars_used;
2284
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002285 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002286 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2287 _PyIO_str_getstate, NULL);
2288 if (saved_state == NULL)
2289 goto fail;
2290
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002291#define DECODER_GETSTATE() do { \
2292 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2293 _PyIO_str_getstate, NULL); \
2294 if (_state == NULL) \
2295 goto fail; \
2296 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2297 Py_DECREF(_state); \
2298 goto fail; \
2299 } \
2300 Py_DECREF(_state); \
2301 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002302
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002303#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002304 PyObject *_decoded = _PyObject_CallMethodId( \
2305 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002306 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002307 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002308 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002309 Py_DECREF(_decoded); \
2310 } while (0)
2311
2312 /* Fast search for an acceptable start point, close to our
2313 current pos */
2314 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2315 skip_back = 1;
2316 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2317 input = PyBytes_AS_STRING(next_input);
2318 while (skip_bytes > 0) {
        /* Decode up to tentative start point */
2320 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2321 goto fail;
2322 DECODER_DECODE(input, skip_bytes, chars_decoded);
2323 if (chars_decoded <= chars_to_skip) {
2324 DECODER_GETSTATE();
2325 if (dec_buffer_len == 0) {
2326 /* Before pos and no bytes buffered in decoder => OK */
2327 cookie.dec_flags = dec_flags;
2328 chars_to_skip -= chars_decoded;
2329 break;
2330 }
2331 /* Skip back by buffered amount and reset heuristic */
2332 skip_bytes -= dec_buffer_len;
2333 skip_back = 1;
2334 }
2335 else {
2336 /* We're too far ahead, skip back a bit */
2337 skip_bytes -= skip_back;
2338 skip_back *= 2;
2339 }
2340 }
2341 if (skip_bytes <= 0) {
2342 skip_bytes = 0;
2343 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2344 goto fail;
2345 }
2346
2347 /* Note our initial start point. */
2348 cookie.start_pos += skip_bytes;
2349 cookie.chars_to_skip = chars_to_skip;
2350 if (chars_to_skip == 0)
2351 goto finally;
2352
2353 /* We should be close to the desired position. Now feed the decoder one
2354 * byte at a time until we reach the `chars_to_skip` target.
2355 * As we go, note the nearest "safe start point" before the current
2356 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002357 * can safely start from there and advance to this location).
2358 */
2359 chars_decoded = 0;
2360 input = PyBytes_AS_STRING(next_input);
2361 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002362 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002363 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002364 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002365
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002366 DECODER_DECODE(input, 1, n);
2367 /* We got n chars for 1 byte */
2368 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002369 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002370 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002371
2372 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2373 /* Decoder buffer is empty, so this is a safe start point. */
2374 cookie.start_pos += cookie.bytes_to_feed;
2375 chars_to_skip -= chars_decoded;
2376 cookie.dec_flags = dec_flags;
2377 cookie.bytes_to_feed = 0;
2378 chars_decoded = 0;
2379 }
2380 if (chars_decoded >= chars_to_skip)
2381 break;
2382 input++;
2383 }
2384 if (input == input_end) {
2385 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002386 PyObject *decoded = _PyObject_CallMethodId(
2387 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002388 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002389 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002390 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002391 Py_DECREF(decoded);
2392 cookie.need_eof = 1;
2393
2394 if (chars_decoded < chars_to_skip) {
2395 PyErr_SetString(PyExc_IOError,
2396 "can't reconstruct logical file position");
2397 goto fail;
2398 }
2399 }
2400
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002401finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002402 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002403 Py_DECREF(saved_state);
2404 if (res == NULL)
2405 return NULL;
2406 Py_DECREF(res);
2407
2408 /* The returned cookie corresponds to the last safe start point. */
2409 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002410 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002411
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002412fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002413 if (saved_state) {
2414 PyObject *type, *value, *traceback;
2415 PyErr_Fetch(&type, &value, &traceback);
2416
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002417 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418 Py_DECREF(saved_state);
2419 if (res == NULL)
2420 return NULL;
2421 Py_DECREF(res);
2422
2423 PyErr_Restore(type, value, traceback);
2424 }
2425 return NULL;
2426}
2427
2428static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002429textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002430{
2431 PyObject *pos = Py_None;
2432 PyObject *res;
2433
2434 CHECK_INITIALIZED(self)
2435 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2436 return NULL;
2437 }
2438
2439 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2440 if (res == NULL)
2441 return NULL;
2442 Py_DECREF(res);
2443
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002444 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002445}
2446
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002447static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002448textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002449{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002450 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002451
2452 CHECK_INITIALIZED(self);
2453
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002454 res = PyUnicode_FromString("<_io.TextIOWrapper");
2455 if (res == NULL)
2456 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002457 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002458 if (nameobj == NULL) {
2459 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2460 PyErr_Clear();
2461 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002462 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002463 }
2464 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002465 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002466 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002467 if (s == NULL)
2468 goto error;
2469 PyUnicode_AppendAndDel(&res, s);
2470 if (res == NULL)
2471 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002472 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002473 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002474 if (modeobj == NULL) {
2475 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2476 PyErr_Clear();
2477 else
2478 goto error;
2479 }
2480 else {
2481 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2482 Py_DECREF(modeobj);
2483 if (s == NULL)
2484 goto error;
2485 PyUnicode_AppendAndDel(&res, s);
2486 if (res == NULL)
2487 return NULL;
2488 }
2489 s = PyUnicode_FromFormat("%U encoding=%R>",
2490 res, self->encoding);
2491 Py_DECREF(res);
2492 return s;
2493error:
2494 Py_XDECREF(res);
2495 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002496}
2497
2498
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002499/* Inquiries */
2500
2501static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002502textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503{
2504 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002505 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002506}
2507
2508static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002509textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002510{
2511 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002512 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513}
2514
2515static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002516textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002517{
2518 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002519 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520}
2521
2522static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002523textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002524{
2525 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002526 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002527}
2528
2529static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002530textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002531{
2532 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002533 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534}
2535
2536static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002537textiowrapper_getstate(textio *self, PyObject *args)
2538{
2539 PyErr_Format(PyExc_TypeError,
2540 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2541 return NULL;
2542}
2543
2544static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002545textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002546{
2547 CHECK_INITIALIZED(self);
2548 CHECK_CLOSED(self);
2549 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002550 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002551 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002552 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002553}
2554
2555static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002556textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002557{
2558 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002559 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002560 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002561
Antoine Pitrou6be88762010-05-03 16:48:20 +00002562 res = textiowrapper_closed_get(self, NULL);
2563 if (res == NULL)
2564 return NULL;
2565 r = PyObject_IsTrue(res);
2566 Py_DECREF(res);
2567 if (r < 0)
2568 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002569
Antoine Pitrou6be88762010-05-03 16:48:20 +00002570 if (r > 0) {
2571 Py_RETURN_NONE; /* stream already closed */
2572 }
2573 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002574 PyObject *exc = NULL, *val, *tb;
Antoine Pitroue033e062010-10-29 10:38:18 +00002575 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002576 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002577 if (res)
2578 Py_DECREF(res);
2579 else
2580 PyErr_Clear();
2581 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002582 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002583 if (res == NULL)
2584 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002585 else
2586 Py_DECREF(res);
2587
Benjamin Peterson68623612012-12-20 11:53:11 -06002588 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2589 if (exc != NULL) {
2590 if (res != NULL) {
2591 Py_CLEAR(res);
2592 PyErr_Restore(exc, val, tb);
2593 }
2594 else {
2595 PyObject *val2;
2596 Py_DECREF(exc);
2597 Py_XDECREF(tb);
2598 PyErr_Fetch(&exc, &val2, &tb);
2599 PyErr_NormalizeException(&exc, &val2, &tb);
2600 PyException_SetContext(val2, val);
2601 PyErr_Restore(exc, val2, tb);
2602 }
2603 }
2604 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002605 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002606}
2607
2608static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002609textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002610{
2611 PyObject *line;
2612
2613 CHECK_INITIALIZED(self);
2614
2615 self->telling = 0;
2616 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2617 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002618 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619 }
2620 else {
2621 line = PyObject_CallMethodObjArgs((PyObject *)self,
2622 _PyIO_str_readline, NULL);
2623 if (line && !PyUnicode_Check(line)) {
2624 PyErr_Format(PyExc_IOError,
2625 "readline() should have returned an str object, "
2626 "not '%.200s'", Py_TYPE(line)->tp_name);
2627 Py_DECREF(line);
2628 return NULL;
2629 }
2630 }
2631
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002632 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002633 return NULL;
2634
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002635 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002636 /* Reached EOF or would have blocked */
2637 Py_DECREF(line);
2638 Py_CLEAR(self->snapshot);
2639 self->telling = self->seekable;
2640 return NULL;
2641 }
2642
2643 return line;
2644}
2645
2646static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002647textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002648{
2649 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002650 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002651}
2652
2653static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002654textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002655{
2656 CHECK_INITIALIZED(self);
2657 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2658}
2659
2660static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002661textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002662{
2663 PyObject *res;
2664 CHECK_INITIALIZED(self);
2665 if (self->decoder == NULL)
2666 Py_RETURN_NONE;
2667 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2668 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002669 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2670 PyErr_Clear();
2671 Py_RETURN_NONE;
2672 }
2673 else {
2674 return NULL;
2675 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002676 }
2677 return res;
2678}
2679
2680static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002681textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002682{
2683 CHECK_INITIALIZED(self);
2684 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2685}
2686
2687static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002688textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002689{
2690 CHECK_INITIALIZED(self);
2691 return PyLong_FromSsize_t(self->chunk_size);
2692}
2693
2694static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002695textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696{
2697 Py_ssize_t n;
2698 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002699 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002700 if (n == -1 && PyErr_Occurred())
2701 return -1;
2702 if (n <= 0) {
2703 PyErr_SetString(PyExc_ValueError,
2704 "a strictly positive integer is required");
2705 return -1;
2706 }
2707 self->chunk_size = n;
2708 return 0;
2709}
2710
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002711static PyMethodDef textiowrapper_methods[] = {
2712 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2713 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2714 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2715 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2716 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2717 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002718
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002719 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2720 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2721 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2722 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2723 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002724 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002725
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002726 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2727 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2728 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002729 {NULL, NULL}
2730};
2731
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002732static PyMemberDef textiowrapper_members[] = {
2733 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2734 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2735 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002736 {NULL}
2737};
2738
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002739static PyGetSetDef textiowrapper_getset[] = {
2740 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2741 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002742/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2743*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002744 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2745 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2746 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2747 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002748 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002749};
2750
/* Type object for _io.TextIOWrapper.
   The initializer is positional: each value is matched to a PyTypeObject
   slot purely by its position, so entries must not be reordered.  Slots
   given as 0 are not supplied by this initializer. */
PyTypeObject PyTextIOWrapper_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.TextIOWrapper",        /*tp_name*/
    sizeof(textio), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)textiowrapper_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_reserved (formerly tp_compare)*/
    (reprfunc)textiowrapper_repr,/*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
            | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    textiowrapper_doc,          /* tp_doc */
    (traverseproc)textiowrapper_traverse, /* tp_traverse */
    (inquiry)textiowrapper_clear, /* tp_clear */
    0,                          /* tp_richcompare */
    offsetof(textio, weakreflist), /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    (iternextfunc)textiowrapper_iternext, /* tp_iternext */
    textiowrapper_methods,      /* tp_methods */
    textiowrapper_members,      /* tp_members */
    textiowrapper_getset,       /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    offsetof(textio, dict), /*tp_dictoffset*/
    (initproc)textiowrapper_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};