blob: 4d0009da5b4d0f26a752db73a7676bf0d471c83e [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Names of the methods and attributes looked up on wrapped objects.
   Each _Py_IDENTIFIER(x) provides a PyId_x object that can be handed to
   helpers such as _PyObject_CallMethodId(obj, &PyId_x, ...). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
198/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200260static int
261check_decoded(PyObject *decoded)
262{
263 if (decoded == NULL)
264 return -1;
265 if (!PyUnicode_Check(decoded)) {
266 PyErr_Format(PyExc_TypeError,
267 "decoder should return a string result, not '%.200s'",
268 Py_TYPE(decoded)->tp_name);
269 Py_DECREF(decoded);
270 return -1;
271 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200272 if (PyUnicode_READY(decoded) < 0) {
273 Py_DECREF(decoded);
274 return -1;
275 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200276 return 0;
277}
278
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in the 3-bit `seennl` field. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
283
/* Core of IncrementalNewlineDecoder.decode(); non-static, so presumably
   also called from other C code in the io module.

   _self  - an nldecoder_object (cast without a type check).
   input  - object passed to the wrapped decoder's decode() method, or used
            directly as the text when the wrapped decoder is None.
   final  - non-zero when no more input will follow.

   Steps:
   1. Decode `input` and require the result to be a str (check_decoded).
   2. If a \r was held back by an earlier call, put it in front of the text.
   3. Unless `final`, strip one trailing \r and remember it in pendingcr, so
      that a \r\n pair is never split between two results.
   4. Scan the text once, recording the newline kinds met in self->seennl
      and, when self->translate is set, rewriting \r\n and \r as \n.

   Returns a new reference to the resulting str, or NULL with an exception
   set (ValueError if __init__ was never run, i.e. decoder is NULL). */
284PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000285_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000286 PyObject *input, int final)
287{
288 PyObject *output;
289 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000290 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000291
292 if (self->decoder == NULL) {
293 PyErr_SetString(PyExc_ValueError,
294 "IncrementalNewlineDecoder.__init__ not called");
295 return NULL;
296 }
297
298 /* decode input (with the eventual \r from a previous pass) */
299 if (self->decoder != Py_None) {
300 output = PyObject_CallMethodObjArgs(self->decoder,
301 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
302 }
303 else {
304 output = input;
305 Py_INCREF(output);
306 }
307
/* check_decoded() releases `output` itself on failure, so a plain return
   is enough here. */
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200308 if (check_decoded(output) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000309 return NULL;
310
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200311 output_len = PyUnicode_GET_LENGTH(output);
/* A held-back \r is re-emitted only once there is something to put after
   it, or when this is the final call. */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000312 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200313 /* Prefix output with CR */
314 int kind;
315 PyObject *modified;
316 char *out;
317
318 modified = PyUnicode_New(output_len + 1,
319 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000320 if (modified == NULL)
321 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200322 kind = PyUnicode_KIND(modified);
323 out = PyUnicode_DATA(modified);
324 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
/* `kind` is used as the per-character byte width: skip the one character
   just written and copy output_len characters after it. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200325 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000326 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000328 self->pendingcr = 0;
329 output_len++;
330 }
331
332 /* retain last \r even when not translating data:
333 * then readline() is sure to get \r\n in one pass
334 */
335 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000336 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200337 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
338 {
339 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
340 if (modified == NULL)
341 goto error;
342 Py_DECREF(output);
343 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000344 self->pendingcr = 1;
345 }
346 }
347
348 /* Record which newlines are read and do newline translation if desired,
349 all in one pass. */
350 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200351 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000352 Py_ssize_t len;
353 int seennl = self->seennl;
354 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200355 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000356
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200357 in_str = PyUnicode_DATA(output);
358 len = PyUnicode_GET_LENGTH(output);
359 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000360
/* Nothing to scan; self->seennl is left untouched. */
361 if (len == 0)
362 return output;
363
364 /* If, up to now, newlines are consistently \n, do a quick check
365 for the \r *byte* with the libc's optimized memchr.
366 */
367 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200368 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000369 }
370
Antoine Pitrou66913e22009-03-06 23:40:56 +0000371 if (only_lf) {
372 /* If not already seen, quick scan for a possible "\n" character.
373 (there's nothing else to be done, even when in translation mode)
374 */
375 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200376 memchr(in_str, '\n', kind * len) != NULL) {
/* For wider kinds a 0x0A byte found by memchr may be only part of a
   character, so the text is re-scanned character by character. */
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100377 if (kind == PyUnicode_1BYTE_KIND)
378 seennl |= SEEN_LF;
379 else {
380 Py_ssize_t i = 0;
381 for (;;) {
382 Py_UCS4 c;
383 /* Fast loop for non-control characters */
/* NOTE(review): the inner loop has no bounds check; it presumably relies
   on the character data being followed by a NUL terminator - confirm. */
384 while (PyUnicode_READ(kind, in_str, i) > '\n')
385 i++;
386 c = PyUnicode_READ(kind, in_str, i++);
387 if (c == '\n') {
388 seennl |= SEEN_LF;
389 break;
390 }
391 if (i >= len)
392 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000393 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000394 }
395 }
396 /* Finished: we have scanned for newlines, and none of them
397 need translating */
398 }
399 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200400 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000401 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000402 if (seennl == SEEN_ALL)
403 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200407 while (PyUnicode_READ(kind, in_str, i) > '\r')
408 i++;
409 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000410 if (c == '\n')
411 seennl |= SEEN_LF;
412 else if (c == '\r') {
/* Look one character ahead to tell \r\n from a lone \r. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200413 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000414 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200415 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000416 }
417 else
418 seennl |= SEEN_CR;
419 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200420 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000421 break;
422 if (seennl == SEEN_ALL)
423 break;
424 }
425 endscan:
426 ;
427 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000428 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200429 void *translated;
430 int kind = PyUnicode_KIND(output);
431 void *in_str = PyUnicode_DATA(output);
432 Py_ssize_t in, out;
433 /* XXX: Previous in-place translation here is disabled as
434 resizing is not possible anymore */
435 /* We could try to optimize this so that we only do a copy
436 when there is something to translate. On the other hand,
437 we already know there is a \r byte, so chances are high
438 that something needs to be done. */
/* Translation never lengthens the text (\r\n shrinks to \n, \r becomes
   \n), so a scratch buffer of the input size is sufficient. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200439 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200440 if (translated == NULL) {
441 PyErr_NoMemory();
442 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000443 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200444 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000445 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200448 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
449 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000450 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200451 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000452 seennl |= SEEN_LF;
453 continue;
454 }
455 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200456 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000457 in++;
458 seennl |= SEEN_CRLF;
459 }
460 else
461 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000463 continue;
464 }
/* `in` was already advanced past the character just read, so in > len
   means that character sat at index len, i.e. beyond the text. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200465 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000466 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200467 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000468 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200469 Py_DECREF(output);
470 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100471 PyMem_Free(translated);
/* The old `output` reference is already gone, so the `error` label (which
   would DECREF it again) must not be used here. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200472 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200473 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000474 }
475 self->seennl |= seennl;
476 }
477
478 return output;
479
/* Failure path for errors raised while `output` is still owned. */
480 error:
481 Py_DECREF(output);
482 return NULL;
483}
484
485static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000486incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 PyObject *args, PyObject *kwds)
488{
489 char *kwlist[] = {"input", "final", NULL};
490 PyObject *input;
491 int final = 0;
492
493 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
494 kwlist, &input, &final))
495 return NULL;
496 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
497}
498
499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000500incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000501{
502 PyObject *buffer;
503 unsigned PY_LONG_LONG flag;
504
505 if (self->decoder != Py_None) {
506 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
507 _PyIO_str_getstate, NULL);
508 if (state == NULL)
509 return NULL;
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
511 Py_DECREF(state);
512 return NULL;
513 }
514 Py_INCREF(buffer);
515 Py_DECREF(state);
516 }
517 else {
518 buffer = PyBytes_FromString("");
519 flag = 0;
520 }
521 flag <<= 1;
522 if (self->pendingcr)
523 flag |= 1;
524 return Py_BuildValue("NK", buffer, flag);
525}
526
527static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000528incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529{
530 PyObject *buffer;
531 unsigned PY_LONG_LONG flag;
532
533 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
534 return NULL;
535
536 self->pendingcr = (int) flag & 1;
537 flag >>= 1;
538
539 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200540 return _PyObject_CallMethodId(self->decoder,
541 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542 else
543 Py_RETURN_NONE;
544}
545
546static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000547incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000548{
549 self->seennl = 0;
550 self->pendingcr = 0;
551 if (self->decoder != Py_None)
552 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
553 else
554 Py_RETURN_NONE;
555}
556
557static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000558incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000559{
560 switch (self->seennl) {
561 case SEEN_CR:
562 return PyUnicode_FromString("\r");
563 case SEEN_LF:
564 return PyUnicode_FromString("\n");
565 case SEEN_CRLF:
566 return PyUnicode_FromString("\r\n");
567 case SEEN_CR | SEEN_LF:
568 return Py_BuildValue("ss", "\r", "\n");
569 case SEEN_CR | SEEN_CRLF:
570 return Py_BuildValue("ss", "\r", "\r\n");
571 case SEEN_LF | SEEN_CRLF:
572 return Py_BuildValue("ss", "\n", "\r\n");
573 case SEEN_CR | SEEN_LF | SEEN_CRLF:
574 return Py_BuildValue("sss", "\r", "\n", "\r\n");
575 default:
576 Py_RETURN_NONE;
577 }
578
579}
580
581
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000582static PyMethodDef incrementalnewlinedecoder_methods[] = {
583 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
584 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
585 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
586 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000587 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000588};
589
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590static PyGetSetDef incrementalnewlinedecoder_getset[] = {
591 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000592 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000593};
594
595PyTypeObject PyIncrementalNewlineDecoder_Type = {
596 PyVarObject_HEAD_INIT(NULL, 0)
597 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000598 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000599 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000600 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000601 0, /*tp_print*/
602 0, /*tp_getattr*/
603 0, /*tp_setattr*/
604 0, /*tp_compare */
605 0, /*tp_repr*/
606 0, /*tp_as_number*/
607 0, /*tp_as_sequence*/
608 0, /*tp_as_mapping*/
609 0, /*tp_hash */
610 0, /*tp_call*/
611 0, /*tp_str*/
612 0, /*tp_getattro*/
613 0, /*tp_setattro*/
614 0, /*tp_as_buffer*/
615 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000616 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000617 0, /* tp_traverse */
618 0, /* tp_clear */
619 0, /* tp_richcompare */
620 0, /*tp_weaklistoffset*/
621 0, /* tp_iter */
622 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000623 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000624 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000625 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000626 0, /* tp_base */
627 0, /* tp_dict */
628 0, /* tp_descr_get */
629 0, /* tp_descr_set */
630 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000631 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000632 0, /* tp_alloc */
633 PyType_GenericNew, /* tp_new */
634};
635
636
637/* TextIOWrapper */
638
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000639PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000640 "Character and line based layer over a BufferedIOBase object, buffer.\n"
641 "\n"
642 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200643 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000644 "\n"
Andrew Kuchlingc7b6c502013-06-16 12:58:48 -0400645 "errors determines the strictness of encoding and decoding (see\n"
646 "help(codecs.Codec) or the documentation for codecs.register) and\n"
647 "defaults to \"strict\".\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000648 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200649 "newline controls how line endings are handled. It can be None, '',\n"
650 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
651 "\n"
652 "* On input, if newline is None, universal newlines mode is\n"
653 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
654 " these are translated into '\\n' before being returned to the\n"
655 " caller. If it is '', universal newline mode is enabled, but line\n"
656 " endings are returned to the caller untranslated. If it has any of\n"
657 " the other legal values, input lines are only terminated by the given\n"
658 " string, and the line ending is returned to the caller untranslated.\n"
659 "\n"
660 "* On output, if newline is None, any '\\n' characters written are\n"
661 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300662 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200663 " of the other legal values, any '\\n' characters written are translated\n"
664 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000665 "\n"
666 "If line_buffering is True, a call to flush is implied when a call to\n"
667 "write contains a newline character."
668 );
669
670typedef PyObject *
671 (*encodefunc_t)(PyObject *, PyObject *);
672
673typedef struct
674{
675 PyObject_HEAD
676 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000677 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000678 Py_ssize_t chunk_size;
679 PyObject *buffer;
680 PyObject *encoding;
681 PyObject *encoder;
682 PyObject *decoder;
683 PyObject *readnl;
684 PyObject *errors;
685 const char *writenl; /* utf-8 encoded, NULL stands for \n */
686 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200687 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000688 char readuniversal;
689 char readtranslate;
690 char writetranslate;
691 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200692 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000693 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000694 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000695 /* Specialized encoding func (see below) */
696 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000697 /* Whether or not it's the start of the stream */
698 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000699
700 /* Reads and writes are internally buffered in order to speed things up.
701 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000702
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000703 Please also note that text to be written is first encoded before being
704 buffered. This is necessary so that encoding errors are immediately
705 reported to the caller, but it unfortunately means that the
706 IncrementalEncoder (whose encode() method is always written in Python)
707 becomes a bottleneck for small writes.
708 */
709 PyObject *decoded_chars; /* buffer for text returned from decoder */
710 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
711 PyObject *pending_bytes; /* list of bytes objects waiting to be
712 written, or NULL */
713 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000714
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000715 /* snapshot is either None, or a tuple (dec_flags, next_input) where
716 * dec_flags is the second (integer) item of the decoder state and
717 * next_input is the chunk of input bytes that comes next after the
718 * snapshot point. We use this to reconstruct decoder states in tell().
719 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000720 PyObject *snapshot;
721 /* Bytes-to-characters ratio for the current chunk. Serves as input for
722 the heuristic in tell(). */
723 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000724
725 /* Cache raw object if it's a FileIO object */
726 PyObject *raw;
727
728 PyObject *weakreflist;
729 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000730} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731
732
733/* A couple of specialized cases in order to bypass the slow incremental
734 encoding methods for the most popular encodings. */
735
736static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000737ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000738{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200739 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740}
741
742static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000743utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100745 return _PyUnicode_EncodeUTF16(text,
746 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000747}
748
749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000750utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100752 return _PyUnicode_EncodeUTF16(text,
753 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000754}
755
756static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000757utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758{
Antoine Pitroue4501852009-05-14 18:55:55 +0000759 if (!self->encoding_start_of_stream) {
760 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200761#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000762 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000763#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000764 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000765#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000766 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100767 return _PyUnicode_EncodeUTF16(text,
768 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769}
770
Antoine Pitroue4501852009-05-14 18:55:55 +0000771static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000772utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000773{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100774 return _PyUnicode_EncodeUTF32(text,
775 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000776}
777
778static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000779utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000780{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100781 return _PyUnicode_EncodeUTF32(text,
782 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000783}
784
785static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000786utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000787{
788 if (!self->encoding_start_of_stream) {
789 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200790#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000791 return utf32be_encode(self, text);
792#else
793 return utf32le_encode(self, text);
794#endif
795 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100796 return _PyUnicode_EncodeUTF32(text,
797 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000798}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799
800static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000801utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200803 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000804}
805
806static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000807latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000808{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200809 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000810}
811
812/* Map normalized encoding names onto the specialized encoding funcs */
813
814typedef struct {
815 const char *name;
816 encodefunc_t encodefunc;
817} encodefuncentry;
818
Antoine Pitrou24f36292009-03-28 22:16:42 +0000819static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000820 {"ascii", (encodefunc_t) ascii_encode},
821 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000822 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000823 {"utf-16-be", (encodefunc_t) utf16be_encode},
824 {"utf-16-le", (encodefunc_t) utf16le_encode},
825 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000826 {"utf-32-be", (encodefunc_t) utf32be_encode},
827 {"utf-32-le", (encodefunc_t) utf32le_encode},
828 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000829 {NULL, NULL}
830};
831
832
833static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000834textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000835{
836 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200837 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000838 NULL};
839 PyObject *buffer, *raw;
840 char *encoding = NULL;
841 char *errors = NULL;
842 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200843 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000844 _PyIO_State *state = IO_STATE;
845
846 PyObject *res;
847 int r;
848
849 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000850 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200851 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000852 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200853 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000854 return -1;
855
856 if (newline && newline[0] != '\0'
857 && !(newline[0] == '\n' && newline[1] == '\0')
858 && !(newline[0] == '\r' && newline[1] == '\0')
859 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
860 PyErr_Format(PyExc_ValueError,
861 "illegal newline value: %s", newline);
862 return -1;
863 }
864
865 Py_CLEAR(self->buffer);
866 Py_CLEAR(self->encoding);
867 Py_CLEAR(self->encoder);
868 Py_CLEAR(self->decoder);
869 Py_CLEAR(self->readnl);
870 Py_CLEAR(self->decoded_chars);
871 Py_CLEAR(self->pending_bytes);
872 Py_CLEAR(self->snapshot);
873 Py_CLEAR(self->errors);
874 Py_CLEAR(self->raw);
875 self->decoded_chars_used = 0;
876 self->pending_bytes_count = 0;
877 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000878 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000879
880 if (encoding == NULL) {
881 /* Try os.device_encoding(fileno) */
882 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200883 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000884 /* Ignore only AttributeError and UnsupportedOperation */
885 if (fileno == NULL) {
886 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
887 PyErr_ExceptionMatches(state->unsupported_operation)) {
888 PyErr_Clear();
889 }
890 else {
891 goto error;
892 }
893 }
894 else {
Serhiy Storchaka78980432013-01-15 01:12:17 +0200895 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500896 Py_DECREF(fileno);
897 if (fd == -1 && PyErr_Occurred()) {
898 goto error;
899 }
900
901 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000902 if (self->encoding == NULL)
903 goto error;
904 else if (!PyUnicode_Check(self->encoding))
905 Py_CLEAR(self->encoding);
906 }
907 }
908 if (encoding == NULL && self->encoding == NULL) {
909 if (state->locale_module == NULL) {
910 state->locale_module = PyImport_ImportModule("locale");
911 if (state->locale_module == NULL)
912 goto catch_ImportError;
913 else
914 goto use_locale;
915 }
916 else {
917 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200918 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200919 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000920 if (self->encoding == NULL) {
921 catch_ImportError:
922 /*
923 Importing locale can raise a ImportError because of
924 _functools, and locale.getpreferredencoding can raise a
925 ImportError if _locale is not available. These will happen
926 during module building.
927 */
928 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
929 PyErr_Clear();
930 self->encoding = PyUnicode_FromString("ascii");
931 }
932 else
933 goto error;
934 }
935 else if (!PyUnicode_Check(self->encoding))
936 Py_CLEAR(self->encoding);
937 }
938 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000939 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000940 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000941 if (encoding == NULL)
942 goto error;
943 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000944 else if (encoding != NULL) {
945 self->encoding = PyUnicode_FromString(encoding);
946 if (self->encoding == NULL)
947 goto error;
948 }
949 else {
950 PyErr_SetString(PyExc_IOError,
951 "could not determine default encoding");
952 }
953
954 if (errors == NULL)
955 errors = "strict";
956 self->errors = PyBytes_FromString(errors);
957 if (self->errors == NULL)
958 goto error;
959
960 self->chunk_size = 8192;
961 self->readuniversal = (newline == NULL || newline[0] == '\0');
962 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200963 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 self->readtranslate = (newline == NULL);
965 if (newline) {
966 self->readnl = PyUnicode_FromString(newline);
967 if (self->readnl == NULL)
968 return -1;
969 }
970 self->writetranslate = (newline == NULL || newline[0] != '\0');
971 if (!self->readuniversal && self->readnl) {
972 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000973 if (self->writenl == NULL)
974 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000975 if (!strcmp(self->writenl, "\n"))
976 self->writenl = NULL;
977 }
978#ifdef MS_WINDOWS
979 else
980 self->writenl = "\r\n";
981#endif
982
983 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200984 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000985 if (res == NULL)
986 goto error;
987 r = PyObject_IsTrue(res);
988 Py_DECREF(res);
989 if (r == -1)
990 goto error;
991 if (r == 1) {
992 self->decoder = PyCodec_IncrementalDecoder(
993 encoding, errors);
994 if (self->decoder == NULL)
995 goto error;
996
997 if (self->readuniversal) {
998 PyObject *incrementalDecoder = PyObject_CallFunction(
999 (PyObject *)&PyIncrementalNewlineDecoder_Type,
1000 "Oi", self->decoder, (int)self->readtranslate);
1001 if (incrementalDecoder == NULL)
1002 goto error;
1003 Py_CLEAR(self->decoder);
1004 self->decoder = incrementalDecoder;
1005 }
1006 }
1007
1008 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001009 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001010 if (res == NULL)
1011 goto error;
1012 r = PyObject_IsTrue(res);
1013 Py_DECREF(res);
1014 if (r == -1)
1015 goto error;
1016 if (r == 1) {
1017 PyObject *ci;
1018 self->encoder = PyCodec_IncrementalEncoder(
1019 encoding, errors);
1020 if (self->encoder == NULL)
1021 goto error;
1022 /* Get the normalized named of the codec */
1023 ci = _PyCodec_Lookup(encoding);
1024 if (ci == NULL)
1025 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001026 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001027 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001028 if (res == NULL) {
1029 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1030 PyErr_Clear();
1031 else
1032 goto error;
1033 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001034 else if (PyUnicode_Check(res)) {
1035 encodefuncentry *e = encodefuncs;
1036 while (e->name != NULL) {
1037 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1038 self->encodefunc = e->encodefunc;
1039 break;
1040 }
1041 e++;
1042 }
1043 }
1044 Py_XDECREF(res);
1045 }
1046
1047 self->buffer = buffer;
1048 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001049
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1051 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1052 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001053 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001054 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001055 if (raw == NULL) {
1056 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1057 PyErr_Clear();
1058 else
1059 goto error;
1060 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001061 else if (Py_TYPE(raw) == &PyFileIO_Type)
1062 self->raw = raw;
1063 else
1064 Py_DECREF(raw);
1065 }
1066
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001067 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001068 if (res == NULL)
1069 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001070 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001071 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001072 if (r < 0)
1073 goto error;
1074 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001075
Martin v. Löwis767046a2011-10-14 15:35:36 +02001076 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001077
Antoine Pitroue4501852009-05-14 18:55:55 +00001078 self->encoding_start_of_stream = 0;
1079 if (self->seekable && self->encoder) {
1080 PyObject *cookieObj;
1081 int cmp;
1082
1083 self->encoding_start_of_stream = 1;
1084
1085 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1086 if (cookieObj == NULL)
1087 goto error;
1088
1089 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1090 Py_DECREF(cookieObj);
1091 if (cmp < 0) {
1092 goto error;
1093 }
1094
1095 if (cmp == 0) {
1096 self->encoding_start_of_stream = 0;
1097 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1098 _PyIO_zero, NULL);
1099 if (res == NULL)
1100 goto error;
1101 Py_DECREF(res);
1102 }
1103 }
1104
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001105 self->ok = 1;
1106 return 0;
1107
1108 error:
1109 return -1;
1110}
1111
1112static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001113_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001114{
1115 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1116 return -1;
1117 self->ok = 0;
1118 Py_CLEAR(self->buffer);
1119 Py_CLEAR(self->encoding);
1120 Py_CLEAR(self->encoder);
1121 Py_CLEAR(self->decoder);
1122 Py_CLEAR(self->readnl);
1123 Py_CLEAR(self->decoded_chars);
1124 Py_CLEAR(self->pending_bytes);
1125 Py_CLEAR(self->snapshot);
1126 Py_CLEAR(self->errors);
1127 Py_CLEAR(self->raw);
1128 return 0;
1129}
1130
1131static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001132textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001133{
Antoine Pitroue033e062010-10-29 10:38:18 +00001134 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001135 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001136 return;
1137 _PyObject_GC_UNTRACK(self);
1138 if (self->weakreflist != NULL)
1139 PyObject_ClearWeakRefs((PyObject *)self);
1140 Py_CLEAR(self->dict);
1141 Py_TYPE(self)->tp_free((PyObject *)self);
1142}
1143
1144static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001145textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001146{
1147 Py_VISIT(self->buffer);
1148 Py_VISIT(self->encoding);
1149 Py_VISIT(self->encoder);
1150 Py_VISIT(self->decoder);
1151 Py_VISIT(self->readnl);
1152 Py_VISIT(self->decoded_chars);
1153 Py_VISIT(self->pending_bytes);
1154 Py_VISIT(self->snapshot);
1155 Py_VISIT(self->errors);
1156 Py_VISIT(self->raw);
1157
1158 Py_VISIT(self->dict);
1159 return 0;
1160}
1161
1162static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001163textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001164{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001165 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001166 return -1;
1167 Py_CLEAR(self->dict);
1168 return 0;
1169}
1170
1171static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001172textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001173
1174/* This macro takes some shortcuts to make the common case faster. */
1175#define CHECK_CLOSED(self) \
1176 do { \
1177 int r; \
1178 PyObject *_res; \
1179 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1180 if (self->raw != NULL) \
1181 r = _PyFileIO_closed(self->raw); \
1182 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001183 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001184 if (_res == NULL) \
1185 return NULL; \
1186 r = PyObject_IsTrue(_res); \
1187 Py_DECREF(_res); \
1188 if (r < 0) \
1189 return NULL; \
1190 } \
1191 if (r > 0) { \
1192 PyErr_SetString(PyExc_ValueError, \
1193 "I/O operation on closed file."); \
1194 return NULL; \
1195 } \
1196 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001197 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001198 return NULL; \
1199 } while (0)
1200
/* Make the enclosing function raise ValueError and return NULL when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1212
/* Variant of the initialization check for functions returning int: raise
   ValueError and return -1 when the wrapper is not usable
   (self->ok <= 0).  The message distinguishes a detached buffer from an
   object that was never initialized. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1224
1225
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001226static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001227textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001228{
1229 PyObject *buffer, *res;
1230 CHECK_INITIALIZED(self);
1231 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1232 if (res == NULL)
1233 return NULL;
1234 Py_DECREF(res);
1235 buffer = self->buffer;
1236 self->buffer = NULL;
1237 self->detached = 1;
1238 self->ok = 0;
1239 return buffer;
1240}
1241
Antoine Pitrou24f36292009-03-28 22:16:42 +00001242/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001243 underlying buffered object, though. */
1244static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001245_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001247 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248
1249 if (self->pending_bytes == NULL)
1250 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001251
1252 pending = self->pending_bytes;
1253 Py_INCREF(pending);
1254 self->pending_bytes_count = 0;
1255 Py_CLEAR(self->pending_bytes);
1256
1257 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1258 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001259 if (b == NULL)
1260 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001261 ret = NULL;
1262 do {
1263 ret = PyObject_CallMethodObjArgs(self->buffer,
1264 _PyIO_str_write, b, NULL);
1265 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266 Py_DECREF(b);
1267 if (ret == NULL)
1268 return -1;
1269 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270 return 0;
1271}
1272
1273static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001274textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275{
1276 PyObject *ret;
1277 PyObject *text; /* owned reference */
1278 PyObject *b;
1279 Py_ssize_t textlen;
1280 int haslf = 0;
1281 int needflush = 0;
1282
1283 CHECK_INITIALIZED(self);
1284
1285 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1286 return NULL;
1287 }
1288
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 if (PyUnicode_READY(text) == -1)
1290 return NULL;
1291
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001292 CHECK_CLOSED(self);
1293
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001294 if (self->encoder == NULL)
1295 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001296
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001297 Py_INCREF(text);
1298
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001300
1301 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001303 haslf = 1;
1304
1305 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001306 PyObject *newtext = _PyObject_CallMethodId(
1307 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001308 Py_DECREF(text);
1309 if (newtext == NULL)
1310 return NULL;
1311 text = newtext;
1312 }
1313
Antoine Pitroue96ec682011-07-23 21:46:35 +02001314 if (self->write_through)
1315 needflush = 1;
1316 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001319 needflush = 1;
1320
1321 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001322 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001324 self->encoding_start_of_stream = 0;
1325 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001326 else
1327 b = PyObject_CallMethodObjArgs(self->encoder,
1328 _PyIO_str_encode, text, NULL);
1329 Py_DECREF(text);
1330 if (b == NULL)
1331 return NULL;
1332
1333 if (self->pending_bytes == NULL) {
1334 self->pending_bytes = PyList_New(0);
1335 if (self->pending_bytes == NULL) {
1336 Py_DECREF(b);
1337 return NULL;
1338 }
1339 self->pending_bytes_count = 0;
1340 }
1341 if (PyList_Append(self->pending_bytes, b) < 0) {
1342 Py_DECREF(b);
1343 return NULL;
1344 }
1345 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1346 Py_DECREF(b);
1347 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001348 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001349 return NULL;
1350 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001351
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001352 if (needflush) {
1353 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1354 if (ret == NULL)
1355 return NULL;
1356 Py_DECREF(ret);
1357 }
1358
1359 Py_CLEAR(self->snapshot);
1360
1361 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001362 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001363 if (ret == NULL)
1364 return NULL;
1365 Py_DECREF(ret);
1366 }
1367
1368 return PyLong_FromSsize_t(textlen);
1369}
1370
1371/* Steal a reference to chars and store it in the decoded_char buffer;
1372 */
1373static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001374textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001375{
1376 Py_CLEAR(self->decoded_chars);
1377 self->decoded_chars = chars;
1378 self->decoded_chars_used = 0;
1379}
1380
1381static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001382textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001383{
1384 PyObject *chars;
1385 Py_ssize_t avail;
1386
1387 if (self->decoded_chars == NULL)
1388 return PyUnicode_FromStringAndSize(NULL, 0);
1389
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001390 /* decoded_chars is guaranteed to be "ready". */
1391 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001392 - self->decoded_chars_used);
1393
1394 assert(avail >= 0);
1395
1396 if (n < 0 || n > avail)
1397 n = avail;
1398
1399 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001400 chars = PyUnicode_Substring(self->decoded_chars,
1401 self->decoded_chars_used,
1402 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001403 if (chars == NULL)
1404 return NULL;
1405 }
1406 else {
1407 chars = self->decoded_chars;
1408 Py_INCREF(chars);
1409 }
1410
1411 self->decoded_chars_used += n;
1412 return chars;
1413}
1414
1415/* Read and decode the next chunk of data from the BufferedReader.
1416 */
1417static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001418textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001419{
1420 PyObject *dec_buffer = NULL;
1421 PyObject *dec_flags = NULL;
1422 PyObject *input_chunk = NULL;
1423 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001424 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001425 int eof;
1426
1427 /* The return value is True unless EOF was reached. The decoded string is
1428 * placed in self._decoded_chars (replacing its previous value). The
1429 * entire input chunk is sent to the decoder, though some of it may remain
1430 * buffered in the decoder, yet to be converted.
1431 */
1432
1433 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001434 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001435 return -1;
1436 }
1437
1438 if (self->telling) {
1439 /* To prepare for tell(), we need to snapshot a point in the file
1440 * where the decoder's input buffer is empty.
1441 */
1442
1443 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1444 _PyIO_str_getstate, NULL);
1445 if (state == NULL)
1446 return -1;
1447 /* Given this, we know there was a valid snapshot point
1448 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1449 */
1450 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1451 Py_DECREF(state);
1452 return -1;
1453 }
1454 Py_INCREF(dec_buffer);
1455 Py_INCREF(dec_flags);
1456 Py_DECREF(state);
1457 }
1458
1459 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001460 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001461 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001462 }
1463 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001464 if (chunk_size == NULL)
1465 goto fail;
1466 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001467 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1468 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001469 Py_DECREF(chunk_size);
1470 if (input_chunk == NULL)
1471 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001472 if (!PyBytes_Check(input_chunk)) {
1473 PyErr_Format(PyExc_TypeError,
1474 "underlying %s() should have returned a bytes object, "
1475 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1476 Py_TYPE(input_chunk)->tp_name);
1477 goto fail;
1478 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001479
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001480 nbytes = PyBytes_Size(input_chunk);
1481 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001482
1483 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1484 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1485 self->decoder, input_chunk, eof);
1486 }
1487 else {
1488 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1489 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1490 }
1491
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001492 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001493 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001494 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001495 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001496 if (nchars > 0)
1497 self->b2cratio = (double) nbytes / nchars;
1498 else
1499 self->b2cratio = 0.0;
1500 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501 eof = 0;
1502
1503 if (self->telling) {
1504 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1505 * next input to be decoded is dec_buffer + input_chunk.
1506 */
1507 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1508 if (next_input == NULL)
1509 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001510 if (!PyBytes_Check(next_input)) {
1511 PyErr_Format(PyExc_TypeError,
1512 "decoder getstate() should have returned a bytes "
1513 "object, not '%.200s'",
1514 Py_TYPE(next_input)->tp_name);
1515 Py_DECREF(next_input);
1516 goto fail;
1517 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001518 Py_DECREF(dec_buffer);
1519 Py_CLEAR(self->snapshot);
1520 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1521 }
1522 Py_DECREF(input_chunk);
1523
1524 return (eof == 0);
1525
1526 fail:
1527 Py_XDECREF(dec_buffer);
1528 Py_XDECREF(dec_flags);
1529 Py_XDECREF(input_chunk);
1530 return -1;
1531}
1532
1533static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001534textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001535{
1536 Py_ssize_t n = -1;
1537 PyObject *result = NULL, *chunks = NULL;
1538
1539 CHECK_INITIALIZED(self);
1540
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001541 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001542 return NULL;
1543
1544 CHECK_CLOSED(self);
1545
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001546 if (self->decoder == NULL)
1547 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001548
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001549 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550 return NULL;
1551
1552 if (n < 0) {
1553 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001554 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001555 PyObject *decoded;
1556 if (bytes == NULL)
1557 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001558
1559 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1560 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1561 bytes, 1);
1562 else
1563 decoded = PyObject_CallMethodObjArgs(
1564 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001565 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001566 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001567 goto fail;
1568
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001569 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001570
1571 if (result == NULL) {
1572 Py_DECREF(decoded);
1573 return NULL;
1574 }
1575
1576 PyUnicode_AppendAndDel(&result, decoded);
1577 if (result == NULL)
1578 goto fail;
1579
1580 Py_CLEAR(self->snapshot);
1581 return result;
1582 }
1583 else {
1584 int res = 1;
1585 Py_ssize_t remaining = n;
1586
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001587 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001588 if (result == NULL)
1589 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001590 if (PyUnicode_READY(result) == -1)
1591 goto fail;
1592 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593
1594 /* Keep reading chunks until we have n characters to return */
1595 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001596 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001597 if (res < 0) {
1598 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1599 when EINTR occurs so we needn't do it ourselves. */
1600 if (_PyIO_trap_eintr()) {
1601 continue;
1602 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001604 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001605 if (res == 0) /* EOF */
1606 break;
1607 if (chunks == NULL) {
1608 chunks = PyList_New(0);
1609 if (chunks == NULL)
1610 goto fail;
1611 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001612 if (PyUnicode_GET_LENGTH(result) > 0 &&
1613 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 goto fail;
1615 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001616 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001617 if (result == NULL)
1618 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001619 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620 }
1621 if (chunks != NULL) {
1622 if (result != NULL && PyList_Append(chunks, result) < 0)
1623 goto fail;
1624 Py_CLEAR(result);
1625 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1626 if (result == NULL)
1627 goto fail;
1628 Py_CLEAR(chunks);
1629 }
1630 return result;
1631 }
1632 fail:
1633 Py_XDECREF(result);
1634 Py_XDECREF(chunks);
1635 return NULL;
1636}
1637
1638
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001639/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640 that is to the NUL character. Otherwise the function will produce
1641 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001642static char *
1643find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001644{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001645 if (kind == PyUnicode_1BYTE_KIND) {
1646 assert(ch < 256);
1647 return (char *) memchr((void *) s, (char) ch, end - s);
1648 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001650 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001651 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001652 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001653 return s;
1654 if (s == end)
1655 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001656 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 }
1658}
1659
1660Py_ssize_t
1661_PyIO_find_line_ending(
1662 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001663 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001664{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001665 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001666
1667 if (translated) {
1668 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001669 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001671 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 else {
1673 *consumed = len;
1674 return -1;
1675 }
1676 }
1677 else if (universal) {
1678 /* Universal newline search. Find any of \r, \r\n, \n
1679 * The decoder ensures that \r\n are not split in two pieces
1680 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001681 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001682 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001683 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001684 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001685 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001686 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001687 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 if (s >= end) {
1689 *consumed = len;
1690 return -1;
1691 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001692 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001693 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001695 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001696 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001697 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001698 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001700 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001701 }
1702 }
1703 }
1704 else {
1705 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001706 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1707 char *nl = PyUnicode_DATA(readnl);
1708 /* Assume that readnl is an ASCII character. */
1709 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001711 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001712 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001713 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 *consumed = len;
1715 return -1;
1716 }
1717 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001718 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001719 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001720 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (e < s)
1722 e = s;
1723 while (s < e) {
1724 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001725 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001726 if (pos == NULL || pos >= e)
1727 break;
1728 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001729 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 break;
1731 }
1732 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001733 return (pos - start)/kind + readnl_len;
1734 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001736 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001737 if (pos == NULL)
1738 *consumed = len;
1739 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001740 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001741 return -1;
1742 }
1743 }
1744}
1745
1746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001747_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001748{
1749 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1750 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1751 int res;
1752
1753 CHECK_CLOSED(self);
1754
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001755 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756 return NULL;
1757
1758 chunked = 0;
1759
1760 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001761 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001763 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 Py_ssize_t consumed = 0;
1765
1766 /* First, get some data if necessary */
1767 res = 1;
1768 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001769 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001770 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001771 if (res < 0) {
1772 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1773 when EINTR occurs so we needn't do it ourselves. */
1774 if (_PyIO_trap_eintr()) {
1775 continue;
1776 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001778 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001779 if (res == 0)
1780 break;
1781 }
1782 if (res == 0) {
1783 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001784 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001785 Py_CLEAR(self->snapshot);
1786 start = endpos = offset_to_buffer = 0;
1787 break;
1788 }
1789
1790 if (remaining == NULL) {
1791 line = self->decoded_chars;
1792 start = self->decoded_chars_used;
1793 offset_to_buffer = 0;
1794 Py_INCREF(line);
1795 }
1796 else {
1797 assert(self->decoded_chars_used == 0);
1798 line = PyUnicode_Concat(remaining, self->decoded_chars);
1799 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001800 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001801 Py_CLEAR(remaining);
1802 if (line == NULL)
1803 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001804 if (PyUnicode_READY(line) == -1)
1805 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 }
1807
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001808 ptr = PyUnicode_DATA(line);
1809 line_len = PyUnicode_GET_LENGTH(line);
1810 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001811
1812 endpos = _PyIO_find_line_ending(
1813 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001814 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001815 ptr + kind * start,
1816 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001817 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001818 if (endpos >= 0) {
1819 endpos += start;
1820 if (limit >= 0 && (endpos - start) + chunked >= limit)
1821 endpos = start + limit - chunked;
1822 break;
1823 }
1824
1825 /* We can put aside up to `endpos` */
1826 endpos = consumed + start;
1827 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1828 /* Didn't find line ending, but reached length limit */
1829 endpos = start + limit - chunked;
1830 break;
1831 }
1832
1833 if (endpos > start) {
1834 /* No line ending seen yet - put aside current data */
1835 PyObject *s;
1836 if (chunks == NULL) {
1837 chunks = PyList_New(0);
1838 if (chunks == NULL)
1839 goto error;
1840 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001841 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001842 if (s == NULL)
1843 goto error;
1844 if (PyList_Append(chunks, s) < 0) {
1845 Py_DECREF(s);
1846 goto error;
1847 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001848 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001849 Py_DECREF(s);
1850 }
1851 /* There may be some remaining bytes we'll have to prepend to the
1852 next chunk of data */
1853 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001854 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001855 if (remaining == NULL)
1856 goto error;
1857 }
1858 Py_CLEAR(line);
1859 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001860 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001861 }
1862
1863 if (line != NULL) {
1864 /* Our line ends in the current buffer */
1865 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001866 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1867 PyObject *s = PyUnicode_Substring(line, start, endpos);
1868 Py_CLEAR(line);
1869 if (s == NULL)
1870 goto error;
1871 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872 }
1873 }
1874 if (remaining != NULL) {
1875 if (chunks == NULL) {
1876 chunks = PyList_New(0);
1877 if (chunks == NULL)
1878 goto error;
1879 }
1880 if (PyList_Append(chunks, remaining) < 0)
1881 goto error;
1882 Py_CLEAR(remaining);
1883 }
1884 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001885 if (line != NULL) {
1886 if (PyList_Append(chunks, line) < 0)
1887 goto error;
1888 Py_DECREF(line);
1889 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001890 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1891 if (line == NULL)
1892 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001893 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001894 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001895 if (line == NULL) {
1896 Py_INCREF(_PyIO_empty_str);
1897 line = _PyIO_empty_str;
1898 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001899
1900 return line;
1901
1902 error:
1903 Py_XDECREF(chunks);
1904 Py_XDECREF(remaining);
1905 Py_XDECREF(line);
1906 return NULL;
1907}
1908
1909static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001910textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911{
1912 Py_ssize_t limit = -1;
1913
1914 CHECK_INITIALIZED(self);
1915 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1916 return NULL;
1917 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001918 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001919}
1920
1921/* Seek and Tell */
1922
1923typedef struct {
1924 Py_off_t start_pos;
1925 int dec_flags;
1926 int bytes_to_feed;
1927 int chars_to_skip;
1928 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001929} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930
1931/*
1932 To speed up cookie packing/unpacking, we store the fields in a temporary
1933 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1934 The following macros define at which offsets in the intermediary byte
1935 string the various CookieStruct fields will be stored.
1936 */
1937
1938#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1939
Christian Heimes743e0cd2012-10-17 23:52:17 +02001940#if PY_BIG_ENDIAN
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001941/* We want the least significant byte of start_pos to also be the least
1942 significant byte of the cookie, which means that in big-endian mode we
1943 must copy the fields in reverse order. */
1944
1945# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1946# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1947# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1948# define OFF_CHARS_TO_SKIP (sizeof(char))
1949# define OFF_NEED_EOF 0
1950
1951#else
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001952/* Little-endian mode: the least significant byte of start_pos will
1953 naturally end up the least significant byte of the cookie. */
1954
1955# define OFF_START_POS 0
1956# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1957# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1958# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1959# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1960
1961#endif
1962
1963static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001964textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001965{
1966 unsigned char buffer[COOKIE_BUF_LEN];
1967 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1968 if (cookieLong == NULL)
1969 return -1;
1970
1971 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001972 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001973 Py_DECREF(cookieLong);
1974 return -1;
1975 }
1976 Py_DECREF(cookieLong);
1977
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001978 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1979 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1980 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1981 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1982 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001983
1984 return 0;
1985}
1986
1987static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001988textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989{
1990 unsigned char buffer[COOKIE_BUF_LEN];
1991
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001992 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1993 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1994 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1995 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1996 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001997
Christian Heimes743e0cd2012-10-17 23:52:17 +02001998 return _PyLong_FromByteArray(buffer, sizeof(buffer),
1999 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002000}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002001
2002static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002003_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002004{
2005 PyObject *res;
2006 /* When seeking to the start of the stream, we call decoder.reset()
2007 rather than decoder.getstate().
2008 This is for a few decoders such as utf-16 for which the state value
2009 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2010 utf-16, that we are expecting a BOM).
2011 */
2012 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2013 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2014 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002015 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2016 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002017 if (res == NULL)
2018 return -1;
2019 Py_DECREF(res);
2020 return 0;
2021}
2022
Antoine Pitroue4501852009-05-14 18:55:55 +00002023static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002024_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002025{
2026 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002027 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002028 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2029 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2030 self->encoding_start_of_stream = 1;
2031 }
2032 else {
2033 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2034 _PyIO_zero, NULL);
2035 self->encoding_start_of_stream = 0;
2036 }
2037 if (res == NULL)
2038 return -1;
2039 Py_DECREF(res);
2040 return 0;
2041}
2042
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002043static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002044textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002045{
2046 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002047 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002048 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049 PyObject *res;
2050 int cmp;
2051
2052 CHECK_INITIALIZED(self);
2053
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2055 return NULL;
2056 CHECK_CLOSED(self);
2057
2058 Py_INCREF(cookieObj);
2059
2060 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002061 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002062 goto fail;
2063 }
2064
2065 if (whence == 1) {
2066 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002067 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068 if (cmp < 0)
2069 goto fail;
2070
2071 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002072 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002073 goto fail;
2074 }
2075
2076 /* Seeking to the current position should attempt to
2077 * sync the underlying buffer with the current position.
2078 */
2079 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002080 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 if (cookieObj == NULL)
2082 goto fail;
2083 }
2084 else if (whence == 2) {
2085 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002086 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002087 if (cmp < 0)
2088 goto fail;
2089
2090 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002091 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 goto fail;
2093 }
2094
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002095 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002096 if (res == NULL)
2097 goto fail;
2098 Py_DECREF(res);
2099
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002100 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002101 Py_CLEAR(self->snapshot);
2102 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002103 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002104 if (res == NULL)
2105 goto fail;
2106 Py_DECREF(res);
2107 }
2108
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002109 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 Py_XDECREF(cookieObj);
2111 return res;
2112 }
2113 else if (whence != 0) {
2114 PyErr_Format(PyExc_ValueError,
2115 "invalid whence (%d, should be 0, 1 or 2)", whence);
2116 goto fail;
2117 }
2118
Antoine Pitroue4501852009-05-14 18:55:55 +00002119 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002120 if (cmp < 0)
2121 goto fail;
2122
2123 if (cmp == 1) {
2124 PyErr_Format(PyExc_ValueError,
2125 "negative seek position %R", cookieObj);
2126 goto fail;
2127 }
2128
2129 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2130 if (res == NULL)
2131 goto fail;
2132 Py_DECREF(res);
2133
2134 /* The strategy of seek() is to go back to the safe start point
2135 * and replay the effect of read(chars_to_skip) from there.
2136 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002137 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002138 goto fail;
2139
2140 /* Seek back to the safe start point. */
2141 posobj = PyLong_FromOff_t(cookie.start_pos);
2142 if (posobj == NULL)
2143 goto fail;
2144 res = PyObject_CallMethodObjArgs(self->buffer,
2145 _PyIO_str_seek, posobj, NULL);
2146 Py_DECREF(posobj);
2147 if (res == NULL)
2148 goto fail;
2149 Py_DECREF(res);
2150
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002151 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002152 Py_CLEAR(self->snapshot);
2153
2154 /* Restore the decoder to its state from the safe start point. */
2155 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002156 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002157 goto fail;
2158 }
2159
2160 if (cookie.chars_to_skip) {
2161 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002162 PyObject *input_chunk = _PyObject_CallMethodId(
2163 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002164 PyObject *decoded;
2165
2166 if (input_chunk == NULL)
2167 goto fail;
2168
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002169 if (!PyBytes_Check(input_chunk)) {
2170 PyErr_Format(PyExc_TypeError,
2171 "underlying read() should have returned a bytes "
2172 "object, not '%.200s'",
2173 Py_TYPE(input_chunk)->tp_name);
2174 Py_DECREF(input_chunk);
2175 goto fail;
2176 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002177
2178 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2179 if (self->snapshot == NULL) {
2180 Py_DECREF(input_chunk);
2181 goto fail;
2182 }
2183
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002184 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2185 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002186
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002187 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002188 goto fail;
2189
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002190 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002191
2192 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002193 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002194 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2195 goto fail;
2196 }
2197 self->decoded_chars_used = cookie.chars_to_skip;
2198 }
2199 else {
2200 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2201 if (self->snapshot == NULL)
2202 goto fail;
2203 }
2204
Antoine Pitroue4501852009-05-14 18:55:55 +00002205 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2206 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002207 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002208 goto fail;
2209 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002210 return cookieObj;
2211 fail:
2212 Py_XDECREF(cookieObj);
2213 return NULL;
2214
2215}
2216
2217static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002218textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002219{
2220 PyObject *res;
2221 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002222 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002223 PyObject *next_input;
2224 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002225 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002226 PyObject *saved_state = NULL;
2227 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002228 char *dec_buffer;
2229 Py_ssize_t dec_buffer_len;
2230 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002231
2232 CHECK_INITIALIZED(self);
2233 CHECK_CLOSED(self);
2234
2235 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002236 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002237 goto fail;
2238 }
2239 if (!self->telling) {
2240 PyErr_SetString(PyExc_IOError,
2241 "telling position disabled by next() call");
2242 goto fail;
2243 }
2244
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002245 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002246 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002247 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002248 if (res == NULL)
2249 goto fail;
2250 Py_DECREF(res);
2251
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002252 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002253 if (posobj == NULL)
2254 goto fail;
2255
2256 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002257 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002258 return posobj;
2259 }
2260
2261#if defined(HAVE_LARGEFILE_SUPPORT)
2262 cookie.start_pos = PyLong_AsLongLong(posobj);
2263#else
2264 cookie.start_pos = PyLong_AsLong(posobj);
2265#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002266 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002267 if (PyErr_Occurred())
2268 goto fail;
2269
2270 /* Skip backward to the snapshot point (see _read_chunk). */
2271 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2272 goto fail;
2273
2274 assert (PyBytes_Check(next_input));
2275
2276 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2277
2278 /* How many decoded characters have been used up since the snapshot? */
2279 if (self->decoded_chars_used == 0) {
2280 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002281 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002282 }
2283
2284 chars_to_skip = self->decoded_chars_used;
2285
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002286 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002287 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2288 _PyIO_str_getstate, NULL);
2289 if (saved_state == NULL)
2290 goto fail;
2291
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002292#define DECODER_GETSTATE() do { \
2293 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2294 _PyIO_str_getstate, NULL); \
2295 if (_state == NULL) \
2296 goto fail; \
2297 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2298 Py_DECREF(_state); \
2299 goto fail; \
2300 } \
2301 Py_DECREF(_state); \
2302 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002303
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002304#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002305 PyObject *_decoded = _PyObject_CallMethodId( \
2306 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002307 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002308 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002309 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002310 Py_DECREF(_decoded); \
2311 } while (0)
2312
2313 /* Fast search for an acceptable start point, close to our
2314 current pos */
2315 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2316 skip_back = 1;
2317 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2318 input = PyBytes_AS_STRING(next_input);
2319 while (skip_bytes > 0) {
2320 /* Decode up to temptative start point */
2321 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2322 goto fail;
2323 DECODER_DECODE(input, skip_bytes, chars_decoded);
2324 if (chars_decoded <= chars_to_skip) {
2325 DECODER_GETSTATE();
2326 if (dec_buffer_len == 0) {
2327 /* Before pos and no bytes buffered in decoder => OK */
2328 cookie.dec_flags = dec_flags;
2329 chars_to_skip -= chars_decoded;
2330 break;
2331 }
2332 /* Skip back by buffered amount and reset heuristic */
2333 skip_bytes -= dec_buffer_len;
2334 skip_back = 1;
2335 }
2336 else {
2337 /* We're too far ahead, skip back a bit */
2338 skip_bytes -= skip_back;
2339 skip_back *= 2;
2340 }
2341 }
2342 if (skip_bytes <= 0) {
2343 skip_bytes = 0;
2344 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2345 goto fail;
2346 }
2347
2348 /* Note our initial start point. */
2349 cookie.start_pos += skip_bytes;
2350 cookie.chars_to_skip = chars_to_skip;
2351 if (chars_to_skip == 0)
2352 goto finally;
2353
2354 /* We should be close to the desired position. Now feed the decoder one
2355 * byte at a time until we reach the `chars_to_skip` target.
2356 * As we go, note the nearest "safe start point" before the current
2357 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002358 * can safely start from there and advance to this location).
2359 */
2360 chars_decoded = 0;
2361 input = PyBytes_AS_STRING(next_input);
2362 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002363 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002365 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002366
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002367 DECODER_DECODE(input, 1, n);
2368 /* We got n chars for 1 byte */
2369 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002370 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002371 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002372
2373 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2374 /* Decoder buffer is empty, so this is a safe start point. */
2375 cookie.start_pos += cookie.bytes_to_feed;
2376 chars_to_skip -= chars_decoded;
2377 cookie.dec_flags = dec_flags;
2378 cookie.bytes_to_feed = 0;
2379 chars_decoded = 0;
2380 }
2381 if (chars_decoded >= chars_to_skip)
2382 break;
2383 input++;
2384 }
2385 if (input == input_end) {
2386 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002387 PyObject *decoded = _PyObject_CallMethodId(
2388 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002389 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002390 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002391 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002392 Py_DECREF(decoded);
2393 cookie.need_eof = 1;
2394
2395 if (chars_decoded < chars_to_skip) {
2396 PyErr_SetString(PyExc_IOError,
2397 "can't reconstruct logical file position");
2398 goto fail;
2399 }
2400 }
2401
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002402finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002403 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002404 Py_DECREF(saved_state);
2405 if (res == NULL)
2406 return NULL;
2407 Py_DECREF(res);
2408
2409 /* The returned cookie corresponds to the last safe start point. */
2410 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002411 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002412
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002413fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002414 if (saved_state) {
2415 PyObject *type, *value, *traceback;
2416 PyErr_Fetch(&type, &value, &traceback);
2417
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002418 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002419 Py_DECREF(saved_state);
2420 if (res == NULL)
2421 return NULL;
2422 Py_DECREF(res);
2423
2424 PyErr_Restore(type, value, traceback);
2425 }
2426 return NULL;
2427}
2428
2429static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002430textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002431{
2432 PyObject *pos = Py_None;
2433 PyObject *res;
2434
2435 CHECK_INITIALIZED(self)
2436 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2437 return NULL;
2438 }
2439
2440 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2441 if (res == NULL)
2442 return NULL;
2443 Py_DECREF(res);
2444
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002445 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002446}
2447
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002448static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002449textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002450{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002451 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002452
2453 CHECK_INITIALIZED(self);
2454
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002455 res = PyUnicode_FromString("<_io.TextIOWrapper");
2456 if (res == NULL)
2457 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002458 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002459 if (nameobj == NULL) {
2460 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2461 PyErr_Clear();
2462 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002463 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002464 }
2465 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002466 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002467 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002468 if (s == NULL)
2469 goto error;
2470 PyUnicode_AppendAndDel(&res, s);
2471 if (res == NULL)
2472 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002473 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002474 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002475 if (modeobj == NULL) {
2476 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2477 PyErr_Clear();
2478 else
2479 goto error;
2480 }
2481 else {
2482 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2483 Py_DECREF(modeobj);
2484 if (s == NULL)
2485 goto error;
2486 PyUnicode_AppendAndDel(&res, s);
2487 if (res == NULL)
2488 return NULL;
2489 }
2490 s = PyUnicode_FromFormat("%U encoding=%R>",
2491 res, self->encoding);
2492 Py_DECREF(res);
2493 return s;
2494error:
2495 Py_XDECREF(res);
2496 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002497}
2498
2499
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500/* Inquiries */
2501
2502static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002503textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504{
2505 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002506 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507}
2508
2509static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002510textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511{
2512 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002513 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002514}
2515
2516static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002517textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002518{
2519 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002520 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002521}
2522
2523static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002524textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002525{
2526 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002527 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528}
2529
2530static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002531textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532{
2533 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002534 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002535}
2536
2537static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002538textiowrapper_getstate(textio *self, PyObject *args)
2539{
2540 PyErr_Format(PyExc_TypeError,
2541 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2542 return NULL;
2543}
2544
2545static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002546textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002547{
2548 CHECK_INITIALIZED(self);
2549 CHECK_CLOSED(self);
2550 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002551 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002552 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002553 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002554}
2555
2556static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002557textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558{
2559 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002560 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002561 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002562
Antoine Pitrou6be88762010-05-03 16:48:20 +00002563 res = textiowrapper_closed_get(self, NULL);
2564 if (res == NULL)
2565 return NULL;
2566 r = PyObject_IsTrue(res);
2567 Py_DECREF(res);
2568 if (r < 0)
2569 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002570
Antoine Pitrou6be88762010-05-03 16:48:20 +00002571 if (r > 0) {
2572 Py_RETURN_NONE; /* stream already closed */
2573 }
2574 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002575 PyObject *exc = NULL, *val, *tb;
Antoine Pitroue033e062010-10-29 10:38:18 +00002576 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002577 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002578 if (res)
2579 Py_DECREF(res);
2580 else
2581 PyErr_Clear();
2582 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002583 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002584 if (res == NULL)
2585 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002586 else
2587 Py_DECREF(res);
2588
Benjamin Peterson68623612012-12-20 11:53:11 -06002589 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2590 if (exc != NULL) {
2591 if (res != NULL) {
2592 Py_CLEAR(res);
2593 PyErr_Restore(exc, val, tb);
2594 }
2595 else {
2596 PyObject *val2;
2597 Py_DECREF(exc);
2598 Py_XDECREF(tb);
2599 PyErr_Fetch(&exc, &val2, &tb);
2600 PyErr_NormalizeException(&exc, &val2, &tb);
2601 PyException_SetContext(val2, val);
2602 PyErr_Restore(exc, val2, tb);
2603 }
2604 }
2605 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002606 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002607}
2608
2609static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002610textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002611{
2612 PyObject *line;
2613
2614 CHECK_INITIALIZED(self);
2615
2616 self->telling = 0;
2617 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2618 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002619 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002620 }
2621 else {
2622 line = PyObject_CallMethodObjArgs((PyObject *)self,
2623 _PyIO_str_readline, NULL);
2624 if (line && !PyUnicode_Check(line)) {
2625 PyErr_Format(PyExc_IOError,
2626 "readline() should have returned an str object, "
2627 "not '%.200s'", Py_TYPE(line)->tp_name);
2628 Py_DECREF(line);
2629 return NULL;
2630 }
2631 }
2632
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002633 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002634 return NULL;
2635
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002636 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002637 /* Reached EOF or would have blocked */
2638 Py_DECREF(line);
2639 Py_CLEAR(self->snapshot);
2640 self->telling = self->seekable;
2641 return NULL;
2642 }
2643
2644 return line;
2645}
2646
2647static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002648textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002649{
2650 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002651 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002652}
2653
2654static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002655textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002656{
2657 CHECK_INITIALIZED(self);
2658 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2659}
2660
2661static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002662textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002663{
2664 PyObject *res;
2665 CHECK_INITIALIZED(self);
2666 if (self->decoder == NULL)
2667 Py_RETURN_NONE;
2668 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2669 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002670 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2671 PyErr_Clear();
2672 Py_RETURN_NONE;
2673 }
2674 else {
2675 return NULL;
2676 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002677 }
2678 return res;
2679}
2680
2681static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002682textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002683{
2684 CHECK_INITIALIZED(self);
2685 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2686}
2687
2688static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002689textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690{
2691 CHECK_INITIALIZED(self);
2692 return PyLong_FromSsize_t(self->chunk_size);
2693}
2694
2695static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002696textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002697{
2698 Py_ssize_t n;
2699 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002700 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002701 if (n == -1 && PyErr_Occurred())
2702 return -1;
2703 if (n <= 0) {
2704 PyErr_SetString(PyExc_ValueError,
2705 "a strictly positive integer is required");
2706 return -1;
2707 }
2708 self->chunk_size = n;
2709 return 0;
2710}
2711
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002712static PyMethodDef textiowrapper_methods[] = {
2713 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2714 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2715 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2716 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2717 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2718 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002719
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002720 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2721 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2722 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2723 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2724 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002725 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002726
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002727 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2728 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2729 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002730 {NULL, NULL}
2731};
2732
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002733static PyMemberDef textiowrapper_members[] = {
2734 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2735 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2736 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002737 {NULL}
2738};
2739
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002740static PyGetSetDef textiowrapper_getset[] = {
2741 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2742 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002743/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2744*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002745 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2746 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2747 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2748 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002749 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002750};
2751
2752PyTypeObject PyTextIOWrapper_Type = {
2753 PyVarObject_HEAD_INIT(NULL, 0)
2754 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002755 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002756 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002757 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002758 0, /*tp_print*/
2759 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002760 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002761 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002762 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002763 0, /*tp_as_number*/
2764 0, /*tp_as_sequence*/
2765 0, /*tp_as_mapping*/
2766 0, /*tp_hash */
2767 0, /*tp_call*/
2768 0, /*tp_str*/
2769 0, /*tp_getattro*/
2770 0, /*tp_setattro*/
2771 0, /*tp_as_buffer*/
2772 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2773 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002774 textiowrapper_doc, /* tp_doc */
2775 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2776 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002777 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002778 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002779 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002780 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2781 textiowrapper_methods, /* tp_methods */
2782 textiowrapper_members, /* tp_members */
2783 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002784 0, /* tp_base */
2785 0, /* tp_dict */
2786 0, /* tp_descr_get */
2787 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002788 offsetof(textio, dict), /*tp_dictoffset*/
2789 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002790 0, /* tp_alloc */
2791 PyType_GenericNew, /* tp_new */
2792};