/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifier declarations for attribute/method names used in this
   file.  Each _Py_IDENTIFIER(name) is referred to as PyId_<name>; for
   example PyId_setstate is passed to _PyObject_CallMethodId() in
   incrementalnewlinedecoder_setstate(). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
/* TextIOBase */
36
/* Class docstring for _io._TextIOBase (used as tp_doc of PyTextIOBase_Type). */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable. There is no public constructor.\n"
    );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
/* Type object for _io._TextIOBase.

   It derives from PyIOBase_Type (tp_base) and only contributes a docstring,
   the method table and the getset table defined above; all other slots are
   left at 0.
   NOTE(review): tp_basicsize and tp_new are 0 here, so they are presumably
   inherited from the base type when the type is readied -- confirm in the
   module initialisation code. */
PyTypeObject PyTextIOBase_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io._TextIOBase",          /*tp_name*/
    0,                          /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    0,                          /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    textiobase_doc,             /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /* tp_weaklistoffset */
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    textiobase_methods,         /* tp_methods */
    0,                          /* tp_members */
    textiobase_getset,          /* tp_getset */
    &PyIOBase_Type,             /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    0,                          /* tp_init */
    0,                          /* tp_alloc */
    0,                          /* tp_new */
};
196
197
/* IncrementalNewlineDecoder */
199
/* Class docstring for _io.IncrementalNewlineDecoder (used as tp_doc of
   PyIncrementalNewlineDecoder_Type). */
PyDoc_STRVAR(incrementalnewlinedecoder_doc,
    "Codec used when reading a file in universal newlines mode.  It wraps\n"
    "another incremental decoder, translating \\r\\n and \\r into \\n.  It also\n"
    "records the types of newlines encountered.  When used with\n"
    "translate=False, it ensures that the newline sequence is returned in\n"
    "one piece. When used with decoder=None, it expects unicode strings as\n"
    "decode input and translates newlines without first invoking an external\n"
    "decoder.\n"
    );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200260static int
261check_decoded(PyObject *decoded)
262{
263 if (decoded == NULL)
264 return -1;
265 if (!PyUnicode_Check(decoded)) {
266 PyErr_Format(PyExc_TypeError,
267 "decoder should return a string result, not '%.200s'",
268 Py_TYPE(decoded)->tp_name);
269 Py_DECREF(decoded);
270 return -1;
271 }
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +0200272 if (PyUnicode_READY(decoded) < 0) {
273 Py_DECREF(decoded);
274 return -1;
275 }
Serhiy Storchaka94dc6732013-02-03 17:03:31 +0200276 return 0;
277}
278
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in the decoder's `seennl` field. */
#define SEEN_CR   0x1
#define SEEN_LF   0x2
#define SEEN_CRLF 0x4
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
283
/* Decode `input` through the wrapped decoder (or take it as-is when the
   decoder is None), then record and optionally translate newlines.

   _self  -- an nldecoder_object (cast without a type check).
   input  -- object passed to decoder.decode(); must already be a str when
             the decoder is None (enforced by check_decoded()).
   final  -- non-zero if this is the last chunk; when zero, a trailing '\r'
             is held back in self->pendingcr until the next call.

   Returns a new reference to the resulting str, or NULL with an exception
   set.  self->seennl is updated with the newline kinds found.

   NOTE(review): the scanning loops below test the character value before
   testing the index, so they read the element at index `len`; presumably
   they rely on the string data being followed by a NUL terminator --
   confirm against the unicode object layout. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *_self,
                                    PyObject *input, int final)
{
    PyObject *output;
    Py_ssize_t output_len;
    nldecoder_object *self = (nldecoder_object *) _self;

    /* decoder is only NULL if __init__ never ran */
    if (self->decoder == NULL) {
        PyErr_SetString(PyExc_ValueError,
                        "IncrementalNewlineDecoder.__init__ not called");
        return NULL;
    }

    /* decode input (with the possible \r from a previous pass) */
    if (self->decoder != Py_None) {
        output = PyObject_CallMethodObjArgs(self->decoder,
            _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
    }
    else {
        output = input;
        Py_INCREF(output);
    }

    /* On failure check_decoded() has already released `output`. */
    if (check_decoded(output) < 0)
        return NULL;

    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        /* Prefix output with CR */
        int kind;
        PyObject *modified;
        char *out;

        modified = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (modified == NULL)
            goto error;
        kind = PyUnicode_KIND(modified);
        out = PyUnicode_DATA(modified);
        PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
        /* `kind` is used as the per-character byte width here */
        memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
        Py_DECREF(output);
        output = modified; /* output remains ready */
        self->pendingcr = 0;
        output_len++;
    }

    /* retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final) {
        if (output_len > 0
            && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
        {
            PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
            if (modified == NULL)
                goto error;
            Py_DECREF(output);
            output = modified;
            self->pendingcr = 1;
        }
    }

    /* Record which newlines are read and do newline translation if desired,
       all in one pass. */
    {
        void *in_str;
        Py_ssize_t len;
        int seennl = self->seennl;
        int only_lf = 0;
        int kind;

        in_str = PyUnicode_DATA(output);
        len = PyUnicode_GET_LENGTH(output);
        kind = PyUnicode_KIND(output);

        /* Nothing to scan; seennl is unchanged. */
        if (len == 0)
            return output;

        /* If, up to now, newlines are consistently \n, do a quick check
           for the \r *byte* with the libc's optimized memchr.
           */
        if (seennl == SEEN_LF || seennl == 0) {
            only_lf = (memchr(in_str, '\r', kind * len) == NULL);
        }

        if (only_lf) {
            /* If not already seen, quick scan for a possible "\n" character.
               (there's nothing else to be done, even when in translation mode)
            */
            if (seennl == 0 &&
                memchr(in_str, '\n', kind * len) != NULL) {
                /* For 1-byte strings a matching byte is a matching
                   character; for wider kinds the byte hit must be
                   confirmed character by character. */
                if (kind == PyUnicode_1BYTE_KIND)
                    seennl |= SEEN_LF;
                else {
                    Py_ssize_t i = 0;
                    for (;;) {
                        Py_UCS4 c;
                        /* Fast loop for non-control characters */
                        while (PyUnicode_READ(kind, in_str, i) > '\n')
                            i++;
                        c = PyUnicode_READ(kind, in_str, i++);
                        if (c == '\n') {
                            seennl |= SEEN_LF;
                            break;
                        }
                        if (i >= len)
                            break;
                    }
                }
            }
            /* Finished: we have scanned for newlines, and none of them
               need translating */
        }
        else if (!self->translate) {
            /* Record-only mode: classify newlines, leave the text as is. */
            Py_ssize_t i = 0;
            /* We have already seen all newline types, no need to scan again */
            if (seennl == SEEN_ALL)
                goto endscan;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, in_str, i) > '\r')
                    i++;
                c = PyUnicode_READ(kind, in_str, i++);
                if (c == '\n')
                    seennl |= SEEN_LF;
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else
                        seennl |= SEEN_CR;
                }
                if (i >= len)
                    break;
                if (seennl == SEEN_ALL)
                    break;
            }
        endscan:
            ;
        }
        else {
            /* Translation mode: copy into a scratch buffer, replacing
               "\r\n" and lone "\r" by "\n".  The result is never longer
               than the input, so `len` characters of scratch suffice.
               (kind and in_str are re-declared here and shadow the outer
               variables with the same values.) */
            void *translated;
            int kind = PyUnicode_KIND(output);
            void *in_str = PyUnicode_DATA(output);
            Py_ssize_t in, out;
            /* XXX: Previous in-place translation here is disabled as
               resizing is not possible anymore */
            /* We could try to optimize this so that we only do a copy
               when there is something to translate. On the other hand,
               we already know there is a \r byte, so chances are high
               that something needs to be done. */
            translated = PyMem_Malloc(kind * len);
            if (translated == NULL) {
                PyErr_NoMemory();
                goto error;
            }
            in = out = 0;
            for (;;) {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
                    PyUnicode_WRITE(kind, translated, out++, c);
                if (c == '\n') {
                    PyUnicode_WRITE(kind, translated, out++, c);
                    seennl |= SEEN_LF;
                    continue;
                }
                if (c == '\r') {
                    if (PyUnicode_READ(kind, in_str, in) == '\n') {
                        in++;
                        seennl |= SEEN_CRLF;
                    }
                    else
                        seennl |= SEEN_CR;
                    PyUnicode_WRITE(kind, translated, out++, '\n');
                    continue;
                }
                /* `in` was already advanced past the character just read,
                   so in > len means that character was at index len. */
                if (in > len)
                    break;
                PyUnicode_WRITE(kind, translated, out++, c);
            }
            Py_DECREF(output);
            output = PyUnicode_FromKindAndData(kind, translated, out);
            PyMem_Free(translated);
            /* The old output was already released above, so return
               directly instead of going through the error label. */
            if (!output)
                return NULL;
        }
        self->seennl |= seennl;
    }

    return output;

  error:
    Py_DECREF(output);
    return NULL;
}
484
485static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000486incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000487 PyObject *args, PyObject *kwds)
488{
489 char *kwlist[] = {"input", "final", NULL};
490 PyObject *input;
491 int final = 0;
492
493 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
494 kwlist, &input, &final))
495 return NULL;
496 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
497}
498
499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000500incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000501{
502 PyObject *buffer;
503 unsigned PY_LONG_LONG flag;
504
505 if (self->decoder != Py_None) {
506 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
507 _PyIO_str_getstate, NULL);
508 if (state == NULL)
509 return NULL;
510 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
511 Py_DECREF(state);
512 return NULL;
513 }
514 Py_INCREF(buffer);
515 Py_DECREF(state);
516 }
517 else {
518 buffer = PyBytes_FromString("");
519 flag = 0;
520 }
521 flag <<= 1;
522 if (self->pendingcr)
523 flag |= 1;
524 return Py_BuildValue("NK", buffer, flag);
525}
526
527static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000528incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000529{
530 PyObject *buffer;
531 unsigned PY_LONG_LONG flag;
532
533 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
534 return NULL;
535
536 self->pendingcr = (int) flag & 1;
537 flag >>= 1;
538
539 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200540 return _PyObject_CallMethodId(self->decoder,
541 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000542 else
543 Py_RETURN_NONE;
544}
545
546static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000547incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000548{
549 self->seennl = 0;
550 self->pendingcr = 0;
551 if (self->decoder != Py_None)
552 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
553 else
554 Py_RETURN_NONE;
555}
556
557static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000558incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000559{
560 switch (self->seennl) {
561 case SEEN_CR:
562 return PyUnicode_FromString("\r");
563 case SEEN_LF:
564 return PyUnicode_FromString("\n");
565 case SEEN_CRLF:
566 return PyUnicode_FromString("\r\n");
567 case SEEN_CR | SEEN_LF:
568 return Py_BuildValue("ss", "\r", "\n");
569 case SEEN_CR | SEEN_CRLF:
570 return Py_BuildValue("ss", "\r", "\r\n");
571 case SEEN_LF | SEEN_CRLF:
572 return Py_BuildValue("ss", "\n", "\r\n");
573 case SEEN_CR | SEEN_LF | SEEN_CRLF:
574 return Py_BuildValue("sss", "\r", "\n", "\r\n");
575 default:
576 Py_RETURN_NONE;
577 }
578
579}
580
581
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000582static PyMethodDef incrementalnewlinedecoder_methods[] = {
583 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
584 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
585 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
586 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000587 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000588};
589
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590static PyGetSetDef incrementalnewlinedecoder_getset[] = {
591 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000592 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000593};
594
/* Type object for _io.IncrementalNewlineDecoder.

   Instances are nldecoder_object structs, allocated by PyType_GenericNew,
   initialised by incrementalnewlinedecoder_init and released by
   incrementalnewlinedecoder_dealloc.  No explicit base type is set
   (tp_base is 0) and the type may be subclassed (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare */
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    incrementalnewlinedecoder_doc,          /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    (initproc)incrementalnewlinedecoder_init, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
635
636
/* TextIOWrapper */
638
/* Class docstring for TextIOWrapper: describes the encoding, errors,
   newline and line_buffering constructor arguments. */
PyDoc_STRVAR(textiowrapper_doc,
    "Character and line based layer over a BufferedIOBase object, buffer.\n"
    "\n"
    "encoding gives the name of the encoding that the stream will be\n"
    "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
    "\n"
    "errors determines the strictness of encoding and decoding (see\n"
    "help(codecs.Codec) or the documentation for codecs.register) and\n"
    "defaults to \"strict\".\n"
    "\n"
    "newline controls how line endings are handled. It can be None, '',\n"
    "'\\n', '\\r', and '\\r\\n'.  It works as follows:\n"
    "\n"
    "* On input, if newline is None, universal newlines mode is\n"
    "  enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
    "  these are translated into '\\n' before being returned to the\n"
    "  caller. If it is '', universal newline mode is enabled, but line\n"
    "  endings are returned to the caller untranslated. If it has any of\n"
    "  the other legal values, input lines are only terminated by the given\n"
    "  string, and the line ending is returned to the caller untranslated.\n"
    "\n"
    "* On output, if newline is None, any '\\n' characters written are\n"
    "  translated to the system default line separator, os.linesep. If\n"
    "  newline is '' or '\\n', no translation takes place. If newline is any\n"
    "  of the other legal values, any '\\n' characters written are translated\n"
    "  to the given string.\n"
    "\n"
    "If line_buffering is True, a call to flush is implied when a call to\n"
    "write contains a newline character."
    );
669
670typedef PyObject *
671 (*encodefunc_t)(PyObject *, PyObject *);
672
/* Instance layout of the TextIOWrapper object.
   NOTE(review): member meanings below are taken from the existing comments
   and member names; the code that maintains them is outside this excerpt. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;
    PyObject *encoding;
    PyObject *encoder;
    PyObject *decoder;
    PyObject *readnl;
    /* Read through PyBytes_AS_STRING() by the specialized encoders below */
    PyObject *errors;
    const char *writenl; /* utf-8 encoded, NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;
    char readtranslate;
    char writetranslate;
    char seekable;
    char has_read1;
    char telling;
    char deallocating;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       /* list of bytes objects waiting to be
                                      written, or NULL */
    Py_ssize_t pending_bytes_count;

    /* snapshot is either None, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000731
732
/* A couple of specialized cases in order to bypass the slow incremental
   encoding methods for the most popular encodings. */
735
736static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000737ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000738{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200739 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740}
741
742static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000743utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000744{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100745 return _PyUnicode_EncodeUTF16(text,
746 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000747}
748
749static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000750utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000751{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100752 return _PyUnicode_EncodeUTF16(text,
753 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000754}
755
756static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000757utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758{
Antoine Pitroue4501852009-05-14 18:55:55 +0000759 if (!self->encoding_start_of_stream) {
760 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000761#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000762 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000763#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000764 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000765#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000766 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100767 return _PyUnicode_EncodeUTF16(text,
768 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000769}
770
Antoine Pitroue4501852009-05-14 18:55:55 +0000771static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000772utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000773{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100774 return _PyUnicode_EncodeUTF32(text,
775 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000776}
777
778static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000779utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000780{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100781 return _PyUnicode_EncodeUTF32(text,
782 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000783}
784
785static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000786utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000787{
788 if (!self->encoding_start_of_stream) {
789 /* Skip the BOM and use native byte ordering */
790#if defined(WORDS_BIGENDIAN)
791 return utf32be_encode(self, text);
792#else
793 return utf32le_encode(self, text);
794#endif
795 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100796 return _PyUnicode_EncodeUTF32(text,
797 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000798}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799
800static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000801utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000802{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200803 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000804}
805
806static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000807latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000808{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200809 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000810}
811
812/* Map normalized encoding names onto the specialized encoding funcs */
813
814typedef struct {
815 const char *name;
816 encodefunc_t encodefunc;
817} encodefuncentry;
818
Antoine Pitrou24f36292009-03-28 22:16:42 +0000819static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000820 {"ascii", (encodefunc_t) ascii_encode},
821 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000822 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000823 {"utf-16-be", (encodefunc_t) utf16be_encode},
824 {"utf-16-le", (encodefunc_t) utf16le_encode},
825 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000826 {"utf-32-be", (encodefunc_t) utf32be_encode},
827 {"utf-32-le", (encodefunc_t) utf32le_encode},
828 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000829 {NULL, NULL}
830};
831
832
833static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000834textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000835{
836 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200837 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000838 NULL};
839 PyObject *buffer, *raw;
840 char *encoding = NULL;
841 char *errors = NULL;
842 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200843 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000844 _PyIO_State *state = IO_STATE;
845
846 PyObject *res;
847 int r;
848
849 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000850 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200851 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000852 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200853 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000854 return -1;
855
856 if (newline && newline[0] != '\0'
857 && !(newline[0] == '\n' && newline[1] == '\0')
858 && !(newline[0] == '\r' && newline[1] == '\0')
859 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
860 PyErr_Format(PyExc_ValueError,
861 "illegal newline value: %s", newline);
862 return -1;
863 }
864
865 Py_CLEAR(self->buffer);
866 Py_CLEAR(self->encoding);
867 Py_CLEAR(self->encoder);
868 Py_CLEAR(self->decoder);
869 Py_CLEAR(self->readnl);
870 Py_CLEAR(self->decoded_chars);
871 Py_CLEAR(self->pending_bytes);
872 Py_CLEAR(self->snapshot);
873 Py_CLEAR(self->errors);
874 Py_CLEAR(self->raw);
875 self->decoded_chars_used = 0;
876 self->pending_bytes_count = 0;
877 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000878 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000879
880 if (encoding == NULL) {
881 /* Try os.device_encoding(fileno) */
882 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200883 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000884 /* Ignore only AttributeError and UnsupportedOperation */
885 if (fileno == NULL) {
886 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
887 PyErr_ExceptionMatches(state->unsupported_operation)) {
888 PyErr_Clear();
889 }
890 else {
891 goto error;
892 }
893 }
894 else {
Serhiy Storchaka9101e232013-01-19 12:41:45 +0200895 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500896 Py_DECREF(fileno);
897 if (fd == -1 && PyErr_Occurred()) {
898 goto error;
899 }
900
901 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000902 if (self->encoding == NULL)
903 goto error;
904 else if (!PyUnicode_Check(self->encoding))
905 Py_CLEAR(self->encoding);
906 }
907 }
908 if (encoding == NULL && self->encoding == NULL) {
909 if (state->locale_module == NULL) {
910 state->locale_module = PyImport_ImportModule("locale");
911 if (state->locale_module == NULL)
912 goto catch_ImportError;
913 else
914 goto use_locale;
915 }
916 else {
917 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200918 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200919 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000920 if (self->encoding == NULL) {
921 catch_ImportError:
922 /*
923 Importing locale can raise a ImportError because of
924 _functools, and locale.getpreferredencoding can raise a
925 ImportError if _locale is not available. These will happen
926 during module building.
927 */
928 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
929 PyErr_Clear();
930 self->encoding = PyUnicode_FromString("ascii");
931 }
932 else
933 goto error;
934 }
935 else if (!PyUnicode_Check(self->encoding))
936 Py_CLEAR(self->encoding);
937 }
938 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000939 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000940 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000941 if (encoding == NULL)
942 goto error;
943 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000944 else if (encoding != NULL) {
945 self->encoding = PyUnicode_FromString(encoding);
946 if (self->encoding == NULL)
947 goto error;
948 }
949 else {
950 PyErr_SetString(PyExc_IOError,
951 "could not determine default encoding");
952 }
953
954 if (errors == NULL)
955 errors = "strict";
956 self->errors = PyBytes_FromString(errors);
957 if (self->errors == NULL)
958 goto error;
959
960 self->chunk_size = 8192;
961 self->readuniversal = (newline == NULL || newline[0] == '\0');
962 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200963 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 self->readtranslate = (newline == NULL);
965 if (newline) {
966 self->readnl = PyUnicode_FromString(newline);
967 if (self->readnl == NULL)
968 return -1;
969 }
970 self->writetranslate = (newline == NULL || newline[0] != '\0');
971 if (!self->readuniversal && self->readnl) {
972 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000973 if (self->writenl == NULL)
974 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000975 if (!strcmp(self->writenl, "\n"))
976 self->writenl = NULL;
977 }
978#ifdef MS_WINDOWS
979 else
980 self->writenl = "\r\n";
981#endif
982
983 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200984 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000985 if (res == NULL)
986 goto error;
987 r = PyObject_IsTrue(res);
988 Py_DECREF(res);
989 if (r == -1)
990 goto error;
991 if (r == 1) {
992 self->decoder = PyCodec_IncrementalDecoder(
993 encoding, errors);
994 if (self->decoder == NULL)
995 goto error;
996
997 if (self->readuniversal) {
998 PyObject *incrementalDecoder = PyObject_CallFunction(
999 (PyObject *)&PyIncrementalNewlineDecoder_Type,
1000 "Oi", self->decoder, (int)self->readtranslate);
1001 if (incrementalDecoder == NULL)
1002 goto error;
1003 Py_CLEAR(self->decoder);
1004 self->decoder = incrementalDecoder;
1005 }
1006 }
1007
1008 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001009 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001010 if (res == NULL)
1011 goto error;
1012 r = PyObject_IsTrue(res);
1013 Py_DECREF(res);
1014 if (r == -1)
1015 goto error;
1016 if (r == 1) {
1017 PyObject *ci;
1018 self->encoder = PyCodec_IncrementalEncoder(
1019 encoding, errors);
1020 if (self->encoder == NULL)
1021 goto error;
1022 /* Get the normalized named of the codec */
1023 ci = _PyCodec_Lookup(encoding);
1024 if (ci == NULL)
1025 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001026 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001027 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001028 if (res == NULL) {
1029 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1030 PyErr_Clear();
1031 else
1032 goto error;
1033 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001034 else if (PyUnicode_Check(res)) {
1035 encodefuncentry *e = encodefuncs;
1036 while (e->name != NULL) {
1037 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1038 self->encodefunc = e->encodefunc;
1039 break;
1040 }
1041 e++;
1042 }
1043 }
1044 Py_XDECREF(res);
1045 }
1046
1047 self->buffer = buffer;
1048 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001049
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1051 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1052 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001053 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001054 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001055 if (raw == NULL) {
1056 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1057 PyErr_Clear();
1058 else
1059 goto error;
1060 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001061 else if (Py_TYPE(raw) == &PyFileIO_Type)
1062 self->raw = raw;
1063 else
1064 Py_DECREF(raw);
1065 }
1066
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001067 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001068 if (res == NULL)
1069 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001070 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001071 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001072 if (r < 0)
1073 goto error;
1074 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001075
Martin v. Löwis767046a2011-10-14 15:35:36 +02001076 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001077
Antoine Pitroue4501852009-05-14 18:55:55 +00001078 self->encoding_start_of_stream = 0;
1079 if (self->seekable && self->encoder) {
1080 PyObject *cookieObj;
1081 int cmp;
1082
1083 self->encoding_start_of_stream = 1;
1084
1085 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1086 if (cookieObj == NULL)
1087 goto error;
1088
1089 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1090 Py_DECREF(cookieObj);
1091 if (cmp < 0) {
1092 goto error;
1093 }
1094
1095 if (cmp == 0) {
1096 self->encoding_start_of_stream = 0;
1097 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1098 _PyIO_zero, NULL);
1099 if (res == NULL)
1100 goto error;
1101 Py_DECREF(res);
1102 }
1103 }
1104
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001105 self->ok = 1;
1106 return 0;
1107
1108 error:
1109 return -1;
1110}
1111
1112static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001113_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001114{
1115 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1116 return -1;
1117 self->ok = 0;
1118 Py_CLEAR(self->buffer);
1119 Py_CLEAR(self->encoding);
1120 Py_CLEAR(self->encoder);
1121 Py_CLEAR(self->decoder);
1122 Py_CLEAR(self->readnl);
1123 Py_CLEAR(self->decoded_chars);
1124 Py_CLEAR(self->pending_bytes);
1125 Py_CLEAR(self->snapshot);
1126 Py_CLEAR(self->errors);
1127 Py_CLEAR(self->raw);
1128 return 0;
1129}
1130
1131static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001132textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001133{
Antoine Pitroue033e062010-10-29 10:38:18 +00001134 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001135 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001136 return;
1137 _PyObject_GC_UNTRACK(self);
1138 if (self->weakreflist != NULL)
1139 PyObject_ClearWeakRefs((PyObject *)self);
1140 Py_CLEAR(self->dict);
1141 Py_TYPE(self)->tp_free((PyObject *)self);
1142}
1143
1144static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001145textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001146{
1147 Py_VISIT(self->buffer);
1148 Py_VISIT(self->encoding);
1149 Py_VISIT(self->encoder);
1150 Py_VISIT(self->decoder);
1151 Py_VISIT(self->readnl);
1152 Py_VISIT(self->decoded_chars);
1153 Py_VISIT(self->pending_bytes);
1154 Py_VISIT(self->snapshot);
1155 Py_VISIT(self->errors);
1156 Py_VISIT(self->raw);
1157
1158 Py_VISIT(self->dict);
1159 return 0;
1160}
1161
1162static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001163textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001164{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001165 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001166 return -1;
1167 Py_CLEAR(self->dict);
1168 return 0;
1169}
1170
1171static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001172textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001173
1174/* This macro takes some shortcuts to make the common case faster. */
1175#define CHECK_CLOSED(self) \
1176 do { \
1177 int r; \
1178 PyObject *_res; \
1179 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1180 if (self->raw != NULL) \
1181 r = _PyFileIO_closed(self->raw); \
1182 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001183 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001184 if (_res == NULL) \
1185 return NULL; \
1186 r = PyObject_IsTrue(_res); \
1187 Py_DECREF(_res); \
1188 if (r < 0) \
1189 return NULL; \
1190 } \
1191 if (r > 0) { \
1192 PyErr_SetString(PyExc_ValueError, \
1193 "I/O operation on closed file."); \
1194 return NULL; \
1195 } \
1196 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001197 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001198 return NULL; \
1199 } while (0)
1200
/* Guard for functions returning a pointer: if the object is not usable
   (self->ok <= 0), raise ValueError and return NULL.  The message tells
   a detached buffer apart from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return NULL; \
    }
1212
/* Guard for functions returning an int status: if the object is not
   usable (self->ok <= 0), raise ValueError and return -1.  The message
   tells a detached buffer apart from an object that was never
   initialized. */
#define CHECK_INITIALIZED_INT(self) \
    if (self->ok <= 0) { \
        PyErr_SetString(PyExc_ValueError, \
                        self->detached \
                            ? "underlying buffer has been detached" \
                            : "I/O operation on uninitialized object"); \
        return -1; \
    }
1224
1225
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001226static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001227textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001228{
1229 PyObject *buffer, *res;
1230 CHECK_INITIALIZED(self);
1231 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1232 if (res == NULL)
1233 return NULL;
1234 Py_DECREF(res);
1235 buffer = self->buffer;
1236 self->buffer = NULL;
1237 self->detached = 1;
1238 self->ok = 0;
1239 return buffer;
1240}
1241
Antoine Pitrou24f36292009-03-28 22:16:42 +00001242/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001243 underlying buffered object, though. */
1244static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001245_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001246{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001247 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248
1249 if (self->pending_bytes == NULL)
1250 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001251
1252 pending = self->pending_bytes;
1253 Py_INCREF(pending);
1254 self->pending_bytes_count = 0;
1255 Py_CLEAR(self->pending_bytes);
1256
1257 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1258 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001259 if (b == NULL)
1260 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001261 ret = NULL;
1262 do {
1263 ret = PyObject_CallMethodObjArgs(self->buffer,
1264 _PyIO_str_write, b, NULL);
1265 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001266 Py_DECREF(b);
1267 if (ret == NULL)
1268 return -1;
1269 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001270 return 0;
1271}
1272
1273static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001274textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001275{
1276 PyObject *ret;
1277 PyObject *text; /* owned reference */
1278 PyObject *b;
1279 Py_ssize_t textlen;
1280 int haslf = 0;
1281 int needflush = 0;
1282
1283 CHECK_INITIALIZED(self);
1284
1285 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1286 return NULL;
1287 }
1288
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001289 if (PyUnicode_READY(text) == -1)
1290 return NULL;
1291
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001292 CHECK_CLOSED(self);
1293
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001294 if (self->encoder == NULL)
1295 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001296
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001297 Py_INCREF(text);
1298
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001299 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001300
1301 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001302 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001303 haslf = 1;
1304
1305 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001306 PyObject *newtext = _PyObject_CallMethodId(
1307 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001308 Py_DECREF(text);
1309 if (newtext == NULL)
1310 return NULL;
1311 text = newtext;
1312 }
1313
Antoine Pitroue96ec682011-07-23 21:46:35 +02001314 if (self->write_through)
1315 needflush = 1;
1316 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001317 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001318 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001319 needflush = 1;
1320
1321 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001322 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001323 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001324 self->encoding_start_of_stream = 0;
1325 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001326 else
1327 b = PyObject_CallMethodObjArgs(self->encoder,
1328 _PyIO_str_encode, text, NULL);
1329 Py_DECREF(text);
1330 if (b == NULL)
1331 return NULL;
1332
1333 if (self->pending_bytes == NULL) {
1334 self->pending_bytes = PyList_New(0);
1335 if (self->pending_bytes == NULL) {
1336 Py_DECREF(b);
1337 return NULL;
1338 }
1339 self->pending_bytes_count = 0;
1340 }
1341 if (PyList_Append(self->pending_bytes, b) < 0) {
1342 Py_DECREF(b);
1343 return NULL;
1344 }
1345 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1346 Py_DECREF(b);
1347 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001348 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001349 return NULL;
1350 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001351
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001352 if (needflush) {
1353 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1354 if (ret == NULL)
1355 return NULL;
1356 Py_DECREF(ret);
1357 }
1358
1359 Py_CLEAR(self->snapshot);
1360
1361 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001362 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001363 if (ret == NULL)
1364 return NULL;
1365 Py_DECREF(ret);
1366 }
1367
1368 return PyLong_FromSsize_t(textlen);
1369}
1370
1371/* Steal a reference to chars and store it in the decoded_char buffer;
1372 */
1373static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001374textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001375{
1376 Py_CLEAR(self->decoded_chars);
1377 self->decoded_chars = chars;
1378 self->decoded_chars_used = 0;
1379}
1380
1381static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001382textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001383{
1384 PyObject *chars;
1385 Py_ssize_t avail;
1386
1387 if (self->decoded_chars == NULL)
1388 return PyUnicode_FromStringAndSize(NULL, 0);
1389
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001390 /* decoded_chars is guaranteed to be "ready". */
1391 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001392 - self->decoded_chars_used);
1393
1394 assert(avail >= 0);
1395
1396 if (n < 0 || n > avail)
1397 n = avail;
1398
1399 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001400 chars = PyUnicode_Substring(self->decoded_chars,
1401 self->decoded_chars_used,
1402 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001403 if (chars == NULL)
1404 return NULL;
1405 }
1406 else {
1407 chars = self->decoded_chars;
1408 Py_INCREF(chars);
1409 }
1410
1411 self->decoded_chars_used += n;
1412 return chars;
1413}
1414
1415/* Read and decode the next chunk of data from the BufferedReader.
1416 */
1417static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001418textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001419{
1420 PyObject *dec_buffer = NULL;
1421 PyObject *dec_flags = NULL;
1422 PyObject *input_chunk = NULL;
1423 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001424 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001425 int eof;
1426
1427 /* The return value is True unless EOF was reached. The decoded string is
1428 * placed in self._decoded_chars (replacing its previous value). The
1429 * entire input chunk is sent to the decoder, though some of it may remain
1430 * buffered in the decoder, yet to be converted.
1431 */
1432
1433 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001434 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001435 return -1;
1436 }
1437
1438 if (self->telling) {
1439 /* To prepare for tell(), we need to snapshot a point in the file
1440 * where the decoder's input buffer is empty.
1441 */
1442
1443 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1444 _PyIO_str_getstate, NULL);
1445 if (state == NULL)
1446 return -1;
1447 /* Given this, we know there was a valid snapshot point
1448 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1449 */
1450 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1451 Py_DECREF(state);
1452 return -1;
1453 }
1454 Py_INCREF(dec_buffer);
1455 Py_INCREF(dec_flags);
1456 Py_DECREF(state);
1457 }
1458
1459 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001460 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001461 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001462 }
1463 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001464 if (chunk_size == NULL)
1465 goto fail;
1466 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001467 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1468 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001469 Py_DECREF(chunk_size);
1470 if (input_chunk == NULL)
1471 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001472 if (!PyBytes_Check(input_chunk)) {
1473 PyErr_Format(PyExc_TypeError,
1474 "underlying %s() should have returned a bytes object, "
1475 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1476 Py_TYPE(input_chunk)->tp_name);
1477 goto fail;
1478 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001479
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001480 nbytes = PyBytes_Size(input_chunk);
1481 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001482
1483 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1484 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1485 self->decoder, input_chunk, eof);
1486 }
1487 else {
1488 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1489 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1490 }
1491
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001492 if (check_decoded(decoded_chars) < 0)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001493 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001494 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001495 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001496 if (nchars > 0)
1497 self->b2cratio = (double) nbytes / nchars;
1498 else
1499 self->b2cratio = 0.0;
1500 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001501 eof = 0;
1502
1503 if (self->telling) {
1504 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1505 * next input to be decoded is dec_buffer + input_chunk.
1506 */
1507 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1508 if (next_input == NULL)
1509 goto fail;
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001510 if (!PyBytes_Check(next_input)) {
1511 PyErr_Format(PyExc_TypeError,
1512 "decoder getstate() should have returned a bytes "
1513 "object, not '%.200s'",
1514 Py_TYPE(next_input)->tp_name);
1515 Py_DECREF(next_input);
1516 goto fail;
1517 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001518 Py_DECREF(dec_buffer);
1519 Py_CLEAR(self->snapshot);
1520 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1521 }
1522 Py_DECREF(input_chunk);
1523
1524 return (eof == 0);
1525
1526 fail:
1527 Py_XDECREF(dec_buffer);
1528 Py_XDECREF(dec_flags);
1529 Py_XDECREF(input_chunk);
1530 return -1;
1531}
1532
1533static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001534textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001535{
1536 Py_ssize_t n = -1;
1537 PyObject *result = NULL, *chunks = NULL;
1538
1539 CHECK_INITIALIZED(self);
1540
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001541 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001542 return NULL;
1543
1544 CHECK_CLOSED(self);
1545
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001546 if (self->decoder == NULL)
1547 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001548
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001549 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001550 return NULL;
1551
1552 if (n < 0) {
1553 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001554 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001555 PyObject *decoded;
1556 if (bytes == NULL)
1557 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001558
1559 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1560 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1561 bytes, 1);
1562 else
1563 decoded = PyObject_CallMethodObjArgs(
1564 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001565 Py_DECREF(bytes);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02001566 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001567 goto fail;
1568
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001569 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001570
1571 if (result == NULL) {
1572 Py_DECREF(decoded);
1573 return NULL;
1574 }
1575
1576 PyUnicode_AppendAndDel(&result, decoded);
1577 if (result == NULL)
1578 goto fail;
1579
1580 Py_CLEAR(self->snapshot);
1581 return result;
1582 }
1583 else {
1584 int res = 1;
1585 Py_ssize_t remaining = n;
1586
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001587 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001588 if (result == NULL)
1589 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001590 if (PyUnicode_READY(result) == -1)
1591 goto fail;
1592 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593
1594 /* Keep reading chunks until we have n characters to return */
1595 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001596 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001597 if (res < 0) {
1598 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1599 when EINTR occurs so we needn't do it ourselves. */
1600 if (_PyIO_trap_eintr()) {
1601 continue;
1602 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001603 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001604 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001605 if (res == 0) /* EOF */
1606 break;
1607 if (chunks == NULL) {
1608 chunks = PyList_New(0);
1609 if (chunks == NULL)
1610 goto fail;
1611 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001612 if (PyUnicode_GET_LENGTH(result) > 0 &&
1613 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001614 goto fail;
1615 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001616 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001617 if (result == NULL)
1618 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001619 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620 }
1621 if (chunks != NULL) {
1622 if (result != NULL && PyList_Append(chunks, result) < 0)
1623 goto fail;
1624 Py_CLEAR(result);
1625 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1626 if (result == NULL)
1627 goto fail;
1628 Py_CLEAR(chunks);
1629 }
1630 return result;
1631 }
1632 fail:
1633 Py_XDECREF(result);
1634 Py_XDECREF(chunks);
1635 return NULL;
1636}
1637
1638
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001639/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640 that is to the NUL character. Otherwise the function will produce
1641 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001642static char *
1643find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001644{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001645 if (kind == PyUnicode_1BYTE_KIND) {
1646 assert(ch < 256);
1647 return (char *) memchr((void *) s, (char) ch, end - s);
1648 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001650 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001651 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001652 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001653 return s;
1654 if (s == end)
1655 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001656 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001657 }
1658}
1659
1660Py_ssize_t
1661_PyIO_find_line_ending(
1662 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001663 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001664{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001665 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001666
1667 if (translated) {
1668 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001669 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001671 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 else {
1673 *consumed = len;
1674 return -1;
1675 }
1676 }
1677 else if (universal) {
1678 /* Universal newline search. Find any of \r, \r\n, \n
1679 * The decoder ensures that \r\n are not split in two pieces
1680 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001681 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001682 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001683 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001684 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001685 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001686 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001687 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 if (s >= end) {
1689 *consumed = len;
1690 return -1;
1691 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001692 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001693 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001694 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001695 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001696 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001697 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001698 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001699 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001700 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001701 }
1702 }
1703 }
1704 else {
1705 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001706 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1707 char *nl = PyUnicode_DATA(readnl);
1708 /* Assume that readnl is an ASCII character. */
1709 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001710 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001711 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001712 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001713 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 *consumed = len;
1715 return -1;
1716 }
1717 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001718 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001719 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001720 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001721 if (e < s)
1722 e = s;
1723 while (s < e) {
1724 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001725 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001726 if (pos == NULL || pos >= e)
1727 break;
1728 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001729 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001730 break;
1731 }
1732 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001733 return (pos - start)/kind + readnl_len;
1734 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001736 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001737 if (pos == NULL)
1738 *consumed = len;
1739 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001740 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001741 return -1;
1742 }
1743 }
1744}
1745
1746static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001747_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001748{
1749 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1750 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1751 int res;
1752
1753 CHECK_CLOSED(self);
1754
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001755 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756 return NULL;
1757
1758 chunked = 0;
1759
1760 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001761 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001762 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001763 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 Py_ssize_t consumed = 0;
1765
1766 /* First, get some data if necessary */
1767 res = 1;
1768 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001769 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001770 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001771 if (res < 0) {
1772 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1773 when EINTR occurs so we needn't do it ourselves. */
1774 if (_PyIO_trap_eintr()) {
1775 continue;
1776 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001778 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001779 if (res == 0)
1780 break;
1781 }
1782 if (res == 0) {
1783 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001784 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001785 Py_CLEAR(self->snapshot);
1786 start = endpos = offset_to_buffer = 0;
1787 break;
1788 }
1789
1790 if (remaining == NULL) {
1791 line = self->decoded_chars;
1792 start = self->decoded_chars_used;
1793 offset_to_buffer = 0;
1794 Py_INCREF(line);
1795 }
1796 else {
1797 assert(self->decoded_chars_used == 0);
1798 line = PyUnicode_Concat(remaining, self->decoded_chars);
1799 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001800 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001801 Py_CLEAR(remaining);
1802 if (line == NULL)
1803 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001804 if (PyUnicode_READY(line) == -1)
1805 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001806 }
1807
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001808 ptr = PyUnicode_DATA(line);
1809 line_len = PyUnicode_GET_LENGTH(line);
1810 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001811
1812 endpos = _PyIO_find_line_ending(
1813 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001814 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001815 ptr + kind * start,
1816 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001817 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001818 if (endpos >= 0) {
1819 endpos += start;
1820 if (limit >= 0 && (endpos - start) + chunked >= limit)
1821 endpos = start + limit - chunked;
1822 break;
1823 }
1824
1825 /* We can put aside up to `endpos` */
1826 endpos = consumed + start;
1827 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1828 /* Didn't find line ending, but reached length limit */
1829 endpos = start + limit - chunked;
1830 break;
1831 }
1832
1833 if (endpos > start) {
1834 /* No line ending seen yet - put aside current data */
1835 PyObject *s;
1836 if (chunks == NULL) {
1837 chunks = PyList_New(0);
1838 if (chunks == NULL)
1839 goto error;
1840 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001841 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001842 if (s == NULL)
1843 goto error;
1844 if (PyList_Append(chunks, s) < 0) {
1845 Py_DECREF(s);
1846 goto error;
1847 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001848 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001849 Py_DECREF(s);
1850 }
1851 /* There may be some remaining bytes we'll have to prepend to the
1852 next chunk of data */
1853 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001854 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001855 if (remaining == NULL)
1856 goto error;
1857 }
1858 Py_CLEAR(line);
1859 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001860 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001861 }
1862
1863 if (line != NULL) {
1864 /* Our line ends in the current buffer */
1865 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001866 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1867 PyObject *s = PyUnicode_Substring(line, start, endpos);
1868 Py_CLEAR(line);
1869 if (s == NULL)
1870 goto error;
1871 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001872 }
1873 }
1874 if (remaining != NULL) {
1875 if (chunks == NULL) {
1876 chunks = PyList_New(0);
1877 if (chunks == NULL)
1878 goto error;
1879 }
1880 if (PyList_Append(chunks, remaining) < 0)
1881 goto error;
1882 Py_CLEAR(remaining);
1883 }
1884 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001885 if (line != NULL) {
1886 if (PyList_Append(chunks, line) < 0)
1887 goto error;
1888 Py_DECREF(line);
1889 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001890 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1891 if (line == NULL)
1892 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001893 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001894 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001895 if (line == NULL) {
1896 Py_INCREF(_PyIO_empty_str);
1897 line = _PyIO_empty_str;
1898 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001899
1900 return line;
1901
1902 error:
1903 Py_XDECREF(chunks);
1904 Py_XDECREF(remaining);
1905 Py_XDECREF(line);
1906 return NULL;
1907}
1908
1909static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001910textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001911{
1912 Py_ssize_t limit = -1;
1913
1914 CHECK_INITIALIZED(self);
1915 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1916 return NULL;
1917 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001918 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001919}
1920
1921/* Seek and Tell */
1922
1923typedef struct {
1924 Py_off_t start_pos;
1925 int dec_flags;
1926 int bytes_to_feed;
1927 int chars_to_skip;
1928 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001929} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001930
/*
   To speed up cookie packing/unpacking, the cookie_type fields are stored
   in a temporary byte string which is converted with
   _PyLong_FromByteArray() or _PyLong_AsByteArray() (resp.).
   COOKIE_BUF_LEN is the size of that byte string and the OFF_* macros
   give the offset at which each cookie_type field is stored in it.
 */

#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if defined(WORDS_BIGENDIAN)

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order: need_eof first, start_pos last. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the fields are stored in declaration order, so the
   least significant byte of start_pos naturally ends up the least
   significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1968
1969static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001970textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001971{
1972 unsigned char buffer[COOKIE_BUF_LEN];
1973 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1974 if (cookieLong == NULL)
1975 return -1;
1976
1977 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1978 IS_LITTLE_ENDIAN, 0) < 0) {
1979 Py_DECREF(cookieLong);
1980 return -1;
1981 }
1982 Py_DECREF(cookieLong);
1983
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001984 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1985 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1986 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1987 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1988 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989
1990 return 0;
1991}
1992
1993static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001994textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001995{
1996 unsigned char buffer[COOKIE_BUF_LEN];
1997
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001998 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1999 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2000 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2001 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2002 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002003
2004 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
2005}
2006#undef IS_LITTLE_ENDIAN
2007
2008static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002009_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002010{
2011 PyObject *res;
2012 /* When seeking to the start of the stream, we call decoder.reset()
2013 rather than decoder.getstate().
2014 This is for a few decoders such as utf-16 for which the state value
2015 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2016 utf-16, that we are expecting a BOM).
2017 */
2018 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
2019 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
2020 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002021 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2022 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002023 if (res == NULL)
2024 return -1;
2025 Py_DECREF(res);
2026 return 0;
2027}
2028
Antoine Pitroue4501852009-05-14 18:55:55 +00002029static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002030_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002031{
2032 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002033 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002034 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2035 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2036 self->encoding_start_of_stream = 1;
2037 }
2038 else {
2039 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2040 _PyIO_zero, NULL);
2041 self->encoding_start_of_stream = 0;
2042 }
2043 if (res == NULL)
2044 return -1;
2045 Py_DECREF(res);
2046 return 0;
2047}
2048
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002050textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002051{
2052 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002053 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002054 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002055 PyObject *res;
2056 int cmp;
2057
2058 CHECK_INITIALIZED(self);
2059
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002060 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2061 return NULL;
2062 CHECK_CLOSED(self);
2063
2064 Py_INCREF(cookieObj);
2065
2066 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002067 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068 goto fail;
2069 }
2070
2071 if (whence == 1) {
2072 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002073 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 if (cmp < 0)
2075 goto fail;
2076
2077 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002078 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002079 goto fail;
2080 }
2081
2082 /* Seeking to the current position should attempt to
2083 * sync the underlying buffer with the current position.
2084 */
2085 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002086 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002087 if (cookieObj == NULL)
2088 goto fail;
2089 }
2090 else if (whence == 2) {
2091 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002092 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002093 if (cmp < 0)
2094 goto fail;
2095
2096 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002097 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002098 goto fail;
2099 }
2100
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002101 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002102 if (res == NULL)
2103 goto fail;
2104 Py_DECREF(res);
2105
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002106 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002107 Py_CLEAR(self->snapshot);
2108 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002109 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002110 if (res == NULL)
2111 goto fail;
2112 Py_DECREF(res);
2113 }
2114
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002115 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002116 Py_XDECREF(cookieObj);
2117 return res;
2118 }
2119 else if (whence != 0) {
2120 PyErr_Format(PyExc_ValueError,
2121 "invalid whence (%d, should be 0, 1 or 2)", whence);
2122 goto fail;
2123 }
2124
Antoine Pitroue4501852009-05-14 18:55:55 +00002125 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002126 if (cmp < 0)
2127 goto fail;
2128
2129 if (cmp == 1) {
2130 PyErr_Format(PyExc_ValueError,
2131 "negative seek position %R", cookieObj);
2132 goto fail;
2133 }
2134
2135 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2136 if (res == NULL)
2137 goto fail;
2138 Py_DECREF(res);
2139
2140 /* The strategy of seek() is to go back to the safe start point
2141 * and replay the effect of read(chars_to_skip) from there.
2142 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002143 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002144 goto fail;
2145
2146 /* Seek back to the safe start point. */
2147 posobj = PyLong_FromOff_t(cookie.start_pos);
2148 if (posobj == NULL)
2149 goto fail;
2150 res = PyObject_CallMethodObjArgs(self->buffer,
2151 _PyIO_str_seek, posobj, NULL);
2152 Py_DECREF(posobj);
2153 if (res == NULL)
2154 goto fail;
2155 Py_DECREF(res);
2156
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002157 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002158 Py_CLEAR(self->snapshot);
2159
2160 /* Restore the decoder to its state from the safe start point. */
2161 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002162 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002163 goto fail;
2164 }
2165
2166 if (cookie.chars_to_skip) {
2167 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002168 PyObject *input_chunk = _PyObject_CallMethodId(
2169 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002170 PyObject *decoded;
2171
2172 if (input_chunk == NULL)
2173 goto fail;
2174
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002175 if (!PyBytes_Check(input_chunk)) {
2176 PyErr_Format(PyExc_TypeError,
2177 "underlying read() should have returned a bytes "
2178 "object, not '%.200s'",
2179 Py_TYPE(input_chunk)->tp_name);
2180 Py_DECREF(input_chunk);
2181 goto fail;
2182 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002183
2184 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2185 if (self->snapshot == NULL) {
2186 Py_DECREF(input_chunk);
2187 goto fail;
2188 }
2189
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002190 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2191 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002192
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002193 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002194 goto fail;
2195
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002196 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002197
2198 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002199 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002200 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2201 goto fail;
2202 }
2203 self->decoded_chars_used = cookie.chars_to_skip;
2204 }
2205 else {
2206 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2207 if (self->snapshot == NULL)
2208 goto fail;
2209 }
2210
Antoine Pitroue4501852009-05-14 18:55:55 +00002211 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2212 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002213 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002214 goto fail;
2215 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002216 return cookieObj;
2217 fail:
2218 Py_XDECREF(cookieObj);
2219 return NULL;
2220
2221}
2222
2223static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002224textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002225{
2226 PyObject *res;
2227 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002228 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002229 PyObject *next_input;
2230 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002231 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 PyObject *saved_state = NULL;
2233 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002234 char *dec_buffer;
2235 Py_ssize_t dec_buffer_len;
2236 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002237
2238 CHECK_INITIALIZED(self);
2239 CHECK_CLOSED(self);
2240
2241 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002242 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002243 goto fail;
2244 }
2245 if (!self->telling) {
2246 PyErr_SetString(PyExc_IOError,
2247 "telling position disabled by next() call");
2248 goto fail;
2249 }
2250
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002251 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002252 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002253 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002254 if (res == NULL)
2255 goto fail;
2256 Py_DECREF(res);
2257
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002258 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002259 if (posobj == NULL)
2260 goto fail;
2261
2262 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002263 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002264 return posobj;
2265 }
2266
2267#if defined(HAVE_LARGEFILE_SUPPORT)
2268 cookie.start_pos = PyLong_AsLongLong(posobj);
2269#else
2270 cookie.start_pos = PyLong_AsLong(posobj);
2271#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002272 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002273 if (PyErr_Occurred())
2274 goto fail;
2275
2276 /* Skip backward to the snapshot point (see _read_chunk). */
2277 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2278 goto fail;
2279
2280 assert (PyBytes_Check(next_input));
2281
2282 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2283
2284 /* How many decoded characters have been used up since the snapshot? */
2285 if (self->decoded_chars_used == 0) {
2286 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002287 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002288 }
2289
2290 chars_to_skip = self->decoded_chars_used;
2291
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002292 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002293 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2294 _PyIO_str_getstate, NULL);
2295 if (saved_state == NULL)
2296 goto fail;
2297
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002298#define DECODER_GETSTATE() do { \
2299 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2300 _PyIO_str_getstate, NULL); \
2301 if (_state == NULL) \
2302 goto fail; \
2303 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2304 Py_DECREF(_state); \
2305 goto fail; \
2306 } \
2307 Py_DECREF(_state); \
2308 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002309
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002310#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002311 PyObject *_decoded = _PyObject_CallMethodId( \
2312 self->decoder, &PyId_decode, "y#", start, len); \
Serhiy Storchakad03ce4a2013-02-03 17:07:32 +02002313 if (check_decoded(_decoded) < 0) \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002314 goto fail; \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002315 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002316 Py_DECREF(_decoded); \
2317 } while (0)
2318
2319 /* Fast search for an acceptable start point, close to our
2320 current pos */
2321 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2322 skip_back = 1;
2323 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2324 input = PyBytes_AS_STRING(next_input);
2325 while (skip_bytes > 0) {
2326 /* Decode up to temptative start point */
2327 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2328 goto fail;
2329 DECODER_DECODE(input, skip_bytes, chars_decoded);
2330 if (chars_decoded <= chars_to_skip) {
2331 DECODER_GETSTATE();
2332 if (dec_buffer_len == 0) {
2333 /* Before pos and no bytes buffered in decoder => OK */
2334 cookie.dec_flags = dec_flags;
2335 chars_to_skip -= chars_decoded;
2336 break;
2337 }
2338 /* Skip back by buffered amount and reset heuristic */
2339 skip_bytes -= dec_buffer_len;
2340 skip_back = 1;
2341 }
2342 else {
2343 /* We're too far ahead, skip back a bit */
2344 skip_bytes -= skip_back;
2345 skip_back *= 2;
2346 }
2347 }
2348 if (skip_bytes <= 0) {
2349 skip_bytes = 0;
2350 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2351 goto fail;
2352 }
2353
2354 /* Note our initial start point. */
2355 cookie.start_pos += skip_bytes;
2356 cookie.chars_to_skip = chars_to_skip;
2357 if (chars_to_skip == 0)
2358 goto finally;
2359
2360 /* We should be close to the desired position. Now feed the decoder one
2361 * byte at a time until we reach the `chars_to_skip` target.
2362 * As we go, note the nearest "safe start point" before the current
2363 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364 * can safely start from there and advance to this location).
2365 */
2366 chars_decoded = 0;
2367 input = PyBytes_AS_STRING(next_input);
2368 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002369 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002370 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002371 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002372
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002373 DECODER_DECODE(input, 1, n);
2374 /* We got n chars for 1 byte */
2375 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002376 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002377 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002378
2379 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2380 /* Decoder buffer is empty, so this is a safe start point. */
2381 cookie.start_pos += cookie.bytes_to_feed;
2382 chars_to_skip -= chars_decoded;
2383 cookie.dec_flags = dec_flags;
2384 cookie.bytes_to_feed = 0;
2385 chars_decoded = 0;
2386 }
2387 if (chars_decoded >= chars_to_skip)
2388 break;
2389 input++;
2390 }
2391 if (input == input_end) {
2392 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002393 PyObject *decoded = _PyObject_CallMethodId(
2394 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Serhiy Storchaka94dc6732013-02-03 17:03:31 +02002395 if (check_decoded(decoded) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002397 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002398 Py_DECREF(decoded);
2399 cookie.need_eof = 1;
2400
2401 if (chars_decoded < chars_to_skip) {
2402 PyErr_SetString(PyExc_IOError,
2403 "can't reconstruct logical file position");
2404 goto fail;
2405 }
2406 }
2407
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002408finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002409 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002410 Py_DECREF(saved_state);
2411 if (res == NULL)
2412 return NULL;
2413 Py_DECREF(res);
2414
2415 /* The returned cookie corresponds to the last safe start point. */
2416 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002417 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002418
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002419fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002420 if (saved_state) {
2421 PyObject *type, *value, *traceback;
2422 PyErr_Fetch(&type, &value, &traceback);
2423
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002424 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002425 Py_DECREF(saved_state);
2426 if (res == NULL)
2427 return NULL;
2428 Py_DECREF(res);
2429
2430 PyErr_Restore(type, value, traceback);
2431 }
2432 return NULL;
2433}
2434
2435static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002436textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002437{
2438 PyObject *pos = Py_None;
2439 PyObject *res;
2440
2441 CHECK_INITIALIZED(self)
2442 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2443 return NULL;
2444 }
2445
2446 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2447 if (res == NULL)
2448 return NULL;
2449 Py_DECREF(res);
2450
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002451 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002452}
2453
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002454static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002455textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002456{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002457 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002458
2459 CHECK_INITIALIZED(self);
2460
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002461 res = PyUnicode_FromString("<_io.TextIOWrapper");
2462 if (res == NULL)
2463 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002464 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002465 if (nameobj == NULL) {
2466 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2467 PyErr_Clear();
2468 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002469 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002470 }
2471 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002472 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002473 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002474 if (s == NULL)
2475 goto error;
2476 PyUnicode_AppendAndDel(&res, s);
2477 if (res == NULL)
2478 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002479 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002480 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002481 if (modeobj == NULL) {
2482 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2483 PyErr_Clear();
2484 else
2485 goto error;
2486 }
2487 else {
2488 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2489 Py_DECREF(modeobj);
2490 if (s == NULL)
2491 goto error;
2492 PyUnicode_AppendAndDel(&res, s);
2493 if (res == NULL)
2494 return NULL;
2495 }
2496 s = PyUnicode_FromFormat("%U encoding=%R>",
2497 res, self->encoding);
2498 Py_DECREF(res);
2499 return s;
2500error:
2501 Py_XDECREF(res);
2502 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002503}
2504
2505
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002506/* Inquiries */
2507
2508static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002509textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002510{
2511 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002512 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513}
2514
2515static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002516textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002517{
2518 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002519 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520}
2521
2522static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002523textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002524{
2525 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002526 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002527}
2528
2529static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002530textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002531{
2532 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002533 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534}
2535
2536static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002537textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002538{
2539 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002540 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002541}
2542
2543static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002544textiowrapper_getstate(textio *self, PyObject *args)
2545{
2546 PyErr_Format(PyExc_TypeError,
2547 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2548 return NULL;
2549}
2550
2551static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002552textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002553{
2554 CHECK_INITIALIZED(self);
2555 CHECK_CLOSED(self);
2556 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002557 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002558 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002559 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002560}
2561
2562static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002563textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002564{
2565 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002566 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002568
Antoine Pitrou6be88762010-05-03 16:48:20 +00002569 res = textiowrapper_closed_get(self, NULL);
2570 if (res == NULL)
2571 return NULL;
2572 r = PyObject_IsTrue(res);
2573 Py_DECREF(res);
2574 if (r < 0)
2575 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002576
Antoine Pitrou6be88762010-05-03 16:48:20 +00002577 if (r > 0) {
2578 Py_RETURN_NONE; /* stream already closed */
2579 }
2580 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002581 PyObject *exc = NULL, *val, *tb;
Antoine Pitroue033e062010-10-29 10:38:18 +00002582 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002583 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002584 if (res)
2585 Py_DECREF(res);
2586 else
2587 PyErr_Clear();
2588 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002589 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002590 if (res == NULL)
2591 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002592 else
2593 Py_DECREF(res);
2594
Benjamin Peterson68623612012-12-20 11:53:11 -06002595 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2596 if (exc != NULL) {
2597 if (res != NULL) {
2598 Py_CLEAR(res);
2599 PyErr_Restore(exc, val, tb);
2600 }
2601 else {
2602 PyObject *val2;
2603 Py_DECREF(exc);
2604 Py_XDECREF(tb);
2605 PyErr_Fetch(&exc, &val2, &tb);
2606 PyErr_NormalizeException(&exc, &val2, &tb);
2607 PyException_SetContext(val2, val);
2608 PyErr_Restore(exc, val2, tb);
2609 }
2610 }
2611 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002612 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002613}
2614
2615static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002616textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002617{
2618 PyObject *line;
2619
2620 CHECK_INITIALIZED(self);
2621
2622 self->telling = 0;
2623 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2624 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002625 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002626 }
2627 else {
2628 line = PyObject_CallMethodObjArgs((PyObject *)self,
2629 _PyIO_str_readline, NULL);
2630 if (line && !PyUnicode_Check(line)) {
2631 PyErr_Format(PyExc_IOError,
2632 "readline() should have returned an str object, "
2633 "not '%.200s'", Py_TYPE(line)->tp_name);
2634 Py_DECREF(line);
2635 return NULL;
2636 }
2637 }
2638
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002639 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002640 return NULL;
2641
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002642 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643 /* Reached EOF or would have blocked */
2644 Py_DECREF(line);
2645 Py_CLEAR(self->snapshot);
2646 self->telling = self->seekable;
2647 return NULL;
2648 }
2649
2650 return line;
2651}
2652
2653static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002654textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002655{
2656 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002657 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002658}
2659
2660static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002661textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002662{
2663 CHECK_INITIALIZED(self);
2664 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2665}
2666
2667static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002668textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002669{
2670 PyObject *res;
2671 CHECK_INITIALIZED(self);
2672 if (self->decoder == NULL)
2673 Py_RETURN_NONE;
2674 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2675 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002676 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2677 PyErr_Clear();
2678 Py_RETURN_NONE;
2679 }
2680 else {
2681 return NULL;
2682 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002683 }
2684 return res;
2685}
2686
2687static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002688textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002689{
2690 CHECK_INITIALIZED(self);
2691 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2692}
2693
2694static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002695textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696{
2697 CHECK_INITIALIZED(self);
2698 return PyLong_FromSsize_t(self->chunk_size);
2699}
2700
2701static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002702textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002703{
2704 Py_ssize_t n;
2705 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002706 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002707 if (n == -1 && PyErr_Occurred())
2708 return -1;
2709 if (n <= 0) {
2710 PyErr_SetString(PyExc_ValueError,
2711 "a strictly positive integer is required");
2712 return -1;
2713 }
2714 self->chunk_size = n;
2715 return 0;
2716}
2717
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002718static PyMethodDef textiowrapper_methods[] = {
2719 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2720 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2721 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2722 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2723 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2724 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002725
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002726 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2727 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2728 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2729 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2730 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002731 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002732
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002733 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2734 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2735 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002736 {NULL, NULL}
2737};
2738
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002739static PyMemberDef textiowrapper_members[] = {
2740 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2741 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2742 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002743 {NULL}
2744};
2745
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002746static PyGetSetDef textiowrapper_getset[] = {
2747 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2748 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002749/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2750*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002751 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2752 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2753 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2754 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002755 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002756};
2757
2758PyTypeObject PyTextIOWrapper_Type = {
2759 PyVarObject_HEAD_INIT(NULL, 0)
2760 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002761 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002762 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002763 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002764 0, /*tp_print*/
2765 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002766 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002767 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002768 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002769 0, /*tp_as_number*/
2770 0, /*tp_as_sequence*/
2771 0, /*tp_as_mapping*/
2772 0, /*tp_hash */
2773 0, /*tp_call*/
2774 0, /*tp_str*/
2775 0, /*tp_getattro*/
2776 0, /*tp_setattro*/
2777 0, /*tp_as_buffer*/
2778 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2779 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002780 textiowrapper_doc, /* tp_doc */
2781 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2782 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002783 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002784 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002785 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002786 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2787 textiowrapper_methods, /* tp_methods */
2788 textiowrapper_members, /* tp_members */
2789 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002790 0, /* tp_base */
2791 0, /* tp_dict */
2792 0, /* tp_descr_get */
2793 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002794 offsetof(textio, dict), /*tp_dictoffset*/
2795 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002796 0, /* tp_alloc */
2797 PyType_GenericNew, /* tp_new */
2798};