/*
    An implementation of Text I/O as defined by PEP 3116 - "New I/O"

    Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.

    Written by Amaury Forgeot d'Arc and Antoine Pitrou
*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifiers for attribute and method names.  Each declaration
   makes a PyId_<name> object available to this file (for example
   PyId_setstate, which is passed to _PyObject_CallMethodId). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
/* Bit flags stored in nldecoder_object.seennl: which newline styles have
   been encountered so far. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
264
265PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000266_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267 PyObject *input, int final)
268{
269 PyObject *output;
270 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000271 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000272
273 if (self->decoder == NULL) {
274 PyErr_SetString(PyExc_ValueError,
275 "IncrementalNewlineDecoder.__init__ not called");
276 return NULL;
277 }
278
279 /* decode input (with the eventual \r from a previous pass) */
280 if (self->decoder != Py_None) {
281 output = PyObject_CallMethodObjArgs(self->decoder,
282 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
283 }
284 else {
285 output = input;
286 Py_INCREF(output);
287 }
288
289 if (output == NULL)
290 return NULL;
291
292 if (!PyUnicode_Check(output)) {
293 PyErr_SetString(PyExc_TypeError,
294 "decoder should return a string result");
295 goto error;
296 }
297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200298 if (PyUnicode_READY(output) == -1)
299 goto error;
300
301 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000302 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 /* Prefix output with CR */
304 int kind;
305 PyObject *modified;
306 char *out;
307
308 modified = PyUnicode_New(output_len + 1,
309 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000310 if (modified == NULL)
311 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200312 kind = PyUnicode_KIND(modified);
313 out = PyUnicode_DATA(modified);
314 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200315 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000316 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200317 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000318 self->pendingcr = 0;
319 output_len++;
320 }
321
322 /* retain last \r even when not translating data:
323 * then readline() is sure to get \r\n in one pass
324 */
325 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000326 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
328 {
329 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
330 if (modified == NULL)
331 goto error;
332 Py_DECREF(output);
333 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000334 self->pendingcr = 1;
335 }
336 }
337
338 /* Record which newlines are read and do newline translation if desired,
339 all in one pass. */
340 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 Py_ssize_t len;
343 int seennl = self->seennl;
344 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200345 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 in_str = PyUnicode_DATA(output);
348 len = PyUnicode_GET_LENGTH(output);
349 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000350
351 if (len == 0)
352 return output;
353
354 /* If, up to now, newlines are consistently \n, do a quick check
355 for the \r *byte* with the libc's optimized memchr.
356 */
357 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200358 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000359 }
360
Antoine Pitrou66913e22009-03-06 23:40:56 +0000361 if (only_lf) {
362 /* If not already seen, quick scan for a possible "\n" character.
363 (there's nothing else to be done, even when in translation mode)
364 */
365 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200366 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100367 if (kind == PyUnicode_1BYTE_KIND)
368 seennl |= SEEN_LF;
369 else {
370 Py_ssize_t i = 0;
371 for (;;) {
372 Py_UCS4 c;
373 /* Fast loop for non-control characters */
374 while (PyUnicode_READ(kind, in_str, i) > '\n')
375 i++;
376 c = PyUnicode_READ(kind, in_str, i++);
377 if (c == '\n') {
378 seennl |= SEEN_LF;
379 break;
380 }
381 if (i >= len)
382 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000383 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
385 }
386 /* Finished: we have scanned for newlines, and none of them
387 need translating */
388 }
389 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000392 if (seennl == SEEN_ALL)
393 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000394 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200395 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200397 while (PyUnicode_READ(kind, in_str, i) > '\r')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000400 if (c == '\n')
401 seennl |= SEEN_LF;
402 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 }
407 else
408 seennl |= SEEN_CR;
409 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200410 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000411 break;
412 if (seennl == SEEN_ALL)
413 break;
414 }
415 endscan:
416 ;
417 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000418 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 void *translated;
420 int kind = PyUnicode_KIND(output);
421 void *in_str = PyUnicode_DATA(output);
422 Py_ssize_t in, out;
423 /* XXX: Previous in-place translation here is disabled as
424 resizing is not possible anymore */
425 /* We could try to optimize this so that we only do a copy
426 when there is something to translate. On the other hand,
427 we already know there is a \r byte, so chances are high
428 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200429 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 if (translated == NULL) {
431 PyErr_NoMemory();
432 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
439 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 seennl |= SEEN_LF;
443 continue;
444 }
445 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 in++;
448 seennl |= SEEN_CRLF;
449 }
450 else
451 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 continue;
454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_DECREF(output);
460 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100461 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200463 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 }
465 self->seennl |= seennl;
466 }
467
468 return output;
469
470 error:
471 Py_DECREF(output);
472 return NULL;
473}
474
475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000476incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 PyObject *args, PyObject *kwds)
478{
479 char *kwlist[] = {"input", "final", NULL};
480 PyObject *input;
481 int final = 0;
482
483 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
484 kwlist, &input, &final))
485 return NULL;
486 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
487}
488
489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000490incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000491{
492 PyObject *buffer;
493 unsigned PY_LONG_LONG flag;
494
495 if (self->decoder != Py_None) {
496 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
497 _PyIO_str_getstate, NULL);
498 if (state == NULL)
499 return NULL;
500 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
501 Py_DECREF(state);
502 return NULL;
503 }
504 Py_INCREF(buffer);
505 Py_DECREF(state);
506 }
507 else {
508 buffer = PyBytes_FromString("");
509 flag = 0;
510 }
511 flag <<= 1;
512 if (self->pendingcr)
513 flag |= 1;
514 return Py_BuildValue("NK", buffer, flag);
515}
516
517static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000518incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000519{
520 PyObject *buffer;
521 unsigned PY_LONG_LONG flag;
522
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
524 return NULL;
525
526 self->pendingcr = (int) flag & 1;
527 flag >>= 1;
528
529 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200530 return _PyObject_CallMethodId(self->decoder,
531 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532 else
533 Py_RETURN_NONE;
534}
535
536static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000537incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000538{
539 self->seennl = 0;
540 self->pendingcr = 0;
541 if (self->decoder != Py_None)
542 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
543 else
544 Py_RETURN_NONE;
545}
546
547static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000548incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000549{
550 switch (self->seennl) {
551 case SEEN_CR:
552 return PyUnicode_FromString("\r");
553 case SEEN_LF:
554 return PyUnicode_FromString("\n");
555 case SEEN_CRLF:
556 return PyUnicode_FromString("\r\n");
557 case SEEN_CR | SEEN_LF:
558 return Py_BuildValue("ss", "\r", "\n");
559 case SEEN_CR | SEEN_CRLF:
560 return Py_BuildValue("ss", "\r", "\r\n");
561 case SEEN_LF | SEEN_CRLF:
562 return Py_BuildValue("ss", "\n", "\r\n");
563 case SEEN_CR | SEEN_LF | SEEN_CRLF:
564 return Py_BuildValue("sss", "\r", "\n", "\r\n");
565 default:
566 Py_RETURN_NONE;
567 }
568
569}
570
571
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000572static PyMethodDef incrementalnewlinedecoder_methods[] = {
573 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
574 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
575 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
576 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000577 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578};
579
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000580static PyGetSetDef incrementalnewlinedecoder_getset[] = {
581 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000582 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583};
584
585PyTypeObject PyIncrementalNewlineDecoder_Type = {
586 PyVarObject_HEAD_INIT(NULL, 0)
587 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000588 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000591 0, /*tp_print*/
592 0, /*tp_getattr*/
593 0, /*tp_setattr*/
594 0, /*tp_compare */
595 0, /*tp_repr*/
596 0, /*tp_as_number*/
597 0, /*tp_as_sequence*/
598 0, /*tp_as_mapping*/
599 0, /*tp_hash */
600 0, /*tp_call*/
601 0, /*tp_str*/
602 0, /*tp_getattro*/
603 0, /*tp_setattro*/
604 0, /*tp_as_buffer*/
605 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000606 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000607 0, /* tp_traverse */
608 0, /* tp_clear */
609 0, /* tp_richcompare */
610 0, /*tp_weaklistoffset*/
611 0, /* tp_iter */
612 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000615 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000616 0, /* tp_base */
617 0, /* tp_dict */
618 0, /* tp_descr_get */
619 0, /* tp_descr_set */
620 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000621 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000622 0, /* tp_alloc */
623 PyType_GenericNew, /* tp_new */
624};
625
626
/* TextIOWrapper */
628
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 "Character and line based layer over a BufferedIOBase object, buffer.\n"
631 "\n"
632 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200633 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634 "\n"
635 "errors determines the strictness of encoding and decoding (see the\n"
636 "codecs.register) and defaults to \"strict\".\n"
637 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200638 "newline controls how line endings are handled. It can be None, '',\n"
639 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
640 "\n"
641 "* On input, if newline is None, universal newlines mode is\n"
642 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
643 " these are translated into '\\n' before being returned to the\n"
644 " caller. If it is '', universal newline mode is enabled, but line\n"
645 " endings are returned to the caller untranslated. If it has any of\n"
646 " the other legal values, input lines are only terminated by the given\n"
647 " string, and the line ending is returned to the caller untranslated.\n"
648 "\n"
649 "* On output, if newline is None, any '\\n' characters written are\n"
650 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300651 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200652 " of the other legal values, any '\\n' characters written are translated\n"
653 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 "\n"
655 "If line_buffering is True, a call to flush is implied when a call to\n"
656 "write contains a newline character."
657 );
658
659typedef PyObject *
660 (*encodefunc_t)(PyObject *, PyObject *);
661
662typedef struct
663{
664 PyObject_HEAD
665 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000666 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000667 Py_ssize_t chunk_size;
668 PyObject *buffer;
669 PyObject *encoding;
670 PyObject *encoder;
671 PyObject *decoder;
672 PyObject *readnl;
673 PyObject *errors;
674 const char *writenl; /* utf-8 encoded, NULL stands for \n */
675 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200676 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 char readuniversal;
678 char readtranslate;
679 char writetranslate;
680 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200681 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000683 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000684 /* Specialized encoding func (see below) */
685 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000686 /* Whether or not it's the start of the stream */
687 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000688
689 /* Reads and writes are internally buffered in order to speed things up.
690 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000691
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000692 Please also note that text to be written is first encoded before being
693 buffered. This is necessary so that encoding errors are immediately
694 reported to the caller, but it unfortunately means that the
695 IncrementalEncoder (whose encode() method is always written in Python)
696 becomes a bottleneck for small writes.
697 */
698 PyObject *decoded_chars; /* buffer for text returned from decoder */
699 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
700 PyObject *pending_bytes; /* list of bytes objects waiting to be
701 written, or NULL */
702 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000703
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000704 /* snapshot is either None, or a tuple (dec_flags, next_input) where
705 * dec_flags is the second (integer) item of the decoder state and
706 * next_input is the chunk of input bytes that comes next after the
707 * snapshot point. We use this to reconstruct decoder states in tell().
708 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000709 PyObject *snapshot;
710 /* Bytes-to-characters ratio for the current chunk. Serves as input for
711 the heuristic in tell(). */
712 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000713
714 /* Cache raw object if it's a FileIO object */
715 PyObject *raw;
716
717 PyObject *weakreflist;
718 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000719} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000720
721
/* A couple of specialized cases in order to bypass the slow incremental
   encoding methods for the most popular encodings. */
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200728 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729}
730
731static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000732utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
735 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100741 return _PyUnicode_EncodeUTF16(text,
742 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743}
744
745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000746utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000747{
Antoine Pitroue4501852009-05-14 18:55:55 +0000748 if (!self->encoding_start_of_stream) {
749 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000751 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000752#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000753 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000754#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000755 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100756 return _PyUnicode_EncodeUTF16(text,
757 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758}
759
Antoine Pitroue4501852009-05-14 18:55:55 +0000760static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000761utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000762{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
764 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000769{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100770 return _PyUnicode_EncodeUTF32(text,
771 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000772}
773
774static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000775utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000776{
777 if (!self->encoding_start_of_stream) {
778 /* Skip the BOM and use native byte ordering */
779#if defined(WORDS_BIGENDIAN)
780 return utf32be_encode(self, text);
781#else
782 return utf32le_encode(self, text);
783#endif
784 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100785 return _PyUnicode_EncodeUTF32(text,
786 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000787}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793}
794
795static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000796latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200798 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799}
800
801/* Map normalized encoding names onto the specialized encoding funcs */
802
803typedef struct {
804 const char *name;
805 encodefunc_t encodefunc;
806} encodefuncentry;
807
Antoine Pitrou24f36292009-03-28 22:16:42 +0000808static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809 {"ascii", (encodefunc_t) ascii_encode},
810 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000811 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 {"utf-16-be", (encodefunc_t) utf16be_encode},
813 {"utf-16-le", (encodefunc_t) utf16le_encode},
814 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000815 {"utf-32-be", (encodefunc_t) utf32be_encode},
816 {"utf-32-le", (encodefunc_t) utf32le_encode},
817 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818 {NULL, NULL}
819};
820
821
822static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000823textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000824{
825 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200826 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000827 NULL};
828 PyObject *buffer, *raw;
829 char *encoding = NULL;
830 char *errors = NULL;
831 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200832 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 _PyIO_State *state = IO_STATE;
834
835 PyObject *res;
836 int r;
837
838 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000839 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200840 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000841 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200842 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000843 return -1;
844
845 if (newline && newline[0] != '\0'
846 && !(newline[0] == '\n' && newline[1] == '\0')
847 && !(newline[0] == '\r' && newline[1] == '\0')
848 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 PyErr_Format(PyExc_ValueError,
850 "illegal newline value: %s", newline);
851 return -1;
852 }
853
854 Py_CLEAR(self->buffer);
855 Py_CLEAR(self->encoding);
856 Py_CLEAR(self->encoder);
857 Py_CLEAR(self->decoder);
858 Py_CLEAR(self->readnl);
859 Py_CLEAR(self->decoded_chars);
860 Py_CLEAR(self->pending_bytes);
861 Py_CLEAR(self->snapshot);
862 Py_CLEAR(self->errors);
863 Py_CLEAR(self->raw);
864 self->decoded_chars_used = 0;
865 self->pending_bytes_count = 0;
866 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000867 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000868
869 if (encoding == NULL) {
870 /* Try os.device_encoding(fileno) */
871 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200872 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000873 /* Ignore only AttributeError and UnsupportedOperation */
874 if (fileno == NULL) {
875 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
876 PyErr_ExceptionMatches(state->unsupported_operation)) {
877 PyErr_Clear();
878 }
879 else {
880 goto error;
881 }
882 }
883 else {
Serhiy Storchaka9101e232013-01-19 12:41:45 +0200884 int fd = _PyLong_AsInt(fileno);
Brett Cannonefb00c02012-02-29 18:31:31 -0500885 Py_DECREF(fileno);
886 if (fd == -1 && PyErr_Occurred()) {
887 goto error;
888 }
889
890 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000891 if (self->encoding == NULL)
892 goto error;
893 else if (!PyUnicode_Check(self->encoding))
894 Py_CLEAR(self->encoding);
895 }
896 }
897 if (encoding == NULL && self->encoding == NULL) {
898 if (state->locale_module == NULL) {
899 state->locale_module = PyImport_ImportModule("locale");
900 if (state->locale_module == NULL)
901 goto catch_ImportError;
902 else
903 goto use_locale;
904 }
905 else {
906 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200907 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200908 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000909 if (self->encoding == NULL) {
910 catch_ImportError:
911 /*
912 Importing locale can raise a ImportError because of
913 _functools, and locale.getpreferredencoding can raise a
914 ImportError if _locale is not available. These will happen
915 during module building.
916 */
917 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
918 PyErr_Clear();
919 self->encoding = PyUnicode_FromString("ascii");
920 }
921 else
922 goto error;
923 }
924 else if (!PyUnicode_Check(self->encoding))
925 Py_CLEAR(self->encoding);
926 }
927 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000928 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000929 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000930 if (encoding == NULL)
931 goto error;
932 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000933 else if (encoding != NULL) {
934 self->encoding = PyUnicode_FromString(encoding);
935 if (self->encoding == NULL)
936 goto error;
937 }
938 else {
939 PyErr_SetString(PyExc_IOError,
940 "could not determine default encoding");
941 }
942
943 if (errors == NULL)
944 errors = "strict";
945 self->errors = PyBytes_FromString(errors);
946 if (self->errors == NULL)
947 goto error;
948
949 self->chunk_size = 8192;
950 self->readuniversal = (newline == NULL || newline[0] == '\0');
951 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200952 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000953 self->readtranslate = (newline == NULL);
954 if (newline) {
955 self->readnl = PyUnicode_FromString(newline);
956 if (self->readnl == NULL)
957 return -1;
958 }
959 self->writetranslate = (newline == NULL || newline[0] != '\0');
960 if (!self->readuniversal && self->readnl) {
961 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000962 if (self->writenl == NULL)
963 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 if (!strcmp(self->writenl, "\n"))
965 self->writenl = NULL;
966 }
967#ifdef MS_WINDOWS
968 else
969 self->writenl = "\r\n";
970#endif
971
972 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200973 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000974 if (res == NULL)
975 goto error;
976 r = PyObject_IsTrue(res);
977 Py_DECREF(res);
978 if (r == -1)
979 goto error;
980 if (r == 1) {
981 self->decoder = PyCodec_IncrementalDecoder(
982 encoding, errors);
983 if (self->decoder == NULL)
984 goto error;
985
986 if (self->readuniversal) {
987 PyObject *incrementalDecoder = PyObject_CallFunction(
988 (PyObject *)&PyIncrementalNewlineDecoder_Type,
989 "Oi", self->decoder, (int)self->readtranslate);
990 if (incrementalDecoder == NULL)
991 goto error;
992 Py_CLEAR(self->decoder);
993 self->decoder = incrementalDecoder;
994 }
995 }
996
997 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200998 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000999 if (res == NULL)
1000 goto error;
1001 r = PyObject_IsTrue(res);
1002 Py_DECREF(res);
1003 if (r == -1)
1004 goto error;
1005 if (r == 1) {
1006 PyObject *ci;
1007 self->encoder = PyCodec_IncrementalEncoder(
1008 encoding, errors);
1009 if (self->encoder == NULL)
1010 goto error;
1011 /* Get the normalized named of the codec */
1012 ci = _PyCodec_Lookup(encoding);
1013 if (ci == NULL)
1014 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001015 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001016 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001017 if (res == NULL) {
1018 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019 PyErr_Clear();
1020 else
1021 goto error;
1022 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001023 else if (PyUnicode_Check(res)) {
1024 encodefuncentry *e = encodefuncs;
1025 while (e->name != NULL) {
1026 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1027 self->encodefunc = e->encodefunc;
1028 break;
1029 }
1030 e++;
1031 }
1032 }
1033 Py_XDECREF(res);
1034 }
1035
1036 self->buffer = buffer;
1037 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001038
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1040 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1041 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001042 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001043 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001044 if (raw == NULL) {
1045 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 PyErr_Clear();
1047 else
1048 goto error;
1049 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 else if (Py_TYPE(raw) == &PyFileIO_Type)
1051 self->raw = raw;
1052 else
1053 Py_DECREF(raw);
1054 }
1055
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001056 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001057 if (res == NULL)
1058 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001059 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001061 if (r < 0)
1062 goto error;
1063 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001064
Martin v. Löwis767046a2011-10-14 15:35:36 +02001065 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001066
Antoine Pitroue4501852009-05-14 18:55:55 +00001067 self->encoding_start_of_stream = 0;
1068 if (self->seekable && self->encoder) {
1069 PyObject *cookieObj;
1070 int cmp;
1071
1072 self->encoding_start_of_stream = 1;
1073
1074 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1075 if (cookieObj == NULL)
1076 goto error;
1077
1078 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1079 Py_DECREF(cookieObj);
1080 if (cmp < 0) {
1081 goto error;
1082 }
1083
1084 if (cmp == 0) {
1085 self->encoding_start_of_stream = 0;
1086 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1087 _PyIO_zero, NULL);
1088 if (res == NULL)
1089 goto error;
1090 Py_DECREF(res);
1091 }
1092 }
1093
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001094 self->ok = 1;
1095 return 0;
1096
1097 error:
1098 return -1;
1099}
1100
1101static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001102_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001103{
1104 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1105 return -1;
1106 self->ok = 0;
1107 Py_CLEAR(self->buffer);
1108 Py_CLEAR(self->encoding);
1109 Py_CLEAR(self->encoder);
1110 Py_CLEAR(self->decoder);
1111 Py_CLEAR(self->readnl);
1112 Py_CLEAR(self->decoded_chars);
1113 Py_CLEAR(self->pending_bytes);
1114 Py_CLEAR(self->snapshot);
1115 Py_CLEAR(self->errors);
1116 Py_CLEAR(self->raw);
1117 return 0;
1118}
1119
1120static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001121textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122{
Antoine Pitroue033e062010-10-29 10:38:18 +00001123 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001124 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125 return;
1126 _PyObject_GC_UNTRACK(self);
1127 if (self->weakreflist != NULL)
1128 PyObject_ClearWeakRefs((PyObject *)self);
1129 Py_CLEAR(self->dict);
1130 Py_TYPE(self)->tp_free((PyObject *)self);
1131}
1132
1133static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001134textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135{
1136 Py_VISIT(self->buffer);
1137 Py_VISIT(self->encoding);
1138 Py_VISIT(self->encoder);
1139 Py_VISIT(self->decoder);
1140 Py_VISIT(self->readnl);
1141 Py_VISIT(self->decoded_chars);
1142 Py_VISIT(self->pending_bytes);
1143 Py_VISIT(self->snapshot);
1144 Py_VISIT(self->errors);
1145 Py_VISIT(self->raw);
1146
1147 Py_VISIT(self->dict);
1148 return 0;
1149}
1150
1151static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001152textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001153{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001154 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001155 return -1;
1156 Py_CLEAR(self->dict);
1157 return 0;
1158}
1159
1160static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001161textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001162
/* Raise ValueError and return NULL from the enclosing function if the
   stream is closed.

   This macro takes some shortcuts to make the common case faster: for exact
   TextIOWrapper instances it asks the cached raw FileIO object directly
   when there is one, and otherwise evaluates the `closed` getter.  Any other
   type goes through _PyIOBase_check_closed().

   Only usable in functions returning a pointer.  The argument is
   parenthesized at every use so that any pointer expression can be passed. */
#define CHECK_CLOSED(self) \
    do { \
        int r; \
        PyObject *_res; \
        if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
            if ((self)->raw != NULL) \
                r = _PyFileIO_closed((self)->raw); \
            else { \
                _res = textiowrapper_closed_get((self), NULL); \
                if (_res == NULL) \
                    return NULL; \
                r = PyObject_IsTrue(_res); \
                Py_DECREF(_res); \
                if (r < 0) \
                    return NULL; \
            } \
            if (r > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
        else if (_PyIOBase_check_closed((PyObject *)(self), Py_True) == NULL) \
            return NULL; \
    } while (0)
1189
/* Raise ValueError and return NULL from the enclosing function when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized.

   The macro expands to a bare `if` statement: do not use it as the
   unbraced body of another if/else.  The argument is parenthesized at every
   use so that any pointer expression can be passed. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1201
/* Same check as CHECK_INITIALIZED, for functions returning int: raises
   ValueError and returns -1 when self->ok <= 0.

   The macro expands to a bare `if` statement: do not use it as the
   unbraced body of another if/else.  The argument is parenthesized at every
   use so that any pointer expression can be passed. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
        } else { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1213
1214
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001215static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001216textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001217{
1218 PyObject *buffer, *res;
1219 CHECK_INITIALIZED(self);
1220 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1221 if (res == NULL)
1222 return NULL;
1223 Py_DECREF(res);
1224 buffer = self->buffer;
1225 self->buffer = NULL;
1226 self->detached = 1;
1227 self->ok = 0;
1228 return buffer;
1229}
1230
Antoine Pitrou24f36292009-03-28 22:16:42 +00001231/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001232 underlying buffered object, though. */
1233static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001234_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001235{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001236 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001237
1238 if (self->pending_bytes == NULL)
1239 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001240
1241 pending = self->pending_bytes;
1242 Py_INCREF(pending);
1243 self->pending_bytes_count = 0;
1244 Py_CLEAR(self->pending_bytes);
1245
1246 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1247 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248 if (b == NULL)
1249 return -1;
Gregory P. Smithb9817b02013-02-01 13:03:39 -08001250 ret = NULL;
1251 do {
1252 ret = PyObject_CallMethodObjArgs(self->buffer,
1253 _PyIO_str_write, b, NULL);
1254 } while (ret == NULL && _PyIO_trap_eintr());
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001255 Py_DECREF(b);
1256 if (ret == NULL)
1257 return -1;
1258 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001259 return 0;
1260}
1261
1262static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001263textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001264{
1265 PyObject *ret;
1266 PyObject *text; /* owned reference */
1267 PyObject *b;
1268 Py_ssize_t textlen;
1269 int haslf = 0;
1270 int needflush = 0;
1271
1272 CHECK_INITIALIZED(self);
1273
1274 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1275 return NULL;
1276 }
1277
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001278 if (PyUnicode_READY(text) == -1)
1279 return NULL;
1280
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001281 CHECK_CLOSED(self);
1282
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001283 if (self->encoder == NULL)
1284 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001285
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286 Py_INCREF(text);
1287
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001289
1290 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001291 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001292 haslf = 1;
1293
1294 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001295 PyObject *newtext = _PyObject_CallMethodId(
1296 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001297 Py_DECREF(text);
1298 if (newtext == NULL)
1299 return NULL;
1300 text = newtext;
1301 }
1302
Antoine Pitroue96ec682011-07-23 21:46:35 +02001303 if (self->write_through)
1304 needflush = 1;
1305 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001306 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001307 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001308 needflush = 1;
1309
1310 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001311 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001313 self->encoding_start_of_stream = 0;
1314 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001315 else
1316 b = PyObject_CallMethodObjArgs(self->encoder,
1317 _PyIO_str_encode, text, NULL);
1318 Py_DECREF(text);
1319 if (b == NULL)
1320 return NULL;
1321
1322 if (self->pending_bytes == NULL) {
1323 self->pending_bytes = PyList_New(0);
1324 if (self->pending_bytes == NULL) {
1325 Py_DECREF(b);
1326 return NULL;
1327 }
1328 self->pending_bytes_count = 0;
1329 }
1330 if (PyList_Append(self->pending_bytes, b) < 0) {
1331 Py_DECREF(b);
1332 return NULL;
1333 }
1334 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1335 Py_DECREF(b);
1336 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001337 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001338 return NULL;
1339 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001340
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001341 if (needflush) {
1342 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1343 if (ret == NULL)
1344 return NULL;
1345 Py_DECREF(ret);
1346 }
1347
1348 Py_CLEAR(self->snapshot);
1349
1350 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001351 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001352 if (ret == NULL)
1353 return NULL;
1354 Py_DECREF(ret);
1355 }
1356
1357 return PyLong_FromSsize_t(textlen);
1358}
1359
1360/* Steal a reference to chars and store it in the decoded_char buffer;
1361 */
1362static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001363textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001364{
1365 Py_CLEAR(self->decoded_chars);
1366 self->decoded_chars = chars;
1367 self->decoded_chars_used = 0;
1368}
1369
1370static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001371textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001372{
1373 PyObject *chars;
1374 Py_ssize_t avail;
1375
1376 if (self->decoded_chars == NULL)
1377 return PyUnicode_FromStringAndSize(NULL, 0);
1378
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001379 /* decoded_chars is guaranteed to be "ready". */
1380 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001381 - self->decoded_chars_used);
1382
1383 assert(avail >= 0);
1384
1385 if (n < 0 || n > avail)
1386 n = avail;
1387
1388 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001389 chars = PyUnicode_Substring(self->decoded_chars,
1390 self->decoded_chars_used,
1391 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001392 if (chars == NULL)
1393 return NULL;
1394 }
1395 else {
1396 chars = self->decoded_chars;
1397 Py_INCREF(chars);
1398 }
1399
1400 self->decoded_chars_used += n;
1401 return chars;
1402}
1403
1404/* Read and decode the next chunk of data from the BufferedReader.
1405 */
1406static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001407textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001408{
1409 PyObject *dec_buffer = NULL;
1410 PyObject *dec_flags = NULL;
1411 PyObject *input_chunk = NULL;
1412 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001413 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001414 int eof;
1415
1416 /* The return value is True unless EOF was reached. The decoded string is
1417 * placed in self._decoded_chars (replacing its previous value). The
1418 * entire input chunk is sent to the decoder, though some of it may remain
1419 * buffered in the decoder, yet to be converted.
1420 */
1421
1422 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001423 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001424 return -1;
1425 }
1426
1427 if (self->telling) {
1428 /* To prepare for tell(), we need to snapshot a point in the file
1429 * where the decoder's input buffer is empty.
1430 */
1431
1432 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1433 _PyIO_str_getstate, NULL);
1434 if (state == NULL)
1435 return -1;
1436 /* Given this, we know there was a valid snapshot point
1437 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1438 */
1439 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1440 Py_DECREF(state);
1441 return -1;
1442 }
1443 Py_INCREF(dec_buffer);
1444 Py_INCREF(dec_flags);
1445 Py_DECREF(state);
1446 }
1447
1448 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001449 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001450 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001451 }
1452 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001453 if (chunk_size == NULL)
1454 goto fail;
1455 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001456 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1457 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001458 Py_DECREF(chunk_size);
1459 if (input_chunk == NULL)
1460 goto fail;
1461 assert(PyBytes_Check(input_chunk));
1462
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001463 nbytes = PyBytes_Size(input_chunk);
1464 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001465
1466 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1467 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1468 self->decoder, input_chunk, eof);
1469 }
1470 else {
1471 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1472 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1473 }
1474
1475 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1476 if (decoded_chars == NULL)
1477 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001478 if (PyUnicode_READY(decoded_chars) == -1)
1479 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001480 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001481 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001482 if (nchars > 0)
1483 self->b2cratio = (double) nbytes / nchars;
1484 else
1485 self->b2cratio = 0.0;
1486 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001487 eof = 0;
1488
1489 if (self->telling) {
1490 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1491 * next input to be decoded is dec_buffer + input_chunk.
1492 */
1493 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1494 if (next_input == NULL)
1495 goto fail;
1496 assert (PyBytes_Check(next_input));
1497 Py_DECREF(dec_buffer);
1498 Py_CLEAR(self->snapshot);
1499 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1500 }
1501 Py_DECREF(input_chunk);
1502
1503 return (eof == 0);
1504
1505 fail:
1506 Py_XDECREF(dec_buffer);
1507 Py_XDECREF(dec_flags);
1508 Py_XDECREF(input_chunk);
1509 return -1;
1510}
1511
1512static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001513textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001514{
1515 Py_ssize_t n = -1;
1516 PyObject *result = NULL, *chunks = NULL;
1517
1518 CHECK_INITIALIZED(self);
1519
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001520 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001521 return NULL;
1522
1523 CHECK_CLOSED(self);
1524
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001525 if (self->decoder == NULL)
1526 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001527
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001528 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001529 return NULL;
1530
1531 if (n < 0) {
1532 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001533 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001534 PyObject *decoded;
1535 if (bytes == NULL)
1536 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001537
1538 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1539 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1540 bytes, 1);
1541 else
1542 decoded = PyObject_CallMethodObjArgs(
1543 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001544 Py_DECREF(bytes);
1545 if (decoded == NULL)
1546 goto fail;
1547
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001548 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001549
1550 if (result == NULL) {
1551 Py_DECREF(decoded);
1552 return NULL;
1553 }
1554
1555 PyUnicode_AppendAndDel(&result, decoded);
1556 if (result == NULL)
1557 goto fail;
1558
1559 Py_CLEAR(self->snapshot);
1560 return result;
1561 }
1562 else {
1563 int res = 1;
1564 Py_ssize_t remaining = n;
1565
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001566 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001567 if (result == NULL)
1568 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001569 if (PyUnicode_READY(result) == -1)
1570 goto fail;
1571 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001572
1573 /* Keep reading chunks until we have n characters to return */
1574 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001575 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001576 if (res < 0) {
1577 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1578 when EINTR occurs so we needn't do it ourselves. */
1579 if (_PyIO_trap_eintr()) {
1580 continue;
1581 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001582 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001583 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001584 if (res == 0) /* EOF */
1585 break;
1586 if (chunks == NULL) {
1587 chunks = PyList_New(0);
1588 if (chunks == NULL)
1589 goto fail;
1590 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001591 if (PyUnicode_GET_LENGTH(result) > 0 &&
1592 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593 goto fail;
1594 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001595 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001596 if (result == NULL)
1597 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001598 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001599 }
1600 if (chunks != NULL) {
1601 if (result != NULL && PyList_Append(chunks, result) < 0)
1602 goto fail;
1603 Py_CLEAR(result);
1604 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1605 if (result == NULL)
1606 goto fail;
1607 Py_CLEAR(chunks);
1608 }
1609 return result;
1610 }
1611 fail:
1612 Py_XDECREF(result);
1613 Py_XDECREF(chunks);
1614 return NULL;
1615}
1616
1617
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001618/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001619 that is to the NUL character. Otherwise the function will produce
1620 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001621static char *
1622find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001623{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001624 if (kind == PyUnicode_1BYTE_KIND) {
1625 assert(ch < 256);
1626 return (char *) memchr((void *) s, (char) ch, end - s);
1627 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001628 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001629 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001630 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001631 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001632 return s;
1633 if (s == end)
1634 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001635 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001636 }
1637}
1638
1639Py_ssize_t
1640_PyIO_find_line_ending(
1641 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001642 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001643{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001644 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001645
1646 if (translated) {
1647 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001648 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001649 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001650 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001651 else {
1652 *consumed = len;
1653 return -1;
1654 }
1655 }
1656 else if (universal) {
1657 /* Universal newline search. Find any of \r, \r\n, \n
1658 * The decoder ensures that \r\n are not split in two pieces
1659 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001660 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001661 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001662 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001663 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001664 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001665 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001666 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001667 if (s >= end) {
1668 *consumed = len;
1669 return -1;
1670 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001671 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001672 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001673 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001674 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001676 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001677 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001678 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001679 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001680 }
1681 }
1682 }
1683 else {
1684 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001685 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1686 char *nl = PyUnicode_DATA(readnl);
1687 /* Assume that readnl is an ASCII character. */
1688 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001689 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001690 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001691 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001692 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001693 *consumed = len;
1694 return -1;
1695 }
1696 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001697 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001698 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001699 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001700 if (e < s)
1701 e = s;
1702 while (s < e) {
1703 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001704 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001705 if (pos == NULL || pos >= e)
1706 break;
1707 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001708 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001709 break;
1710 }
1711 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001712 return (pos - start)/kind + readnl_len;
1713 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001714 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001715 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001716 if (pos == NULL)
1717 *consumed = len;
1718 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001719 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001720 return -1;
1721 }
1722 }
1723}
1724
1725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001726_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001727{
1728 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1729 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1730 int res;
1731
1732 CHECK_CLOSED(self);
1733
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001734 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001735 return NULL;
1736
1737 chunked = 0;
1738
1739 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001740 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001741 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001742 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001743 Py_ssize_t consumed = 0;
1744
1745 /* First, get some data if necessary */
1746 res = 1;
1747 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001748 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001749 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001750 if (res < 0) {
1751 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1752 when EINTR occurs so we needn't do it ourselves. */
1753 if (_PyIO_trap_eintr()) {
1754 continue;
1755 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001756 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001757 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001758 if (res == 0)
1759 break;
1760 }
1761 if (res == 0) {
1762 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001763 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001764 Py_CLEAR(self->snapshot);
1765 start = endpos = offset_to_buffer = 0;
1766 break;
1767 }
1768
1769 if (remaining == NULL) {
1770 line = self->decoded_chars;
1771 start = self->decoded_chars_used;
1772 offset_to_buffer = 0;
1773 Py_INCREF(line);
1774 }
1775 else {
1776 assert(self->decoded_chars_used == 0);
1777 line = PyUnicode_Concat(remaining, self->decoded_chars);
1778 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001779 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001780 Py_CLEAR(remaining);
1781 if (line == NULL)
1782 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001783 if (PyUnicode_READY(line) == -1)
1784 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001785 }
1786
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001787 ptr = PyUnicode_DATA(line);
1788 line_len = PyUnicode_GET_LENGTH(line);
1789 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001790
1791 endpos = _PyIO_find_line_ending(
1792 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001793 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001794 ptr + kind * start,
1795 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001796 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001797 if (endpos >= 0) {
1798 endpos += start;
1799 if (limit >= 0 && (endpos - start) + chunked >= limit)
1800 endpos = start + limit - chunked;
1801 break;
1802 }
1803
1804 /* We can put aside up to `endpos` */
1805 endpos = consumed + start;
1806 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1807 /* Didn't find line ending, but reached length limit */
1808 endpos = start + limit - chunked;
1809 break;
1810 }
1811
1812 if (endpos > start) {
1813 /* No line ending seen yet - put aside current data */
1814 PyObject *s;
1815 if (chunks == NULL) {
1816 chunks = PyList_New(0);
1817 if (chunks == NULL)
1818 goto error;
1819 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001820 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001821 if (s == NULL)
1822 goto error;
1823 if (PyList_Append(chunks, s) < 0) {
1824 Py_DECREF(s);
1825 goto error;
1826 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001827 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001828 Py_DECREF(s);
1829 }
1830 /* There may be some remaining bytes we'll have to prepend to the
1831 next chunk of data */
1832 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001833 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001834 if (remaining == NULL)
1835 goto error;
1836 }
1837 Py_CLEAR(line);
1838 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001839 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001840 }
1841
1842 if (line != NULL) {
1843 /* Our line ends in the current buffer */
1844 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001845 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1846 PyObject *s = PyUnicode_Substring(line, start, endpos);
1847 Py_CLEAR(line);
1848 if (s == NULL)
1849 goto error;
1850 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001851 }
1852 }
1853 if (remaining != NULL) {
1854 if (chunks == NULL) {
1855 chunks = PyList_New(0);
1856 if (chunks == NULL)
1857 goto error;
1858 }
1859 if (PyList_Append(chunks, remaining) < 0)
1860 goto error;
1861 Py_CLEAR(remaining);
1862 }
1863 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001864 if (line != NULL) {
1865 if (PyList_Append(chunks, line) < 0)
1866 goto error;
1867 Py_DECREF(line);
1868 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001869 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1870 if (line == NULL)
1871 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001872 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001873 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001874 if (line == NULL) {
1875 Py_INCREF(_PyIO_empty_str);
1876 line = _PyIO_empty_str;
1877 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001878
1879 return line;
1880
1881 error:
1882 Py_XDECREF(chunks);
1883 Py_XDECREF(remaining);
1884 Py_XDECREF(line);
1885 return NULL;
1886}
1887
1888static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001889textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001890{
1891 Py_ssize_t limit = -1;
1892
1893 CHECK_INITIALIZED(self);
1894 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1895 return NULL;
1896 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001897 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001898}
1899
1900/* Seek and Tell */
1901
1902typedef struct {
1903 Py_off_t start_pos;
1904 int dec_flags;
1905 int bytes_to_feed;
1906 int chars_to_skip;
1907 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001908} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001909
/*
   To speed up cookie packing/unpacking, the cookie_type fields are copied
   through a temporary byte string which is converted with
   _PyLong_FromByteArray() or _PyLong_AsByteArray() (resp.).
   The OFF_* macros below give the offset of each field inside that
   intermediary byte string.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#ifdef WORDS_BIGENDIAN

# define IS_LITTLE_ENDIAN   0

/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else

# define IS_LITTLE_ENDIAN   1

/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
1947
1948static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001949textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001950{
1951 unsigned char buffer[COOKIE_BUF_LEN];
1952 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1953 if (cookieLong == NULL)
1954 return -1;
1955
1956 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1957 IS_LITTLE_ENDIAN, 0) < 0) {
1958 Py_DECREF(cookieLong);
1959 return -1;
1960 }
1961 Py_DECREF(cookieLong);
1962
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001963 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1964 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1965 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1966 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1967 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001968
1969 return 0;
1970}
1971
1972static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001973textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001974{
1975 unsigned char buffer[COOKIE_BUF_LEN];
1976
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001977 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1978 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1979 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1980 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1981 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001982
1983 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1984}
1985#undef IS_LITTLE_ENDIAN
1986
1987static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001988_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001989{
1990 PyObject *res;
1991 /* When seeking to the start of the stream, we call decoder.reset()
1992 rather than decoder.getstate().
1993 This is for a few decoders such as utf-16 for which the state value
1994 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1995 utf-16, that we are expecting a BOM).
1996 */
1997 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1998 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1999 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002000 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
2001 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002002 if (res == NULL)
2003 return -1;
2004 Py_DECREF(res);
2005 return 0;
2006}
2007
Antoine Pitroue4501852009-05-14 18:55:55 +00002008static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002009_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002010{
2011 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002012 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002013 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2014 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2015 self->encoding_start_of_stream = 1;
2016 }
2017 else {
2018 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2019 _PyIO_zero, NULL);
2020 self->encoding_start_of_stream = 0;
2021 }
2022 if (res == NULL)
2023 return -1;
2024 Py_DECREF(res);
2025 return 0;
2026}
2027
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002028static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002029textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030{
2031 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002032 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002033 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002034 PyObject *res;
2035 int cmp;
2036
2037 CHECK_INITIALIZED(self);
2038
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002039 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2040 return NULL;
2041 CHECK_CLOSED(self);
2042
2043 Py_INCREF(cookieObj);
2044
2045 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002046 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002047 goto fail;
2048 }
2049
2050 if (whence == 1) {
2051 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002052 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002053 if (cmp < 0)
2054 goto fail;
2055
2056 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002057 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002058 goto fail;
2059 }
2060
2061 /* Seeking to the current position should attempt to
2062 * sync the underlying buffer with the current position.
2063 */
2064 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002065 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002066 if (cookieObj == NULL)
2067 goto fail;
2068 }
2069 else if (whence == 2) {
2070 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002071 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002072 if (cmp < 0)
2073 goto fail;
2074
2075 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002076 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 goto fail;
2078 }
2079
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002080 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002081 if (res == NULL)
2082 goto fail;
2083 Py_DECREF(res);
2084
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002085 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 Py_CLEAR(self->snapshot);
2087 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002088 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002089 if (res == NULL)
2090 goto fail;
2091 Py_DECREF(res);
2092 }
2093
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002094 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002095 Py_XDECREF(cookieObj);
2096 return res;
2097 }
2098 else if (whence != 0) {
2099 PyErr_Format(PyExc_ValueError,
2100 "invalid whence (%d, should be 0, 1 or 2)", whence);
2101 goto fail;
2102 }
2103
Antoine Pitroue4501852009-05-14 18:55:55 +00002104 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002105 if (cmp < 0)
2106 goto fail;
2107
2108 if (cmp == 1) {
2109 PyErr_Format(PyExc_ValueError,
2110 "negative seek position %R", cookieObj);
2111 goto fail;
2112 }
2113
2114 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2115 if (res == NULL)
2116 goto fail;
2117 Py_DECREF(res);
2118
2119 /* The strategy of seek() is to go back to the safe start point
2120 * and replay the effect of read(chars_to_skip) from there.
2121 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002122 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002123 goto fail;
2124
2125 /* Seek back to the safe start point. */
2126 posobj = PyLong_FromOff_t(cookie.start_pos);
2127 if (posobj == NULL)
2128 goto fail;
2129 res = PyObject_CallMethodObjArgs(self->buffer,
2130 _PyIO_str_seek, posobj, NULL);
2131 Py_DECREF(posobj);
2132 if (res == NULL)
2133 goto fail;
2134 Py_DECREF(res);
2135
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002136 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002137 Py_CLEAR(self->snapshot);
2138
2139 /* Restore the decoder to its state from the safe start point. */
2140 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002141 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002142 goto fail;
2143 }
2144
2145 if (cookie.chars_to_skip) {
2146 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002147 PyObject *input_chunk = _PyObject_CallMethodId(
2148 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002149 PyObject *decoded;
2150
2151 if (input_chunk == NULL)
2152 goto fail;
2153
2154 assert (PyBytes_Check(input_chunk));
2155
2156 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2157 if (self->snapshot == NULL) {
2158 Py_DECREF(input_chunk);
2159 goto fail;
2160 }
2161
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002162 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2163 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002164
2165 if (decoded == NULL)
2166 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002167 if (PyUnicode_READY(decoded) == -1) {
2168 Py_DECREF(decoded);
2169 goto fail;
2170 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002171
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002172 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173
2174 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002175 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002176 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2177 goto fail;
2178 }
2179 self->decoded_chars_used = cookie.chars_to_skip;
2180 }
2181 else {
2182 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2183 if (self->snapshot == NULL)
2184 goto fail;
2185 }
2186
Antoine Pitroue4501852009-05-14 18:55:55 +00002187 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2188 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002189 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002190 goto fail;
2191 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002192 return cookieObj;
2193 fail:
2194 Py_XDECREF(cookieObj);
2195 return NULL;
2196
2197}
2198
2199static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002200textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002201{
2202 PyObject *res;
2203 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002204 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002205 PyObject *next_input;
2206 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002207 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002208 PyObject *saved_state = NULL;
2209 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002210 char *dec_buffer;
2211 Py_ssize_t dec_buffer_len;
2212 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002213
2214 CHECK_INITIALIZED(self);
2215 CHECK_CLOSED(self);
2216
2217 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002218 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002219 goto fail;
2220 }
2221 if (!self->telling) {
2222 PyErr_SetString(PyExc_IOError,
2223 "telling position disabled by next() call");
2224 goto fail;
2225 }
2226
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002227 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002228 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002229 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002230 if (res == NULL)
2231 goto fail;
2232 Py_DECREF(res);
2233
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002234 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002235 if (posobj == NULL)
2236 goto fail;
2237
2238 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002239 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002240 return posobj;
2241 }
2242
2243#if defined(HAVE_LARGEFILE_SUPPORT)
2244 cookie.start_pos = PyLong_AsLongLong(posobj);
2245#else
2246 cookie.start_pos = PyLong_AsLong(posobj);
2247#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002248 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002249 if (PyErr_Occurred())
2250 goto fail;
2251
2252 /* Skip backward to the snapshot point (see _read_chunk). */
2253 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2254 goto fail;
2255
2256 assert (PyBytes_Check(next_input));
2257
2258 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2259
2260 /* How many decoded characters have been used up since the snapshot? */
2261 if (self->decoded_chars_used == 0) {
2262 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002263 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002264 }
2265
2266 chars_to_skip = self->decoded_chars_used;
2267
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002268 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002269 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2270 _PyIO_str_getstate, NULL);
2271 if (saved_state == NULL)
2272 goto fail;
2273
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002274#define DECODER_GETSTATE() do { \
2275 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2276 _PyIO_str_getstate, NULL); \
2277 if (_state == NULL) \
2278 goto fail; \
2279 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2280 Py_DECREF(_state); \
2281 goto fail; \
2282 } \
2283 Py_DECREF(_state); \
2284 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002285
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002286 /* TODO: replace assert with exception */
2287#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002288 PyObject *_decoded = _PyObject_CallMethodId( \
2289 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002290 if (_decoded == NULL) \
2291 goto fail; \
2292 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002293 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002294 Py_DECREF(_decoded); \
2295 } while (0)
2296
2297 /* Fast search for an acceptable start point, close to our
2298 current pos */
2299 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2300 skip_back = 1;
2301 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2302 input = PyBytes_AS_STRING(next_input);
2303 while (skip_bytes > 0) {
2304 /* Decode up to temptative start point */
2305 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2306 goto fail;
2307 DECODER_DECODE(input, skip_bytes, chars_decoded);
2308 if (chars_decoded <= chars_to_skip) {
2309 DECODER_GETSTATE();
2310 if (dec_buffer_len == 0) {
2311 /* Before pos and no bytes buffered in decoder => OK */
2312 cookie.dec_flags = dec_flags;
2313 chars_to_skip -= chars_decoded;
2314 break;
2315 }
2316 /* Skip back by buffered amount and reset heuristic */
2317 skip_bytes -= dec_buffer_len;
2318 skip_back = 1;
2319 }
2320 else {
2321 /* We're too far ahead, skip back a bit */
2322 skip_bytes -= skip_back;
2323 skip_back *= 2;
2324 }
2325 }
2326 if (skip_bytes <= 0) {
2327 skip_bytes = 0;
2328 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2329 goto fail;
2330 }
2331
2332 /* Note our initial start point. */
2333 cookie.start_pos += skip_bytes;
2334 cookie.chars_to_skip = chars_to_skip;
2335 if (chars_to_skip == 0)
2336 goto finally;
2337
2338 /* We should be close to the desired position. Now feed the decoder one
2339 * byte at a time until we reach the `chars_to_skip` target.
2340 * As we go, note the nearest "safe start point" before the current
2341 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002342 * can safely start from there and advance to this location).
2343 */
2344 chars_decoded = 0;
2345 input = PyBytes_AS_STRING(next_input);
2346 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002347 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002348 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002349 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002350
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002351 DECODER_DECODE(input, 1, n);
2352 /* We got n chars for 1 byte */
2353 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002354 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002355 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002356
2357 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2358 /* Decoder buffer is empty, so this is a safe start point. */
2359 cookie.start_pos += cookie.bytes_to_feed;
2360 chars_to_skip -= chars_decoded;
2361 cookie.dec_flags = dec_flags;
2362 cookie.bytes_to_feed = 0;
2363 chars_decoded = 0;
2364 }
2365 if (chars_decoded >= chars_to_skip)
2366 break;
2367 input++;
2368 }
2369 if (input == input_end) {
2370 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002371 PyObject *decoded = _PyObject_CallMethodId(
2372 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002373 if (decoded == NULL)
2374 goto fail;
2375 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002376 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002377 Py_DECREF(decoded);
2378 cookie.need_eof = 1;
2379
2380 if (chars_decoded < chars_to_skip) {
2381 PyErr_SetString(PyExc_IOError,
2382 "can't reconstruct logical file position");
2383 goto fail;
2384 }
2385 }
2386
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002387finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002388 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002389 Py_DECREF(saved_state);
2390 if (res == NULL)
2391 return NULL;
2392 Py_DECREF(res);
2393
2394 /* The returned cookie corresponds to the last safe start point. */
2395 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002396 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002397
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002398fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002399 if (saved_state) {
2400 PyObject *type, *value, *traceback;
2401 PyErr_Fetch(&type, &value, &traceback);
2402
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002403 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002404 Py_DECREF(saved_state);
2405 if (res == NULL)
2406 return NULL;
2407 Py_DECREF(res);
2408
2409 PyErr_Restore(type, value, traceback);
2410 }
2411 return NULL;
2412}
2413
2414static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002415textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002416{
2417 PyObject *pos = Py_None;
2418 PyObject *res;
2419
2420 CHECK_INITIALIZED(self)
2421 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2422 return NULL;
2423 }
2424
2425 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2426 if (res == NULL)
2427 return NULL;
2428 Py_DECREF(res);
2429
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002430 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002431}
2432
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002433static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002434textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002435{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002436 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002437
2438 CHECK_INITIALIZED(self);
2439
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002440 res = PyUnicode_FromString("<_io.TextIOWrapper");
2441 if (res == NULL)
2442 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002443 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002444 if (nameobj == NULL) {
2445 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2446 PyErr_Clear();
2447 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002448 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002449 }
2450 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002451 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002452 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002453 if (s == NULL)
2454 goto error;
2455 PyUnicode_AppendAndDel(&res, s);
2456 if (res == NULL)
2457 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002458 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002459 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002460 if (modeobj == NULL) {
2461 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2462 PyErr_Clear();
2463 else
2464 goto error;
2465 }
2466 else {
2467 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2468 Py_DECREF(modeobj);
2469 if (s == NULL)
2470 goto error;
2471 PyUnicode_AppendAndDel(&res, s);
2472 if (res == NULL)
2473 return NULL;
2474 }
2475 s = PyUnicode_FromFormat("%U encoding=%R>",
2476 res, self->encoding);
2477 Py_DECREF(res);
2478 return s;
2479error:
2480 Py_XDECREF(res);
2481 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002482}
2483
2484
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002485/* Inquiries */
2486
2487static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002488textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489{
2490 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002491 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002492}
2493
2494static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002495textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002496{
2497 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002498 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002499}
2500
2501static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002502textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503{
2504 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002505 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002506}
2507
2508static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002509textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002510{
2511 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002512 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002513}
2514
2515static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002516textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002517{
2518 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002519 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002520}
2521
2522static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002523textiowrapper_getstate(textio *self, PyObject *args)
2524{
2525 PyErr_Format(PyExc_TypeError,
2526 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2527 return NULL;
2528}
2529
2530static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002531textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002532{
2533 CHECK_INITIALIZED(self);
2534 CHECK_CLOSED(self);
2535 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002536 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002538 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002539}
2540
2541static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002542textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002543{
2544 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002545 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002546 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002547
Antoine Pitrou6be88762010-05-03 16:48:20 +00002548 res = textiowrapper_closed_get(self, NULL);
2549 if (res == NULL)
2550 return NULL;
2551 r = PyObject_IsTrue(res);
2552 Py_DECREF(res);
2553 if (r < 0)
2554 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002555
Antoine Pitrou6be88762010-05-03 16:48:20 +00002556 if (r > 0) {
2557 Py_RETURN_NONE; /* stream already closed */
2558 }
2559 else {
Benjamin Peterson68623612012-12-20 11:53:11 -06002560 PyObject *exc = NULL, *val, *tb;
Antoine Pitroue033e062010-10-29 10:38:18 +00002561 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002562 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002563 if (res)
2564 Py_DECREF(res);
2565 else
2566 PyErr_Clear();
2567 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002568 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson68623612012-12-20 11:53:11 -06002569 if (res == NULL)
2570 PyErr_Fetch(&exc, &val, &tb);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002571 else
2572 Py_DECREF(res);
2573
Benjamin Peterson68623612012-12-20 11:53:11 -06002574 res = _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
2575 if (exc != NULL) {
2576 if (res != NULL) {
2577 Py_CLEAR(res);
2578 PyErr_Restore(exc, val, tb);
2579 }
2580 else {
2581 PyObject *val2;
2582 Py_DECREF(exc);
2583 Py_XDECREF(tb);
2584 PyErr_Fetch(&exc, &val2, &tb);
2585 PyErr_NormalizeException(&exc, &val2, &tb);
2586 PyException_SetContext(val2, val);
2587 PyErr_Restore(exc, val2, tb);
2588 }
2589 }
2590 return res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002591 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002592}
2593
2594static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002595textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002596{
2597 PyObject *line;
2598
2599 CHECK_INITIALIZED(self);
2600
2601 self->telling = 0;
2602 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2603 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002604 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002605 }
2606 else {
2607 line = PyObject_CallMethodObjArgs((PyObject *)self,
2608 _PyIO_str_readline, NULL);
2609 if (line && !PyUnicode_Check(line)) {
2610 PyErr_Format(PyExc_IOError,
2611 "readline() should have returned an str object, "
2612 "not '%.200s'", Py_TYPE(line)->tp_name);
2613 Py_DECREF(line);
2614 return NULL;
2615 }
2616 }
2617
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002618 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002619 return NULL;
2620
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002621 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002622 /* Reached EOF or would have blocked */
2623 Py_DECREF(line);
2624 Py_CLEAR(self->snapshot);
2625 self->telling = self->seekable;
2626 return NULL;
2627 }
2628
2629 return line;
2630}
2631
2632static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002633textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002634{
2635 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002636 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002637}
2638
2639static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002640textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002641{
2642 CHECK_INITIALIZED(self);
2643 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2644}
2645
2646static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002647textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002648{
2649 PyObject *res;
2650 CHECK_INITIALIZED(self);
2651 if (self->decoder == NULL)
2652 Py_RETURN_NONE;
2653 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2654 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002655 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2656 PyErr_Clear();
2657 Py_RETURN_NONE;
2658 }
2659 else {
2660 return NULL;
2661 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002662 }
2663 return res;
2664}
2665
2666static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002667textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002668{
2669 CHECK_INITIALIZED(self);
2670 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2671}
2672
2673static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002674textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002675{
2676 CHECK_INITIALIZED(self);
2677 return PyLong_FromSsize_t(self->chunk_size);
2678}
2679
2680static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002681textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002682{
2683 Py_ssize_t n;
2684 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002685 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686 if (n == -1 && PyErr_Occurred())
2687 return -1;
2688 if (n <= 0) {
2689 PyErr_SetString(PyExc_ValueError,
2690 "a strictly positive integer is required");
2691 return -1;
2692 }
2693 self->chunk_size = n;
2694 return 0;
2695}
2696
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002697static PyMethodDef textiowrapper_methods[] = {
2698 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2699 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2700 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2701 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2702 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2703 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002704
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002705 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2706 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2707 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2708 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2709 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002710 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002711
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002712 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2713 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2714 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002715 {NULL, NULL}
2716};
2717
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002718static PyMemberDef textiowrapper_members[] = {
2719 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2720 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2721 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002722 {NULL}
2723};
2724
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002725static PyGetSetDef textiowrapper_getset[] = {
2726 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2727 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002728/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2729*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002730 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2731 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2732 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2733 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002734 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002735};
2736
/* Type object for _io.TextIOWrapper.
 *
 * Slots are filled positionally, so the order of the entries below is
 * significant.  The type is subclassable (Py_TPFLAGS_BASETYPE) and takes
 * part in cyclic GC (Py_TPFLAGS_HAVE_GC, with traverse/clear hooks);
 * instances support weak references and carry an instance __dict__ via
 * the weakreflist/dict offsets into the textio struct.
 * tp_iter and tp_base are left 0 in this initializer.
 * NOTE(review): presumably they are inherited or filled in when the
 * module is initialised -- confirm against the module init code.
 */
2737PyTypeObject PyTextIOWrapper_Type = {
2738 PyVarObject_HEAD_INIT(NULL, 0)
2739 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002740 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002741 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002742 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002743 0, /*tp_print*/
2744 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002745 0, /*tp_setattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002746 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002747 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002748 0, /*tp_as_number*/
2749 0, /*tp_as_sequence*/
2750 0, /*tp_as_mapping*/
2751 0, /*tp_hash */
2752 0, /*tp_call*/
2753 0, /*tp_str*/
2754 0, /*tp_getattro*/
2755 0, /*tp_setattro*/
2756 0, /*tp_as_buffer*/
2757 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2758 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002759 textiowrapper_doc, /* tp_doc */
2760 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2761 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002762 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002763 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002764 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002765 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2766 textiowrapper_methods, /* tp_methods */
2767 textiowrapper_members, /* tp_members */
2768 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002769 0, /* tp_base */
2770 0, /* tp_dict */
2771 0, /* tp_descr_get */
2772 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002773 offsetof(textio, dict), /*tp_dictoffset*/
2774 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002775 0, /* tp_alloc */
2776 PyType_GenericNew, /* tp_new */
2777};