blob: 96434a81b74a4438201e93cdc44e0cdda0226338 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
/* Static identifiers for attribute and method names.  Each
   _Py_IDENTIFIER(x) is referred to in code as PyId_x (for instance
   &PyId_setstate in the newline decoder's setstate()). */
_Py_IDENTIFIER(close);
_Py_IDENTIFIER(_dealloc_warn);
_Py_IDENTIFIER(decode);
_Py_IDENTIFIER(fileno);
_Py_IDENTIFIER(flush);
_Py_IDENTIFIER(getpreferredencoding);
_Py_IDENTIFIER(isatty);
_Py_IDENTIFIER(mode);
_Py_IDENTIFIER(name);
_Py_IDENTIFIER(raw);
_Py_IDENTIFIER(read);
_Py_IDENTIFIER(read1);
_Py_IDENTIFIER(readable);
_Py_IDENTIFIER(replace);
_Py_IDENTIFIER(reset);
_Py_IDENTIFIER(seek);
_Py_IDENTIFIER(seekable);
_Py_IDENTIFIER(setstate);
_Py_IDENTIFIER(tell);
_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
198/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
/* Bit flags recording which newline conventions have been encountered;
   they are OR-ed together in the decoder's `seennl` field. */
#define SEEN_CR   0x1   /* lone "\r" */
#define SEEN_LF   0x2   /* lone "\n" */
#define SEEN_CRLF 0x4   /* "\r\n" pair */
#define SEEN_ALL  (SEEN_CRLF | SEEN_LF | SEEN_CR)
264
265PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000266_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267 PyObject *input, int final)
268{
269 PyObject *output;
270 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000271 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000272
273 if (self->decoder == NULL) {
274 PyErr_SetString(PyExc_ValueError,
275 "IncrementalNewlineDecoder.__init__ not called");
276 return NULL;
277 }
278
279 /* decode input (with the eventual \r from a previous pass) */
280 if (self->decoder != Py_None) {
281 output = PyObject_CallMethodObjArgs(self->decoder,
282 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
283 }
284 else {
285 output = input;
286 Py_INCREF(output);
287 }
288
289 if (output == NULL)
290 return NULL;
291
292 if (!PyUnicode_Check(output)) {
293 PyErr_SetString(PyExc_TypeError,
294 "decoder should return a string result");
295 goto error;
296 }
297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200298 if (PyUnicode_READY(output) == -1)
299 goto error;
300
301 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000302 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 /* Prefix output with CR */
304 int kind;
305 PyObject *modified;
306 char *out;
307
308 modified = PyUnicode_New(output_len + 1,
309 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000310 if (modified == NULL)
311 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200312 kind = PyUnicode_KIND(modified);
313 out = PyUnicode_DATA(modified);
314 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200315 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000316 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200317 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000318 self->pendingcr = 0;
319 output_len++;
320 }
321
322 /* retain last \r even when not translating data:
323 * then readline() is sure to get \r\n in one pass
324 */
325 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000326 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
328 {
329 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
330 if (modified == NULL)
331 goto error;
332 Py_DECREF(output);
333 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000334 self->pendingcr = 1;
335 }
336 }
337
338 /* Record which newlines are read and do newline translation if desired,
339 all in one pass. */
340 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 Py_ssize_t len;
343 int seennl = self->seennl;
344 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200345 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 in_str = PyUnicode_DATA(output);
348 len = PyUnicode_GET_LENGTH(output);
349 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000350
351 if (len == 0)
352 return output;
353
354 /* If, up to now, newlines are consistently \n, do a quick check
355 for the \r *byte* with the libc's optimized memchr.
356 */
357 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200358 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000359 }
360
Antoine Pitrou66913e22009-03-06 23:40:56 +0000361 if (only_lf) {
362 /* If not already seen, quick scan for a possible "\n" character.
363 (there's nothing else to be done, even when in translation mode)
364 */
365 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200366 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100367 if (kind == PyUnicode_1BYTE_KIND)
368 seennl |= SEEN_LF;
369 else {
370 Py_ssize_t i = 0;
371 for (;;) {
372 Py_UCS4 c;
373 /* Fast loop for non-control characters */
374 while (PyUnicode_READ(kind, in_str, i) > '\n')
375 i++;
376 c = PyUnicode_READ(kind, in_str, i++);
377 if (c == '\n') {
378 seennl |= SEEN_LF;
379 break;
380 }
381 if (i >= len)
382 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000383 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
385 }
386 /* Finished: we have scanned for newlines, and none of them
387 need translating */
388 }
389 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000392 if (seennl == SEEN_ALL)
393 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000394 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200395 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200397 while (PyUnicode_READ(kind, in_str, i) > '\r')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000400 if (c == '\n')
401 seennl |= SEEN_LF;
402 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 }
407 else
408 seennl |= SEEN_CR;
409 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200410 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000411 break;
412 if (seennl == SEEN_ALL)
413 break;
414 }
415 endscan:
416 ;
417 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000418 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 void *translated;
420 int kind = PyUnicode_KIND(output);
421 void *in_str = PyUnicode_DATA(output);
422 Py_ssize_t in, out;
423 /* XXX: Previous in-place translation here is disabled as
424 resizing is not possible anymore */
425 /* We could try to optimize this so that we only do a copy
426 when there is something to translate. On the other hand,
427 we already know there is a \r byte, so chances are high
428 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200429 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 if (translated == NULL) {
431 PyErr_NoMemory();
432 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
439 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 seennl |= SEEN_LF;
443 continue;
444 }
445 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 in++;
448 seennl |= SEEN_CRLF;
449 }
450 else
451 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 continue;
454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_DECREF(output);
460 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100461 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200463 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 }
465 self->seennl |= seennl;
466 }
467
468 return output;
469
470 error:
471 Py_DECREF(output);
472 return NULL;
473}
474
475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000476incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 PyObject *args, PyObject *kwds)
478{
479 char *kwlist[] = {"input", "final", NULL};
480 PyObject *input;
481 int final = 0;
482
483 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
484 kwlist, &input, &final))
485 return NULL;
486 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
487}
488
489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000490incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000491{
492 PyObject *buffer;
493 unsigned PY_LONG_LONG flag;
494
495 if (self->decoder != Py_None) {
496 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
497 _PyIO_str_getstate, NULL);
498 if (state == NULL)
499 return NULL;
500 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
501 Py_DECREF(state);
502 return NULL;
503 }
504 Py_INCREF(buffer);
505 Py_DECREF(state);
506 }
507 else {
508 buffer = PyBytes_FromString("");
509 flag = 0;
510 }
511 flag <<= 1;
512 if (self->pendingcr)
513 flag |= 1;
514 return Py_BuildValue("NK", buffer, flag);
515}
516
517static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000518incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000519{
520 PyObject *buffer;
521 unsigned PY_LONG_LONG flag;
522
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
524 return NULL;
525
526 self->pendingcr = (int) flag & 1;
527 flag >>= 1;
528
529 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200530 return _PyObject_CallMethodId(self->decoder,
531 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532 else
533 Py_RETURN_NONE;
534}
535
536static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000537incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000538{
539 self->seennl = 0;
540 self->pendingcr = 0;
541 if (self->decoder != Py_None)
542 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
543 else
544 Py_RETURN_NONE;
545}
546
547static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000548incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000549{
550 switch (self->seennl) {
551 case SEEN_CR:
552 return PyUnicode_FromString("\r");
553 case SEEN_LF:
554 return PyUnicode_FromString("\n");
555 case SEEN_CRLF:
556 return PyUnicode_FromString("\r\n");
557 case SEEN_CR | SEEN_LF:
558 return Py_BuildValue("ss", "\r", "\n");
559 case SEEN_CR | SEEN_CRLF:
560 return Py_BuildValue("ss", "\r", "\r\n");
561 case SEEN_LF | SEEN_CRLF:
562 return Py_BuildValue("ss", "\n", "\r\n");
563 case SEEN_CR | SEEN_LF | SEEN_CRLF:
564 return Py_BuildValue("sss", "\r", "\n", "\r\n");
565 default:
566 Py_RETURN_NONE;
567 }
568
569}
570
571
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000572static PyMethodDef incrementalnewlinedecoder_methods[] = {
573 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
574 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
575 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
576 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000577 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578};
579
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000580static PyGetSetDef incrementalnewlinedecoder_getset[] = {
581 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000582 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583};
584
585PyTypeObject PyIncrementalNewlineDecoder_Type = {
586 PyVarObject_HEAD_INIT(NULL, 0)
587 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000588 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000591 0, /*tp_print*/
592 0, /*tp_getattr*/
593 0, /*tp_setattr*/
594 0, /*tp_compare */
595 0, /*tp_repr*/
596 0, /*tp_as_number*/
597 0, /*tp_as_sequence*/
598 0, /*tp_as_mapping*/
599 0, /*tp_hash */
600 0, /*tp_call*/
601 0, /*tp_str*/
602 0, /*tp_getattro*/
603 0, /*tp_setattro*/
604 0, /*tp_as_buffer*/
605 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000606 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000607 0, /* tp_traverse */
608 0, /* tp_clear */
609 0, /* tp_richcompare */
610 0, /*tp_weaklistoffset*/
611 0, /* tp_iter */
612 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000615 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000616 0, /* tp_base */
617 0, /* tp_dict */
618 0, /* tp_descr_get */
619 0, /* tp_descr_set */
620 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000621 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000622 0, /* tp_alloc */
623 PyType_GenericNew, /* tp_new */
624};
625
626
627/* TextIOWrapper */
628
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 "Character and line based layer over a BufferedIOBase object, buffer.\n"
631 "\n"
632 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200633 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634 "\n"
635 "errors determines the strictness of encoding and decoding (see the\n"
636 "codecs.register) and defaults to \"strict\".\n"
637 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200638 "newline controls how line endings are handled. It can be None, '',\n"
639 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
640 "\n"
641 "* On input, if newline is None, universal newlines mode is\n"
642 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
643 " these are translated into '\\n' before being returned to the\n"
644 " caller. If it is '', universal newline mode is enabled, but line\n"
645 " endings are returned to the caller untranslated. If it has any of\n"
646 " the other legal values, input lines are only terminated by the given\n"
647 " string, and the line ending is returned to the caller untranslated.\n"
648 "\n"
649 "* On output, if newline is None, any '\\n' characters written are\n"
650 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300651 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200652 " of the other legal values, any '\\n' characters written are translated\n"
653 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 "\n"
655 "If line_buffering is True, a call to flush is implied when a call to\n"
656 "write contains a newline character."
657 );
658
659typedef PyObject *
660 (*encodefunc_t)(PyObject *, PyObject *);
661
662typedef struct
663{
664 PyObject_HEAD
665 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000666 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000667 Py_ssize_t chunk_size;
668 PyObject *buffer;
669 PyObject *encoding;
670 PyObject *encoder;
671 PyObject *decoder;
672 PyObject *readnl;
673 PyObject *errors;
674 const char *writenl; /* utf-8 encoded, NULL stands for \n */
675 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200676 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 char readuniversal;
678 char readtranslate;
679 char writetranslate;
680 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200681 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000683 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000684 /* Specialized encoding func (see below) */
685 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000686 /* Whether or not it's the start of the stream */
687 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000688
689 /* Reads and writes are internally buffered in order to speed things up.
690 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000691
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000692 Please also note that text to be written is first encoded before being
693 buffered. This is necessary so that encoding errors are immediately
694 reported to the caller, but it unfortunately means that the
695 IncrementalEncoder (whose encode() method is always written in Python)
696 becomes a bottleneck for small writes.
697 */
698 PyObject *decoded_chars; /* buffer for text returned from decoder */
699 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
700 PyObject *pending_bytes; /* list of bytes objects waiting to be
701 written, or NULL */
702 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000703
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000704 /* snapshot is either None, or a tuple (dec_flags, next_input) where
705 * dec_flags is the second (integer) item of the decoder state and
706 * next_input is the chunk of input bytes that comes next after the
707 * snapshot point. We use this to reconstruct decoder states in tell().
708 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000709 PyObject *snapshot;
710 /* Bytes-to-characters ratio for the current chunk. Serves as input for
711 the heuristic in tell(). */
712 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000713
714 /* Cache raw object if it's a FileIO object */
715 PyObject *raw;
716
717 PyObject *weakreflist;
718 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000719} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000720
721
722/* A couple of specialized cases in order to bypass the slow incremental
723 encoding methods for the most popular encodings. */
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200728 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729}
730
731static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000732utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
735 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100741 return _PyUnicode_EncodeUTF16(text,
742 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743}
744
745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000746utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000747{
Antoine Pitroue4501852009-05-14 18:55:55 +0000748 if (!self->encoding_start_of_stream) {
749 /* Skip the BOM and use native byte ordering */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000750#if defined(WORDS_BIGENDIAN)
Antoine Pitroue4501852009-05-14 18:55:55 +0000751 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000752#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000753 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000754#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000755 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100756 return _PyUnicode_EncodeUTF16(text,
757 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758}
759
Antoine Pitroue4501852009-05-14 18:55:55 +0000760static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000761utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000762{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
764 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000769{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100770 return _PyUnicode_EncodeUTF32(text,
771 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000772}
773
774static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000775utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000776{
777 if (!self->encoding_start_of_stream) {
778 /* Skip the BOM and use native byte ordering */
779#if defined(WORDS_BIGENDIAN)
780 return utf32be_encode(self, text);
781#else
782 return utf32le_encode(self, text);
783#endif
784 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100785 return _PyUnicode_EncodeUTF32(text,
786 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000787}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793}
794
795static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000796latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200798 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799}
800
801/* Map normalized encoding names onto the specialized encoding funcs */
802
803typedef struct {
804 const char *name;
805 encodefunc_t encodefunc;
806} encodefuncentry;
807
Antoine Pitrou24f36292009-03-28 22:16:42 +0000808static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809 {"ascii", (encodefunc_t) ascii_encode},
810 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000811 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 {"utf-16-be", (encodefunc_t) utf16be_encode},
813 {"utf-16-le", (encodefunc_t) utf16le_encode},
814 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000815 {"utf-32-be", (encodefunc_t) utf32be_encode},
816 {"utf-32-le", (encodefunc_t) utf32le_encode},
817 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818 {NULL, NULL}
819};
820
821
822static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000823textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000824{
825 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200826 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000827 NULL};
828 PyObject *buffer, *raw;
829 char *encoding = NULL;
830 char *errors = NULL;
831 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200832 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 _PyIO_State *state = IO_STATE;
834
835 PyObject *res;
836 int r;
837
838 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000839 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200840 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000841 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200842 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000843 return -1;
844
845 if (newline && newline[0] != '\0'
846 && !(newline[0] == '\n' && newline[1] == '\0')
847 && !(newline[0] == '\r' && newline[1] == '\0')
848 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 PyErr_Format(PyExc_ValueError,
850 "illegal newline value: %s", newline);
851 return -1;
852 }
853
854 Py_CLEAR(self->buffer);
855 Py_CLEAR(self->encoding);
856 Py_CLEAR(self->encoder);
857 Py_CLEAR(self->decoder);
858 Py_CLEAR(self->readnl);
859 Py_CLEAR(self->decoded_chars);
860 Py_CLEAR(self->pending_bytes);
861 Py_CLEAR(self->snapshot);
862 Py_CLEAR(self->errors);
863 Py_CLEAR(self->raw);
864 self->decoded_chars_used = 0;
865 self->pending_bytes_count = 0;
866 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000867 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000868
869 if (encoding == NULL) {
870 /* Try os.device_encoding(fileno) */
871 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200872 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000873 /* Ignore only AttributeError and UnsupportedOperation */
874 if (fileno == NULL) {
875 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
876 PyErr_ExceptionMatches(state->unsupported_operation)) {
877 PyErr_Clear();
878 }
879 else {
880 goto error;
881 }
882 }
883 else {
Brett Cannonefb00c02012-02-29 18:31:31 -0500884 int fd = (int) PyLong_AsLong(fileno);
885 Py_DECREF(fileno);
886 if (fd == -1 && PyErr_Occurred()) {
887 goto error;
888 }
889
890 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000891 if (self->encoding == NULL)
892 goto error;
893 else if (!PyUnicode_Check(self->encoding))
894 Py_CLEAR(self->encoding);
895 }
896 }
897 if (encoding == NULL && self->encoding == NULL) {
898 if (state->locale_module == NULL) {
899 state->locale_module = PyImport_ImportModule("locale");
900 if (state->locale_module == NULL)
901 goto catch_ImportError;
902 else
903 goto use_locale;
904 }
905 else {
906 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200907 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200908 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000909 if (self->encoding == NULL) {
910 catch_ImportError:
911 /*
912 Importing locale can raise a ImportError because of
913 _functools, and locale.getpreferredencoding can raise a
914 ImportError if _locale is not available. These will happen
915 during module building.
916 */
917 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
918 PyErr_Clear();
919 self->encoding = PyUnicode_FromString("ascii");
920 }
921 else
922 goto error;
923 }
924 else if (!PyUnicode_Check(self->encoding))
925 Py_CLEAR(self->encoding);
926 }
927 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000928 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000929 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000930 if (encoding == NULL)
931 goto error;
932 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000933 else if (encoding != NULL) {
934 self->encoding = PyUnicode_FromString(encoding);
935 if (self->encoding == NULL)
936 goto error;
937 }
938 else {
939 PyErr_SetString(PyExc_IOError,
940 "could not determine default encoding");
941 }
942
943 if (errors == NULL)
944 errors = "strict";
945 self->errors = PyBytes_FromString(errors);
946 if (self->errors == NULL)
947 goto error;
948
949 self->chunk_size = 8192;
950 self->readuniversal = (newline == NULL || newline[0] == '\0');
951 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200952 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000953 self->readtranslate = (newline == NULL);
954 if (newline) {
955 self->readnl = PyUnicode_FromString(newline);
956 if (self->readnl == NULL)
957 return -1;
958 }
959 self->writetranslate = (newline == NULL || newline[0] != '\0');
960 if (!self->readuniversal && self->readnl) {
961 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000962 if (self->writenl == NULL)
963 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 if (!strcmp(self->writenl, "\n"))
965 self->writenl = NULL;
966 }
967#ifdef MS_WINDOWS
968 else
969 self->writenl = "\r\n";
970#endif
971
972 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200973 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000974 if (res == NULL)
975 goto error;
976 r = PyObject_IsTrue(res);
977 Py_DECREF(res);
978 if (r == -1)
979 goto error;
980 if (r == 1) {
981 self->decoder = PyCodec_IncrementalDecoder(
982 encoding, errors);
983 if (self->decoder == NULL)
984 goto error;
985
986 if (self->readuniversal) {
987 PyObject *incrementalDecoder = PyObject_CallFunction(
988 (PyObject *)&PyIncrementalNewlineDecoder_Type,
989 "Oi", self->decoder, (int)self->readtranslate);
990 if (incrementalDecoder == NULL)
991 goto error;
992 Py_CLEAR(self->decoder);
993 self->decoder = incrementalDecoder;
994 }
995 }
996
997 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200998 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000999 if (res == NULL)
1000 goto error;
1001 r = PyObject_IsTrue(res);
1002 Py_DECREF(res);
1003 if (r == -1)
1004 goto error;
1005 if (r == 1) {
1006 PyObject *ci;
1007 self->encoder = PyCodec_IncrementalEncoder(
1008 encoding, errors);
1009 if (self->encoder == NULL)
1010 goto error;
1011 /* Get the normalized named of the codec */
1012 ci = _PyCodec_Lookup(encoding);
1013 if (ci == NULL)
1014 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001015 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001016 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001017 if (res == NULL) {
1018 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019 PyErr_Clear();
1020 else
1021 goto error;
1022 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001023 else if (PyUnicode_Check(res)) {
1024 encodefuncentry *e = encodefuncs;
1025 while (e->name != NULL) {
1026 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1027 self->encodefunc = e->encodefunc;
1028 break;
1029 }
1030 e++;
1031 }
1032 }
1033 Py_XDECREF(res);
1034 }
1035
1036 self->buffer = buffer;
1037 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001038
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1040 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1041 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001042 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001043 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001044 if (raw == NULL) {
1045 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 PyErr_Clear();
1047 else
1048 goto error;
1049 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 else if (Py_TYPE(raw) == &PyFileIO_Type)
1051 self->raw = raw;
1052 else
1053 Py_DECREF(raw);
1054 }
1055
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001056 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001057 if (res == NULL)
1058 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001059 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001061 if (r < 0)
1062 goto error;
1063 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001064
Martin v. Löwis767046a2011-10-14 15:35:36 +02001065 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001066
Antoine Pitroue4501852009-05-14 18:55:55 +00001067 self->encoding_start_of_stream = 0;
1068 if (self->seekable && self->encoder) {
1069 PyObject *cookieObj;
1070 int cmp;
1071
1072 self->encoding_start_of_stream = 1;
1073
1074 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1075 if (cookieObj == NULL)
1076 goto error;
1077
1078 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1079 Py_DECREF(cookieObj);
1080 if (cmp < 0) {
1081 goto error;
1082 }
1083
1084 if (cmp == 0) {
1085 self->encoding_start_of_stream = 0;
1086 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1087 _PyIO_zero, NULL);
1088 if (res == NULL)
1089 goto error;
1090 Py_DECREF(res);
1091 }
1092 }
1093
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001094 self->ok = 1;
1095 return 0;
1096
1097 error:
1098 return -1;
1099}
1100
1101static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001102_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001103{
1104 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1105 return -1;
1106 self->ok = 0;
1107 Py_CLEAR(self->buffer);
1108 Py_CLEAR(self->encoding);
1109 Py_CLEAR(self->encoder);
1110 Py_CLEAR(self->decoder);
1111 Py_CLEAR(self->readnl);
1112 Py_CLEAR(self->decoded_chars);
1113 Py_CLEAR(self->pending_bytes);
1114 Py_CLEAR(self->snapshot);
1115 Py_CLEAR(self->errors);
1116 Py_CLEAR(self->raw);
1117 return 0;
1118}
1119
1120static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001121textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122{
Antoine Pitroue033e062010-10-29 10:38:18 +00001123 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001124 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125 return;
1126 _PyObject_GC_UNTRACK(self);
1127 if (self->weakreflist != NULL)
1128 PyObject_ClearWeakRefs((PyObject *)self);
1129 Py_CLEAR(self->dict);
1130 Py_TYPE(self)->tp_free((PyObject *)self);
1131}
1132
1133static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001134textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135{
1136 Py_VISIT(self->buffer);
1137 Py_VISIT(self->encoding);
1138 Py_VISIT(self->encoder);
1139 Py_VISIT(self->decoder);
1140 Py_VISIT(self->readnl);
1141 Py_VISIT(self->decoded_chars);
1142 Py_VISIT(self->pending_bytes);
1143 Py_VISIT(self->snapshot);
1144 Py_VISIT(self->errors);
1145 Py_VISIT(self->raw);
1146
1147 Py_VISIT(self->dict);
1148 return 0;
1149}
1150
1151static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001152textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001153{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001154 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001155 return -1;
1156 Py_CLEAR(self->dict);
1157 return 0;
1158}
1159
1160static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001161textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001162
1163/* This macro takes some shortcuts to make the common case faster. */
1164#define CHECK_CLOSED(self) \
1165 do { \
1166 int r; \
1167 PyObject *_res; \
1168 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1169 if (self->raw != NULL) \
1170 r = _PyFileIO_closed(self->raw); \
1171 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001172 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001173 if (_res == NULL) \
1174 return NULL; \
1175 r = PyObject_IsTrue(_res); \
1176 Py_DECREF(_res); \
1177 if (r < 0) \
1178 return NULL; \
1179 } \
1180 if (r > 0) { \
1181 PyErr_SetString(PyExc_ValueError, \
1182 "I/O operation on closed file."); \
1183 return NULL; \
1184 } \
1185 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001186 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001187 return NULL; \
1188 } while (0)
1189
/* Make the enclosing function return NULL with a ValueError when the
   wrapper is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized. */
#define CHECK_INITIALIZED(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
        } \
        else { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
        } \
        return NULL; \
    }
1201
/* Same check as CHECK_INITIALIZED, for functions that report errors by
   returning -1 instead of NULL. */
#define CHECK_INITIALIZED_INT(self) \
    if ((self)->ok <= 0) { \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
        } \
        else { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
        } \
        return -1; \
    }
1213
1214
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001215static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001216textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001217{
1218 PyObject *buffer, *res;
1219 CHECK_INITIALIZED(self);
1220 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1221 if (res == NULL)
1222 return NULL;
1223 Py_DECREF(res);
1224 buffer = self->buffer;
1225 self->buffer = NULL;
1226 self->detached = 1;
1227 self->ok = 0;
1228 return buffer;
1229}
1230
Antoine Pitrou24f36292009-03-28 22:16:42 +00001231/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001232 underlying buffered object, though. */
1233static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001234_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001235{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001236 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001237
1238 if (self->pending_bytes == NULL)
1239 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001240
1241 pending = self->pending_bytes;
1242 Py_INCREF(pending);
1243 self->pending_bytes_count = 0;
1244 Py_CLEAR(self->pending_bytes);
1245
1246 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1247 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248 if (b == NULL)
1249 return -1;
1250 ret = PyObject_CallMethodObjArgs(self->buffer,
1251 _PyIO_str_write, b, NULL);
1252 Py_DECREF(b);
1253 if (ret == NULL)
1254 return -1;
1255 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001256 return 0;
1257}
1258
1259static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001260textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001261{
1262 PyObject *ret;
1263 PyObject *text; /* owned reference */
1264 PyObject *b;
1265 Py_ssize_t textlen;
1266 int haslf = 0;
1267 int needflush = 0;
1268
1269 CHECK_INITIALIZED(self);
1270
1271 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1272 return NULL;
1273 }
1274
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 if (PyUnicode_READY(text) == -1)
1276 return NULL;
1277
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001278 CHECK_CLOSED(self);
1279
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001280 if (self->encoder == NULL)
1281 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001282
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001283 Py_INCREF(text);
1284
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001285 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286
1287 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001289 haslf = 1;
1290
1291 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001292 PyObject *newtext = _PyObject_CallMethodId(
1293 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001294 Py_DECREF(text);
1295 if (newtext == NULL)
1296 return NULL;
1297 text = newtext;
1298 }
1299
Antoine Pitroue96ec682011-07-23 21:46:35 +02001300 if (self->write_through)
1301 needflush = 1;
1302 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001303 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001305 needflush = 1;
1306
1307 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001308 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001309 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001310 self->encoding_start_of_stream = 0;
1311 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 else
1313 b = PyObject_CallMethodObjArgs(self->encoder,
1314 _PyIO_str_encode, text, NULL);
1315 Py_DECREF(text);
1316 if (b == NULL)
1317 return NULL;
1318
1319 if (self->pending_bytes == NULL) {
1320 self->pending_bytes = PyList_New(0);
1321 if (self->pending_bytes == NULL) {
1322 Py_DECREF(b);
1323 return NULL;
1324 }
1325 self->pending_bytes_count = 0;
1326 }
1327 if (PyList_Append(self->pending_bytes, b) < 0) {
1328 Py_DECREF(b);
1329 return NULL;
1330 }
1331 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1332 Py_DECREF(b);
1333 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001334 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001335 return NULL;
1336 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001337
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001338 if (needflush) {
1339 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1340 if (ret == NULL)
1341 return NULL;
1342 Py_DECREF(ret);
1343 }
1344
1345 Py_CLEAR(self->snapshot);
1346
1347 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001348 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001349 if (ret == NULL)
1350 return NULL;
1351 Py_DECREF(ret);
1352 }
1353
1354 return PyLong_FromSsize_t(textlen);
1355}
1356
1357/* Steal a reference to chars and store it in the decoded_char buffer;
1358 */
1359static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001360textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001361{
1362 Py_CLEAR(self->decoded_chars);
1363 self->decoded_chars = chars;
1364 self->decoded_chars_used = 0;
1365}
1366
1367static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001368textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001369{
1370 PyObject *chars;
1371 Py_ssize_t avail;
1372
1373 if (self->decoded_chars == NULL)
1374 return PyUnicode_FromStringAndSize(NULL, 0);
1375
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 /* decoded_chars is guaranteed to be "ready". */
1377 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001378 - self->decoded_chars_used);
1379
1380 assert(avail >= 0);
1381
1382 if (n < 0 || n > avail)
1383 n = avail;
1384
1385 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001386 chars = PyUnicode_Substring(self->decoded_chars,
1387 self->decoded_chars_used,
1388 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001389 if (chars == NULL)
1390 return NULL;
1391 }
1392 else {
1393 chars = self->decoded_chars;
1394 Py_INCREF(chars);
1395 }
1396
1397 self->decoded_chars_used += n;
1398 return chars;
1399}
1400
1401/* Read and decode the next chunk of data from the BufferedReader.
1402 */
1403static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001404textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001405{
1406 PyObject *dec_buffer = NULL;
1407 PyObject *dec_flags = NULL;
1408 PyObject *input_chunk = NULL;
1409 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001410 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001411 int eof;
1412
1413 /* The return value is True unless EOF was reached. The decoded string is
1414 * placed in self._decoded_chars (replacing its previous value). The
1415 * entire input chunk is sent to the decoder, though some of it may remain
1416 * buffered in the decoder, yet to be converted.
1417 */
1418
1419 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001420 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001421 return -1;
1422 }
1423
1424 if (self->telling) {
1425 /* To prepare for tell(), we need to snapshot a point in the file
1426 * where the decoder's input buffer is empty.
1427 */
1428
1429 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1430 _PyIO_str_getstate, NULL);
1431 if (state == NULL)
1432 return -1;
1433 /* Given this, we know there was a valid snapshot point
1434 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1435 */
1436 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1437 Py_DECREF(state);
1438 return -1;
1439 }
1440 Py_INCREF(dec_buffer);
1441 Py_INCREF(dec_flags);
1442 Py_DECREF(state);
1443 }
1444
1445 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001446 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001447 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001448 }
1449 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001450 if (chunk_size == NULL)
1451 goto fail;
1452 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001453 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1454 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001455 Py_DECREF(chunk_size);
1456 if (input_chunk == NULL)
1457 goto fail;
1458 assert(PyBytes_Check(input_chunk));
1459
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001460 nbytes = PyBytes_Size(input_chunk);
1461 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001462
1463 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1464 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1465 self->decoder, input_chunk, eof);
1466 }
1467 else {
1468 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1469 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1470 }
1471
1472 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1473 if (decoded_chars == NULL)
1474 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001475 if (PyUnicode_READY(decoded_chars) == -1)
1476 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001477 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001478 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001479 if (nchars > 0)
1480 self->b2cratio = (double) nbytes / nchars;
1481 else
1482 self->b2cratio = 0.0;
1483 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001484 eof = 0;
1485
1486 if (self->telling) {
1487 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1488 * next input to be decoded is dec_buffer + input_chunk.
1489 */
1490 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1491 if (next_input == NULL)
1492 goto fail;
1493 assert (PyBytes_Check(next_input));
1494 Py_DECREF(dec_buffer);
1495 Py_CLEAR(self->snapshot);
1496 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1497 }
1498 Py_DECREF(input_chunk);
1499
1500 return (eof == 0);
1501
1502 fail:
1503 Py_XDECREF(dec_buffer);
1504 Py_XDECREF(dec_flags);
1505 Py_XDECREF(input_chunk);
1506 return -1;
1507}
1508
1509static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001510textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511{
1512 Py_ssize_t n = -1;
1513 PyObject *result = NULL, *chunks = NULL;
1514
1515 CHECK_INITIALIZED(self);
1516
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001517 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001518 return NULL;
1519
1520 CHECK_CLOSED(self);
1521
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001522 if (self->decoder == NULL)
1523 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001524
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001525 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001526 return NULL;
1527
1528 if (n < 0) {
1529 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001530 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531 PyObject *decoded;
1532 if (bytes == NULL)
1533 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001534
1535 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1536 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1537 bytes, 1);
1538 else
1539 decoded = PyObject_CallMethodObjArgs(
1540 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001541 Py_DECREF(bytes);
1542 if (decoded == NULL)
1543 goto fail;
1544
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001545 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546
1547 if (result == NULL) {
1548 Py_DECREF(decoded);
1549 return NULL;
1550 }
1551
1552 PyUnicode_AppendAndDel(&result, decoded);
1553 if (result == NULL)
1554 goto fail;
1555
1556 Py_CLEAR(self->snapshot);
1557 return result;
1558 }
1559 else {
1560 int res = 1;
1561 Py_ssize_t remaining = n;
1562
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001563 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 if (result == NULL)
1565 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001566 if (PyUnicode_READY(result) == -1)
1567 goto fail;
1568 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569
1570 /* Keep reading chunks until we have n characters to return */
1571 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001572 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001573 if (res < 0) {
1574 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1575 when EINTR occurs so we needn't do it ourselves. */
1576 if (_PyIO_trap_eintr()) {
1577 continue;
1578 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001579 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001580 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001581 if (res == 0) /* EOF */
1582 break;
1583 if (chunks == NULL) {
1584 chunks = PyList_New(0);
1585 if (chunks == NULL)
1586 goto fail;
1587 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001588 if (PyUnicode_GET_LENGTH(result) > 0 &&
1589 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001590 goto fail;
1591 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001592 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593 if (result == NULL)
1594 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001595 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001596 }
1597 if (chunks != NULL) {
1598 if (result != NULL && PyList_Append(chunks, result) < 0)
1599 goto fail;
1600 Py_CLEAR(result);
1601 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1602 if (result == NULL)
1603 goto fail;
1604 Py_CLEAR(chunks);
1605 }
1606 return result;
1607 }
1608 fail:
1609 Py_XDECREF(result);
1610 Py_XDECREF(chunks);
1611 return NULL;
1612}
1613
1614
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001615/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 that is to the NUL character. Otherwise the function will produce
1617 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001618static char *
1619find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001621 if (kind == PyUnicode_1BYTE_KIND) {
1622 assert(ch < 256);
1623 return (char *) memchr((void *) s, (char) ch, end - s);
1624 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001625 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001626 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001627 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001628 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 return s;
1630 if (s == end)
1631 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001632 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001633 }
1634}
1635
1636Py_ssize_t
1637_PyIO_find_line_ending(
1638 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001639 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001641 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642
1643 if (translated) {
1644 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001645 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001646 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001647 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648 else {
1649 *consumed = len;
1650 return -1;
1651 }
1652 }
1653 else if (universal) {
1654 /* Universal newline search. Find any of \r, \r\n, \n
1655 * The decoder ensures that \r\n are not split in two pieces
1656 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001659 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001660 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001661 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001662 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001663 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001664 if (s >= end) {
1665 *consumed = len;
1666 return -1;
1667 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001669 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001671 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001673 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001674 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001676 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001677 }
1678 }
1679 }
1680 else {
1681 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001682 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1683 char *nl = PyUnicode_DATA(readnl);
1684 /* Assume that readnl is an ASCII character. */
1685 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001686 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001687 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001689 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001690 *consumed = len;
1691 return -1;
1692 }
1693 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001694 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001695 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001696 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 if (e < s)
1698 e = s;
1699 while (s < e) {
1700 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001701 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001702 if (pos == NULL || pos >= e)
1703 break;
1704 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001705 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001706 break;
1707 }
1708 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001709 return (pos - start)/kind + readnl_len;
1710 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001712 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 if (pos == NULL)
1714 *consumed = len;
1715 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001716 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 return -1;
1718 }
1719 }
1720}
1721
1722static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001723_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001724{
1725 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1726 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1727 int res;
1728
1729 CHECK_CLOSED(self);
1730
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001731 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001732 return NULL;
1733
1734 chunked = 0;
1735
1736 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001737 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001738 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001739 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 Py_ssize_t consumed = 0;
1741
1742 /* First, get some data if necessary */
1743 res = 1;
1744 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001745 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001746 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001747 if (res < 0) {
1748 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1749 when EINTR occurs so we needn't do it ourselves. */
1750 if (_PyIO_trap_eintr()) {
1751 continue;
1752 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001754 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001755 if (res == 0)
1756 break;
1757 }
1758 if (res == 0) {
1759 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001760 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761 Py_CLEAR(self->snapshot);
1762 start = endpos = offset_to_buffer = 0;
1763 break;
1764 }
1765
1766 if (remaining == NULL) {
1767 line = self->decoded_chars;
1768 start = self->decoded_chars_used;
1769 offset_to_buffer = 0;
1770 Py_INCREF(line);
1771 }
1772 else {
1773 assert(self->decoded_chars_used == 0);
1774 line = PyUnicode_Concat(remaining, self->decoded_chars);
1775 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001776 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777 Py_CLEAR(remaining);
1778 if (line == NULL)
1779 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001780 if (PyUnicode_READY(line) == -1)
1781 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001782 }
1783
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001784 ptr = PyUnicode_DATA(line);
1785 line_len = PyUnicode_GET_LENGTH(line);
1786 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001787
1788 endpos = _PyIO_find_line_ending(
1789 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001790 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001791 ptr + kind * start,
1792 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001793 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 if (endpos >= 0) {
1795 endpos += start;
1796 if (limit >= 0 && (endpos - start) + chunked >= limit)
1797 endpos = start + limit - chunked;
1798 break;
1799 }
1800
1801 /* We can put aside up to `endpos` */
1802 endpos = consumed + start;
1803 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1804 /* Didn't find line ending, but reached length limit */
1805 endpos = start + limit - chunked;
1806 break;
1807 }
1808
1809 if (endpos > start) {
1810 /* No line ending seen yet - put aside current data */
1811 PyObject *s;
1812 if (chunks == NULL) {
1813 chunks = PyList_New(0);
1814 if (chunks == NULL)
1815 goto error;
1816 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001817 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001818 if (s == NULL)
1819 goto error;
1820 if (PyList_Append(chunks, s) < 0) {
1821 Py_DECREF(s);
1822 goto error;
1823 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001824 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001825 Py_DECREF(s);
1826 }
1827 /* There may be some remaining bytes we'll have to prepend to the
1828 next chunk of data */
1829 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001830 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001831 if (remaining == NULL)
1832 goto error;
1833 }
1834 Py_CLEAR(line);
1835 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001836 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001837 }
1838
1839 if (line != NULL) {
1840 /* Our line ends in the current buffer */
1841 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001842 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1843 PyObject *s = PyUnicode_Substring(line, start, endpos);
1844 Py_CLEAR(line);
1845 if (s == NULL)
1846 goto error;
1847 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001848 }
1849 }
1850 if (remaining != NULL) {
1851 if (chunks == NULL) {
1852 chunks = PyList_New(0);
1853 if (chunks == NULL)
1854 goto error;
1855 }
1856 if (PyList_Append(chunks, remaining) < 0)
1857 goto error;
1858 Py_CLEAR(remaining);
1859 }
1860 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001861 if (line != NULL) {
1862 if (PyList_Append(chunks, line) < 0)
1863 goto error;
1864 Py_DECREF(line);
1865 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001866 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1867 if (line == NULL)
1868 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001869 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001871 if (line == NULL) {
1872 Py_INCREF(_PyIO_empty_str);
1873 line = _PyIO_empty_str;
1874 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001875
1876 return line;
1877
1878 error:
1879 Py_XDECREF(chunks);
1880 Py_XDECREF(remaining);
1881 Py_XDECREF(line);
1882 return NULL;
1883}
1884
1885static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001886textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001887{
1888 Py_ssize_t limit = -1;
1889
1890 CHECK_INITIALIZED(self);
1891 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1892 return NULL;
1893 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001894 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001895}
1896
1897/* Seek and Tell */
1898
1899typedef struct {
1900 Py_off_t start_pos;
1901 int dec_flags;
1902 int bytes_to_feed;
1903 int chars_to_skip;
1904 char need_eof;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001905} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001906
1907/*
1908 To speed up cookie packing/unpacking, we store the fields in a temporary
1909 string and call _PyLong_FromByteArray() or _PyLong_AsByteArray (resp.).
1910 The following macros define at which offsets in the intermediary byte
1911 string the various CookieStruct fields will be stored.
1912 */
1913
1914#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))
1915
1916#if defined(WORDS_BIGENDIAN)
1917
1918# define IS_LITTLE_ENDIAN 0
1919
1920/* We want the least significant byte of start_pos to also be the least
1921 significant byte of the cookie, which means that in big-endian mode we
1922 must copy the fields in reverse order. */
1923
1924# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
1925# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
1926# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
1927# define OFF_CHARS_TO_SKIP (sizeof(char))
1928# define OFF_NEED_EOF 0
1929
1930#else
1931
1932# define IS_LITTLE_ENDIAN 1
1933
1934/* Little-endian mode: the least significant byte of start_pos will
1935 naturally end up the least significant byte of the cookie. */
1936
1937# define OFF_START_POS 0
1938# define OFF_DEC_FLAGS (sizeof(Py_off_t))
1939# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
1940# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
1941# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))
1942
1943#endif
1944
1945static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001946textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001947{
1948 unsigned char buffer[COOKIE_BUF_LEN];
1949 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1950 if (cookieLong == NULL)
1951 return -1;
1952
1953 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
1954 IS_LITTLE_ENDIAN, 0) < 0) {
1955 Py_DECREF(cookieLong);
1956 return -1;
1957 }
1958 Py_DECREF(cookieLong);
1959
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001960 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1961 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1962 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1963 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1964 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001965
1966 return 0;
1967}
1968
1969static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001970textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001971{
1972 unsigned char buffer[COOKIE_BUF_LEN];
1973
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001974 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1975 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1976 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1977 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1978 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001979
1980 return _PyLong_FromByteArray(buffer, sizeof(buffer), IS_LITTLE_ENDIAN, 0);
1981}
1982#undef IS_LITTLE_ENDIAN
1983
1984static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001985_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001986{
1987 PyObject *res;
1988 /* When seeking to the start of the stream, we call decoder.reset()
1989 rather than decoder.getstate().
1990 This is for a few decoders such as utf-16 for which the state value
1991 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1992 utf-16, that we are expecting a BOM).
1993 */
1994 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1995 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1996 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001997 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1998 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001999 if (res == NULL)
2000 return -1;
2001 Py_DECREF(res);
2002 return 0;
2003}
2004
Antoine Pitroue4501852009-05-14 18:55:55 +00002005static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002006_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002007{
2008 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002009 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002010 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2011 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2012 self->encoding_start_of_stream = 1;
2013 }
2014 else {
2015 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2016 _PyIO_zero, NULL);
2017 self->encoding_start_of_stream = 0;
2018 }
2019 if (res == NULL)
2020 return -1;
2021 Py_DECREF(res);
2022 return 0;
2023}
2024
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002026textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002027{
2028 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002029 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002031 PyObject *res;
2032 int cmp;
2033
2034 CHECK_INITIALIZED(self);
2035
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002036 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2037 return NULL;
2038 CHECK_CLOSED(self);
2039
2040 Py_INCREF(cookieObj);
2041
2042 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002043 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 goto fail;
2045 }
2046
2047 if (whence == 1) {
2048 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002049 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002050 if (cmp < 0)
2051 goto fail;
2052
2053 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002054 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002055 goto fail;
2056 }
2057
2058 /* Seeking to the current position should attempt to
2059 * sync the underlying buffer with the current position.
2060 */
2061 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002062 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 if (cookieObj == NULL)
2064 goto fail;
2065 }
2066 else if (whence == 2) {
2067 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002068 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002069 if (cmp < 0)
2070 goto fail;
2071
2072 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002073 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002074 goto fail;
2075 }
2076
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002077 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002078 if (res == NULL)
2079 goto fail;
2080 Py_DECREF(res);
2081
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002082 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002083 Py_CLEAR(self->snapshot);
2084 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002085 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 if (res == NULL)
2087 goto fail;
2088 Py_DECREF(res);
2089 }
2090
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002091 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002092 Py_XDECREF(cookieObj);
2093 return res;
2094 }
2095 else if (whence != 0) {
2096 PyErr_Format(PyExc_ValueError,
2097 "invalid whence (%d, should be 0, 1 or 2)", whence);
2098 goto fail;
2099 }
2100
Antoine Pitroue4501852009-05-14 18:55:55 +00002101 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002102 if (cmp < 0)
2103 goto fail;
2104
2105 if (cmp == 1) {
2106 PyErr_Format(PyExc_ValueError,
2107 "negative seek position %R", cookieObj);
2108 goto fail;
2109 }
2110
2111 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2112 if (res == NULL)
2113 goto fail;
2114 Py_DECREF(res);
2115
2116 /* The strategy of seek() is to go back to the safe start point
2117 * and replay the effect of read(chars_to_skip) from there.
2118 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002119 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002120 goto fail;
2121
2122 /* Seek back to the safe start point. */
2123 posobj = PyLong_FromOff_t(cookie.start_pos);
2124 if (posobj == NULL)
2125 goto fail;
2126 res = PyObject_CallMethodObjArgs(self->buffer,
2127 _PyIO_str_seek, posobj, NULL);
2128 Py_DECREF(posobj);
2129 if (res == NULL)
2130 goto fail;
2131 Py_DECREF(res);
2132
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002133 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002134 Py_CLEAR(self->snapshot);
2135
2136 /* Restore the decoder to its state from the safe start point. */
2137 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002138 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002139 goto fail;
2140 }
2141
2142 if (cookie.chars_to_skip) {
2143 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002144 PyObject *input_chunk = _PyObject_CallMethodId(
2145 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002146 PyObject *decoded;
2147
2148 if (input_chunk == NULL)
2149 goto fail;
2150
2151 assert (PyBytes_Check(input_chunk));
2152
2153 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2154 if (self->snapshot == NULL) {
2155 Py_DECREF(input_chunk);
2156 goto fail;
2157 }
2158
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002159 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2160 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002161
2162 if (decoded == NULL)
2163 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002164 if (PyUnicode_READY(decoded) == -1) {
2165 Py_DECREF(decoded);
2166 goto fail;
2167 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002168
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002169 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002170
2171 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002172 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002173 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2174 goto fail;
2175 }
2176 self->decoded_chars_used = cookie.chars_to_skip;
2177 }
2178 else {
2179 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2180 if (self->snapshot == NULL)
2181 goto fail;
2182 }
2183
Antoine Pitroue4501852009-05-14 18:55:55 +00002184 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2185 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002186 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002187 goto fail;
2188 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002189 return cookieObj;
2190 fail:
2191 Py_XDECREF(cookieObj);
2192 return NULL;
2193
2194}
2195
2196static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002197textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002198{
2199 PyObject *res;
2200 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002201 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002202 PyObject *next_input;
2203 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002204 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002205 PyObject *saved_state = NULL;
2206 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002207 char *dec_buffer;
2208 Py_ssize_t dec_buffer_len;
2209 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002210
2211 CHECK_INITIALIZED(self);
2212 CHECK_CLOSED(self);
2213
2214 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002215 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002216 goto fail;
2217 }
2218 if (!self->telling) {
2219 PyErr_SetString(PyExc_IOError,
2220 "telling position disabled by next() call");
2221 goto fail;
2222 }
2223
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002224 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002225 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002226 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002227 if (res == NULL)
2228 goto fail;
2229 Py_DECREF(res);
2230
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002231 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002232 if (posobj == NULL)
2233 goto fail;
2234
2235 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002236 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002237 return posobj;
2238 }
2239
2240#if defined(HAVE_LARGEFILE_SUPPORT)
2241 cookie.start_pos = PyLong_AsLongLong(posobj);
2242#else
2243 cookie.start_pos = PyLong_AsLong(posobj);
2244#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002245 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002246 if (PyErr_Occurred())
2247 goto fail;
2248
2249 /* Skip backward to the snapshot point (see _read_chunk). */
2250 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2251 goto fail;
2252
2253 assert (PyBytes_Check(next_input));
2254
2255 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2256
2257 /* How many decoded characters have been used up since the snapshot? */
2258 if (self->decoded_chars_used == 0) {
2259 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002260 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002261 }
2262
2263 chars_to_skip = self->decoded_chars_used;
2264
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002265 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002266 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2267 _PyIO_str_getstate, NULL);
2268 if (saved_state == NULL)
2269 goto fail;
2270
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002271#define DECODER_GETSTATE() do { \
2272 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2273 _PyIO_str_getstate, NULL); \
2274 if (_state == NULL) \
2275 goto fail; \
2276 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2277 Py_DECREF(_state); \
2278 goto fail; \
2279 } \
2280 Py_DECREF(_state); \
2281 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002282
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002283 /* TODO: replace assert with exception */
2284#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002285 PyObject *_decoded = _PyObject_CallMethodId( \
2286 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002287 if (_decoded == NULL) \
2288 goto fail; \
2289 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002290 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002291 Py_DECREF(_decoded); \
2292 } while (0)
2293
2294 /* Fast search for an acceptable start point, close to our
2295 current pos */
2296 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2297 skip_back = 1;
2298 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2299 input = PyBytes_AS_STRING(next_input);
2300 while (skip_bytes > 0) {
2301 /* Decode up to temptative start point */
2302 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2303 goto fail;
2304 DECODER_DECODE(input, skip_bytes, chars_decoded);
2305 if (chars_decoded <= chars_to_skip) {
2306 DECODER_GETSTATE();
2307 if (dec_buffer_len == 0) {
2308 /* Before pos and no bytes buffered in decoder => OK */
2309 cookie.dec_flags = dec_flags;
2310 chars_to_skip -= chars_decoded;
2311 break;
2312 }
2313 /* Skip back by buffered amount and reset heuristic */
2314 skip_bytes -= dec_buffer_len;
2315 skip_back = 1;
2316 }
2317 else {
2318 /* We're too far ahead, skip back a bit */
2319 skip_bytes -= skip_back;
2320 skip_back *= 2;
2321 }
2322 }
2323 if (skip_bytes <= 0) {
2324 skip_bytes = 0;
2325 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2326 goto fail;
2327 }
2328
2329 /* Note our initial start point. */
2330 cookie.start_pos += skip_bytes;
2331 cookie.chars_to_skip = chars_to_skip;
2332 if (chars_to_skip == 0)
2333 goto finally;
2334
2335 /* We should be close to the desired position. Now feed the decoder one
2336 * byte at a time until we reach the `chars_to_skip` target.
2337 * As we go, note the nearest "safe start point" before the current
2338 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002339 * can safely start from there and advance to this location).
2340 */
2341 chars_decoded = 0;
2342 input = PyBytes_AS_STRING(next_input);
2343 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002344 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002346 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002348 DECODER_DECODE(input, 1, n);
2349 /* We got n chars for 1 byte */
2350 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002351 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002352 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002353
2354 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2355 /* Decoder buffer is empty, so this is a safe start point. */
2356 cookie.start_pos += cookie.bytes_to_feed;
2357 chars_to_skip -= chars_decoded;
2358 cookie.dec_flags = dec_flags;
2359 cookie.bytes_to_feed = 0;
2360 chars_decoded = 0;
2361 }
2362 if (chars_decoded >= chars_to_skip)
2363 break;
2364 input++;
2365 }
2366 if (input == input_end) {
2367 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002368 PyObject *decoded = _PyObject_CallMethodId(
2369 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002370 if (decoded == NULL)
2371 goto fail;
2372 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002373 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002374 Py_DECREF(decoded);
2375 cookie.need_eof = 1;
2376
2377 if (chars_decoded < chars_to_skip) {
2378 PyErr_SetString(PyExc_IOError,
2379 "can't reconstruct logical file position");
2380 goto fail;
2381 }
2382 }
2383
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002384finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002385 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002386 Py_DECREF(saved_state);
2387 if (res == NULL)
2388 return NULL;
2389 Py_DECREF(res);
2390
2391 /* The returned cookie corresponds to the last safe start point. */
2392 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002393 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002394
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002395fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002396 if (saved_state) {
2397 PyObject *type, *value, *traceback;
2398 PyErr_Fetch(&type, &value, &traceback);
2399
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002400 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002401 Py_DECREF(saved_state);
2402 if (res == NULL)
2403 return NULL;
2404 Py_DECREF(res);
2405
2406 PyErr_Restore(type, value, traceback);
2407 }
2408 return NULL;
2409}
2410
2411static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002412textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002413{
2414 PyObject *pos = Py_None;
2415 PyObject *res;
2416
2417 CHECK_INITIALIZED(self)
2418 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2419 return NULL;
2420 }
2421
2422 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2423 if (res == NULL)
2424 return NULL;
2425 Py_DECREF(res);
2426
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002427 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002428}
2429
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002430static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002431textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002432{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002433 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002434
2435 CHECK_INITIALIZED(self);
2436
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002437 res = PyUnicode_FromString("<_io.TextIOWrapper");
2438 if (res == NULL)
2439 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002440 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002441 if (nameobj == NULL) {
2442 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2443 PyErr_Clear();
2444 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002445 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002446 }
2447 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002448 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002449 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002450 if (s == NULL)
2451 goto error;
2452 PyUnicode_AppendAndDel(&res, s);
2453 if (res == NULL)
2454 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002455 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002456 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002457 if (modeobj == NULL) {
2458 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2459 PyErr_Clear();
2460 else
2461 goto error;
2462 }
2463 else {
2464 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2465 Py_DECREF(modeobj);
2466 if (s == NULL)
2467 goto error;
2468 PyUnicode_AppendAndDel(&res, s);
2469 if (res == NULL)
2470 return NULL;
2471 }
2472 s = PyUnicode_FromFormat("%U encoding=%R>",
2473 res, self->encoding);
2474 Py_DECREF(res);
2475 return s;
2476error:
2477 Py_XDECREF(res);
2478 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002479}
2480
2481
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002482/* Inquiries */
2483
2484static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002485textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002486{
2487 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002488 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002489}
2490
2491static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002492textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002493{
2494 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002495 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002496}
2497
2498static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002499textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002500{
2501 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002502 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002503}
2504
2505static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002506textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002507{
2508 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002509 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002510}
2511
2512static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002513textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002514{
2515 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002516 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002517}
2518
2519static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002520textiowrapper_getstate(textio *self, PyObject *args)
2521{
2522 PyErr_Format(PyExc_TypeError,
2523 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2524 return NULL;
2525}
2526
2527static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002528textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002529{
2530 CHECK_INITIALIZED(self);
2531 CHECK_CLOSED(self);
2532 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002533 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002535 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002536}
2537
2538static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002539textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002540{
2541 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002542 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002543 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002544
Antoine Pitrou6be88762010-05-03 16:48:20 +00002545 res = textiowrapper_closed_get(self, NULL);
2546 if (res == NULL)
2547 return NULL;
2548 r = PyObject_IsTrue(res);
2549 Py_DECREF(res);
2550 if (r < 0)
2551 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002552
Antoine Pitrou6be88762010-05-03 16:48:20 +00002553 if (r > 0) {
2554 Py_RETURN_NONE; /* stream already closed */
2555 }
2556 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002557 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002558 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002559 if (res)
2560 Py_DECREF(res);
2561 else
2562 PyErr_Clear();
2563 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002564 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002565 if (res == NULL) {
2566 return NULL;
2567 }
2568 else
2569 Py_DECREF(res);
2570
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002571 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002572 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002573}
2574
2575static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002576textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002577{
2578 PyObject *line;
2579
2580 CHECK_INITIALIZED(self);
2581
2582 self->telling = 0;
2583 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2584 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002585 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002586 }
2587 else {
2588 line = PyObject_CallMethodObjArgs((PyObject *)self,
2589 _PyIO_str_readline, NULL);
2590 if (line && !PyUnicode_Check(line)) {
2591 PyErr_Format(PyExc_IOError,
2592 "readline() should have returned an str object, "
2593 "not '%.200s'", Py_TYPE(line)->tp_name);
2594 Py_DECREF(line);
2595 return NULL;
2596 }
2597 }
2598
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002599 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002600 return NULL;
2601
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002602 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002603 /* Reached EOF or would have blocked */
2604 Py_DECREF(line);
2605 Py_CLEAR(self->snapshot);
2606 self->telling = self->seekable;
2607 return NULL;
2608 }
2609
2610 return line;
2611}
2612
2613static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002614textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002615{
2616 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002617 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002618}
2619
2620static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002621textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002622{
2623 CHECK_INITIALIZED(self);
2624 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2625}
2626
2627static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002628textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002629{
2630 PyObject *res;
2631 CHECK_INITIALIZED(self);
2632 if (self->decoder == NULL)
2633 Py_RETURN_NONE;
2634 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2635 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002636 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2637 PyErr_Clear();
2638 Py_RETURN_NONE;
2639 }
2640 else {
2641 return NULL;
2642 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002643 }
2644 return res;
2645}
2646
2647static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002648textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002649{
2650 CHECK_INITIALIZED(self);
2651 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2652}
2653
2654static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002655textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002656{
2657 CHECK_INITIALIZED(self);
2658 return PyLong_FromSsize_t(self->chunk_size);
2659}
2660
2661static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002662textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002663{
2664 Py_ssize_t n;
2665 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002666 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002667 if (n == -1 && PyErr_Occurred())
2668 return -1;
2669 if (n <= 0) {
2670 PyErr_SetString(PyExc_ValueError,
2671 "a strictly positive integer is required");
2672 return -1;
2673 }
2674 self->chunk_size = n;
2675 return 0;
2676}
2677
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002678static PyMethodDef textiowrapper_methods[] = {
2679 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2680 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2681 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2682 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2683 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2684 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002685
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002686 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2687 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2688 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2689 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2690 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002691 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002692
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002693 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2694 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2695 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002696 {NULL, NULL}
2697};
2698
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002699static PyMemberDef textiowrapper_members[] = {
2700 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2701 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2702 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002703 {NULL}
2704};
2705
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002706static PyGetSetDef textiowrapper_getset[] = {
2707 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2708 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002709/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2710*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002711 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2712 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2713 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2714 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002715 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002716};
2717
2718PyTypeObject PyTextIOWrapper_Type = {
2719 PyVarObject_HEAD_INIT(NULL, 0)
2720 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002721 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002722 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002723 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002724 0, /*tp_print*/
2725 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002726 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002727 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002728 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002729 0, /*tp_as_number*/
2730 0, /*tp_as_sequence*/
2731 0, /*tp_as_mapping*/
2732 0, /*tp_hash */
2733 0, /*tp_call*/
2734 0, /*tp_str*/
2735 0, /*tp_getattro*/
2736 0, /*tp_setattro*/
2737 0, /*tp_as_buffer*/
2738 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2739 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002740 textiowrapper_doc, /* tp_doc */
2741 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2742 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002743 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002744 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002745 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002746 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2747 textiowrapper_methods, /* tp_methods */
2748 textiowrapper_members, /* tp_members */
2749 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002750 0, /* tp_base */
2751 0, /* tp_dict */
2752 0, /* tp_descr_get */
2753 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002754 offsetof(textio, dict), /*tp_dictoffset*/
2755 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002756 0, /* tp_alloc */
2757 PyType_GenericNew, /* tp_new */
2758};