blob: 8344d43767e6e32469d72e50c58ba11ef67521d3 [file] [log] [blame]
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001/*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
Antoine Pitrou24f36292009-03-28 22:16:42 +00003
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00004 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
Antoine Pitrou24f36292009-03-28 22:16:42 +00005
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00006 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7*/
8
9#define PY_SSIZE_T_CLEAN
10#include "Python.h"
11#include "structmember.h"
12#include "_iomodule.h"
13
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020014_Py_IDENTIFIER(close);
15_Py_IDENTIFIER(_dealloc_warn);
16_Py_IDENTIFIER(decode);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020017_Py_IDENTIFIER(fileno);
18_Py_IDENTIFIER(flush);
19_Py_IDENTIFIER(getpreferredencoding);
20_Py_IDENTIFIER(isatty);
Martin v. Löwis767046a2011-10-14 15:35:36 +020021_Py_IDENTIFIER(mode);
22_Py_IDENTIFIER(name);
23_Py_IDENTIFIER(raw);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020024_Py_IDENTIFIER(read);
Martin v. Löwis767046a2011-10-14 15:35:36 +020025_Py_IDENTIFIER(read1);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +020026_Py_IDENTIFIER(readable);
27_Py_IDENTIFIER(replace);
28_Py_IDENTIFIER(reset);
29_Py_IDENTIFIER(seek);
30_Py_IDENTIFIER(seekable);
31_Py_IDENTIFIER(setstate);
32_Py_IDENTIFIER(tell);
33_Py_IDENTIFIER(writable);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +020034
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000035/* TextIOBase */
36
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000037PyDoc_STRVAR(textiobase_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000038 "Base class for text I/O.\n"
39 "\n"
40 "This class provides a character and line based interface to stream\n"
41 "I/O. There is no readinto method because Python's character strings\n"
42 "are immutable. There is no public constructor.\n"
43 );
44
45static PyObject *
46_unsupported(const char *message)
47{
48 PyErr_SetString(IO_STATE->unsupported_operation, message);
49 return NULL;
50}
51
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000052PyDoc_STRVAR(textiobase_detach_doc,
Benjamin Petersond2e0c792009-05-01 20:40:59 +000053 "Separate the underlying buffer from the TextIOBase and return it.\n"
54 "\n"
55 "After the underlying buffer has been detached, the TextIO is in an\n"
56 "unusable state.\n"
57 );
58
59static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000060textiobase_detach(PyObject *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +000061{
62 return _unsupported("detach");
63}
64
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000065PyDoc_STRVAR(textiobase_read_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000066 "Read at most n characters from stream.\n"
67 "\n"
68 "Read from underlying buffer until we have n characters or we hit EOF.\n"
69 "If n is negative or omitted, read until EOF.\n"
70 );
71
72static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000073textiobase_read(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000074{
75 return _unsupported("read");
76}
77
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000078PyDoc_STRVAR(textiobase_readline_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000079 "Read until newline or EOF.\n"
80 "\n"
81 "Returns an empty string if EOF is hit immediately.\n"
82 );
83
84static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000085textiobase_readline(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000086{
87 return _unsupported("readline");
88}
89
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000090PyDoc_STRVAR(textiobase_write_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000091 "Write string to stream.\n"
92 "Returns the number of characters written (which is always equal to\n"
93 "the length of the string).\n"
94 );
95
96static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +000097textiobase_write(PyObject *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +000098{
99 return _unsupported("write");
100}
101
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000102PyDoc_STRVAR(textiobase_encoding_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000103 "Encoding of the text stream.\n"
104 "\n"
105 "Subclasses should override.\n"
106 );
107
108static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000109textiobase_encoding_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000110{
111 Py_RETURN_NONE;
112}
113
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000114PyDoc_STRVAR(textiobase_newlines_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000115 "Line endings translated so far.\n"
116 "\n"
117 "Only line endings translated during reading are considered.\n"
118 "\n"
119 "Subclasses should override.\n"
120 );
121
122static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000123textiobase_newlines_get(PyObject *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000124{
125 Py_RETURN_NONE;
126}
127
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000128PyDoc_STRVAR(textiobase_errors_doc,
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000129 "The error setting of the decoder or encoder.\n"
130 "\n"
131 "Subclasses should override.\n"
132 );
133
134static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000135textiobase_errors_get(PyObject *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +0000136{
137 Py_RETURN_NONE;
138}
139
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000140
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000141static PyMethodDef textiobase_methods[] = {
142 {"detach", (PyCFunction)textiobase_detach, METH_NOARGS, textiobase_detach_doc},
143 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
144 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
145 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000146 {NULL, NULL}
147};
148
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000149static PyGetSetDef textiobase_getset[] = {
150 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
151 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
152 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000153 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000154};
155
156PyTypeObject PyTextIOBase_Type = {
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "_io._TextIOBase", /*tp_name*/
159 0, /*tp_basicsize*/
160 0, /*tp_itemsize*/
161 0, /*tp_dealloc*/
162 0, /*tp_print*/
163 0, /*tp_getattr*/
164 0, /*tp_setattr*/
165 0, /*tp_compare */
166 0, /*tp_repr*/
167 0, /*tp_as_number*/
168 0, /*tp_as_sequence*/
169 0, /*tp_as_mapping*/
170 0, /*tp_hash */
171 0, /*tp_call*/
172 0, /*tp_str*/
173 0, /*tp_getattro*/
174 0, /*tp_setattro*/
175 0, /*tp_as_buffer*/
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000177 textiobase_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000178 0, /* tp_traverse */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
183 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000184 textiobase_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000185 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000186 textiobase_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000187 &PyIOBase_Type, /* tp_base */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
193 0, /* tp_alloc */
194 0, /* tp_new */
195};
196
197
198/* IncrementalNewlineDecoder */
199
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000200PyDoc_STRVAR(incrementalnewlinedecoder_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000201 "Codec used when reading a file in universal newlines mode. It wraps\n"
202 "another incremental decoder, translating \\r\\n and \\r into \\n. It also\n"
203 "records the types of newlines encountered. When used with\n"
204 "translate=False, it ensures that the newline sequence is returned in\n"
205 "one piece. When used with decoder=None, it expects unicode strings as\n"
206 "decode input and translates newlines without first invoking an external\n"
207 "decoder.\n"
208 );
209
210typedef struct {
211 PyObject_HEAD
212 PyObject *decoder;
213 PyObject *errors;
Antoine Pitrouca767bd2009-09-21 21:37:02 +0000214 signed int pendingcr: 1;
215 signed int translate: 1;
216 unsigned int seennl: 3;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000217} nldecoder_object;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000218
219static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000220incrementalnewlinedecoder_init(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000221 PyObject *args, PyObject *kwds)
222{
223 PyObject *decoder;
224 int translate;
225 PyObject *errors = NULL;
226 char *kwlist[] = {"decoder", "translate", "errors", NULL};
227
228 if (!PyArg_ParseTupleAndKeywords(args, kwds, "Oi|O:IncrementalNewlineDecoder",
229 kwlist, &decoder, &translate, &errors))
230 return -1;
231
232 self->decoder = decoder;
233 Py_INCREF(decoder);
234
235 if (errors == NULL) {
236 self->errors = PyUnicode_FromString("strict");
237 if (self->errors == NULL)
238 return -1;
239 }
240 else {
241 Py_INCREF(errors);
242 self->errors = errors;
243 }
244
245 self->translate = translate;
246 self->seennl = 0;
247 self->pendingcr = 0;
248
249 return 0;
250}
251
252static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000253incrementalnewlinedecoder_dealloc(nldecoder_object *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000254{
255 Py_CLEAR(self->decoder);
256 Py_CLEAR(self->errors);
257 Py_TYPE(self)->tp_free((PyObject *)self);
258}
259
/* Bit flags kept in nldecoder_object.seennl: one bit per newline flavour
   encountered so far. */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
264
265PyObject *
Antoine Pitrou24f36292009-03-28 22:16:42 +0000266_PyIncrementalNewlineDecoder_decode(PyObject *_self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000267 PyObject *input, int final)
268{
269 PyObject *output;
270 Py_ssize_t output_len;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000271 nldecoder_object *self = (nldecoder_object *) _self;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000272
273 if (self->decoder == NULL) {
274 PyErr_SetString(PyExc_ValueError,
275 "IncrementalNewlineDecoder.__init__ not called");
276 return NULL;
277 }
278
279 /* decode input (with the eventual \r from a previous pass) */
280 if (self->decoder != Py_None) {
281 output = PyObject_CallMethodObjArgs(self->decoder,
282 _PyIO_str_decode, input, final ? Py_True : Py_False, NULL);
283 }
284 else {
285 output = input;
286 Py_INCREF(output);
287 }
288
289 if (output == NULL)
290 return NULL;
291
292 if (!PyUnicode_Check(output)) {
293 PyErr_SetString(PyExc_TypeError,
294 "decoder should return a string result");
295 goto error;
296 }
297
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200298 if (PyUnicode_READY(output) == -1)
299 goto error;
300
301 output_len = PyUnicode_GET_LENGTH(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000302 if (self->pendingcr && (final || output_len > 0)) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200303 /* Prefix output with CR */
304 int kind;
305 PyObject *modified;
306 char *out;
307
308 modified = PyUnicode_New(output_len + 1,
309 PyUnicode_MAX_CHAR_VALUE(output));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000310 if (modified == NULL)
311 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200312 kind = PyUnicode_KIND(modified);
313 out = PyUnicode_DATA(modified);
314 PyUnicode_WRITE(kind, PyUnicode_DATA(modified), 0, '\r');
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200315 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000316 Py_DECREF(output);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200317 output = modified; /* output remains ready */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000318 self->pendingcr = 0;
319 output_len++;
320 }
321
322 /* retain last \r even when not translating data:
323 * then readline() is sure to get \r\n in one pass
324 */
325 if (!final) {
Antoine Pitrou24f36292009-03-28 22:16:42 +0000326 if (output_len > 0
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200327 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
328 {
329 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
330 if (modified == NULL)
331 goto error;
332 Py_DECREF(output);
333 output = modified;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000334 self->pendingcr = 1;
335 }
336 }
337
338 /* Record which newlines are read and do newline translation if desired,
339 all in one pass. */
340 {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200341 void *in_str;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000342 Py_ssize_t len;
343 int seennl = self->seennl;
344 int only_lf = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200345 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000346
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200347 in_str = PyUnicode_DATA(output);
348 len = PyUnicode_GET_LENGTH(output);
349 kind = PyUnicode_KIND(output);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000350
351 if (len == 0)
352 return output;
353
354 /* If, up to now, newlines are consistently \n, do a quick check
355 for the \r *byte* with the libc's optimized memchr.
356 */
357 if (seennl == SEEN_LF || seennl == 0) {
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200358 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000359 }
360
Antoine Pitrou66913e22009-03-06 23:40:56 +0000361 if (only_lf) {
362 /* If not already seen, quick scan for a possible "\n" character.
363 (there's nothing else to be done, even when in translation mode)
364 */
365 if (seennl == 0 &&
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200366 memchr(in_str, '\n', kind * len) != NULL) {
Antoine Pitrouc28e2e52011-11-13 03:53:42 +0100367 if (kind == PyUnicode_1BYTE_KIND)
368 seennl |= SEEN_LF;
369 else {
370 Py_ssize_t i = 0;
371 for (;;) {
372 Py_UCS4 c;
373 /* Fast loop for non-control characters */
374 while (PyUnicode_READ(kind, in_str, i) > '\n')
375 i++;
376 c = PyUnicode_READ(kind, in_str, i++);
377 if (c == '\n') {
378 seennl |= SEEN_LF;
379 break;
380 }
381 if (i >= len)
382 break;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000383 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000384 }
385 }
386 /* Finished: we have scanned for newlines, and none of them
387 need translating */
388 }
389 else if (!self->translate) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200390 Py_ssize_t i = 0;
Antoine Pitrou66913e22009-03-06 23:40:56 +0000391 /* We have already seen all newline types, no need to scan again */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000392 if (seennl == SEEN_ALL)
393 goto endscan;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000394 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200395 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000396 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200397 while (PyUnicode_READ(kind, in_str, i) > '\r')
398 i++;
399 c = PyUnicode_READ(kind, in_str, i++);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000400 if (c == '\n')
401 seennl |= SEEN_LF;
402 else if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200403 if (PyUnicode_READ(kind, in_str, i) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000404 seennl |= SEEN_CRLF;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200405 i++;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000406 }
407 else
408 seennl |= SEEN_CR;
409 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200410 if (i >= len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000411 break;
412 if (seennl == SEEN_ALL)
413 break;
414 }
415 endscan:
416 ;
417 }
Antoine Pitrou66913e22009-03-06 23:40:56 +0000418 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200419 void *translated;
420 int kind = PyUnicode_KIND(output);
421 void *in_str = PyUnicode_DATA(output);
422 Py_ssize_t in, out;
423 /* XXX: Previous in-place translation here is disabled as
424 resizing is not possible anymore */
425 /* We could try to optimize this so that we only do a copy
426 when there is something to translate. On the other hand,
427 we already know there is a \r byte, so chances are high
428 that something needs to be done. */
Martin v. Löwisc47adb02011-10-07 20:55:35 +0200429 translated = PyMem_Malloc(kind * len);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200430 if (translated == NULL) {
431 PyErr_NoMemory();
432 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000433 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200434 in = out = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000435 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200436 Py_UCS4 c;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000437 /* Fast loop for non-control characters */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200438 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
439 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000440 if (c == '\n') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200441 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000442 seennl |= SEEN_LF;
443 continue;
444 }
445 if (c == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200446 if (PyUnicode_READ(kind, in_str, in) == '\n') {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000447 in++;
448 seennl |= SEEN_CRLF;
449 }
450 else
451 seennl |= SEEN_CR;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200452 PyUnicode_WRITE(kind, translated, out++, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000453 continue;
454 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200455 if (in > len)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000456 break;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200457 PyUnicode_WRITE(kind, translated, out++, c);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000458 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200459 Py_DECREF(output);
460 output = PyUnicode_FromKindAndData(kind, translated, out);
Antoine Pitrouc1b0bfd2011-11-12 22:34:28 +0100461 PyMem_Free(translated);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200462 if (!output)
Ross Lagerwall0f9eec12012-04-07 07:09:57 +0200463 return NULL;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000464 }
465 self->seennl |= seennl;
466 }
467
468 return output;
469
470 error:
471 Py_DECREF(output);
472 return NULL;
473}
474
475static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000476incrementalnewlinedecoder_decode(nldecoder_object *self,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000477 PyObject *args, PyObject *kwds)
478{
479 char *kwlist[] = {"input", "final", NULL};
480 PyObject *input;
481 int final = 0;
482
483 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|i:IncrementalNewlineDecoder",
484 kwlist, &input, &final))
485 return NULL;
486 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
487}
488
489static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000490incrementalnewlinedecoder_getstate(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000491{
492 PyObject *buffer;
493 unsigned PY_LONG_LONG flag;
494
495 if (self->decoder != Py_None) {
496 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
497 _PyIO_str_getstate, NULL);
498 if (state == NULL)
499 return NULL;
500 if (!PyArg_Parse(state, "(OK)", &buffer, &flag)) {
501 Py_DECREF(state);
502 return NULL;
503 }
504 Py_INCREF(buffer);
505 Py_DECREF(state);
506 }
507 else {
508 buffer = PyBytes_FromString("");
509 flag = 0;
510 }
511 flag <<= 1;
512 if (self->pendingcr)
513 flag |= 1;
514 return Py_BuildValue("NK", buffer, flag);
515}
516
517static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000518incrementalnewlinedecoder_setstate(nldecoder_object *self, PyObject *state)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000519{
520 PyObject *buffer;
521 unsigned PY_LONG_LONG flag;
522
523 if (!PyArg_Parse(state, "(OK)", &buffer, &flag))
524 return NULL;
525
526 self->pendingcr = (int) flag & 1;
527 flag >>= 1;
528
529 if (self->decoder != Py_None)
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200530 return _PyObject_CallMethodId(self->decoder,
531 &PyId_setstate, "((OK))", buffer, flag);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000532 else
533 Py_RETURN_NONE;
534}
535
536static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000537incrementalnewlinedecoder_reset(nldecoder_object *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000538{
539 self->seennl = 0;
540 self->pendingcr = 0;
541 if (self->decoder != Py_None)
542 return PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
543 else
544 Py_RETURN_NONE;
545}
546
547static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000548incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000549{
550 switch (self->seennl) {
551 case SEEN_CR:
552 return PyUnicode_FromString("\r");
553 case SEEN_LF:
554 return PyUnicode_FromString("\n");
555 case SEEN_CRLF:
556 return PyUnicode_FromString("\r\n");
557 case SEEN_CR | SEEN_LF:
558 return Py_BuildValue("ss", "\r", "\n");
559 case SEEN_CR | SEEN_CRLF:
560 return Py_BuildValue("ss", "\r", "\r\n");
561 case SEEN_LF | SEEN_CRLF:
562 return Py_BuildValue("ss", "\n", "\r\n");
563 case SEEN_CR | SEEN_LF | SEEN_CRLF:
564 return Py_BuildValue("sss", "\r", "\n", "\r\n");
565 default:
566 Py_RETURN_NONE;
567 }
568
569}
570
571
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000572static PyMethodDef incrementalnewlinedecoder_methods[] = {
573 {"decode", (PyCFunction)incrementalnewlinedecoder_decode, METH_VARARGS|METH_KEYWORDS},
574 {"getstate", (PyCFunction)incrementalnewlinedecoder_getstate, METH_NOARGS},
575 {"setstate", (PyCFunction)incrementalnewlinedecoder_setstate, METH_O},
576 {"reset", (PyCFunction)incrementalnewlinedecoder_reset, METH_NOARGS},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000577 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000578};
579
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000580static PyGetSetDef incrementalnewlinedecoder_getset[] = {
581 {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +0000582 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000583};
584
585PyTypeObject PyIncrementalNewlineDecoder_Type = {
586 PyVarObject_HEAD_INIT(NULL, 0)
587 "_io.IncrementalNewlineDecoder", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000588 sizeof(nldecoder_object), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000589 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000590 (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000591 0, /*tp_print*/
592 0, /*tp_getattr*/
593 0, /*tp_setattr*/
594 0, /*tp_compare */
595 0, /*tp_repr*/
596 0, /*tp_as_number*/
597 0, /*tp_as_sequence*/
598 0, /*tp_as_mapping*/
599 0, /*tp_hash */
600 0, /*tp_call*/
601 0, /*tp_str*/
602 0, /*tp_getattro*/
603 0, /*tp_setattro*/
604 0, /*tp_as_buffer*/
605 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000606 incrementalnewlinedecoder_doc, /* tp_doc */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000607 0, /* tp_traverse */
608 0, /* tp_clear */
609 0, /* tp_richcompare */
610 0, /*tp_weaklistoffset*/
611 0, /* tp_iter */
612 0, /* tp_iternext */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000613 incrementalnewlinedecoder_methods, /* tp_methods */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000614 0, /* tp_members */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000615 incrementalnewlinedecoder_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000616 0, /* tp_base */
617 0, /* tp_dict */
618 0, /* tp_descr_get */
619 0, /* tp_descr_set */
620 0, /* tp_dictoffset */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000621 (initproc)incrementalnewlinedecoder_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000622 0, /* tp_alloc */
623 PyType_GenericNew, /* tp_new */
624};
625
626
627/* TextIOWrapper */
628
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000629PyDoc_STRVAR(textiowrapper_doc,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000630 "Character and line based layer over a BufferedIOBase object, buffer.\n"
631 "\n"
632 "encoding gives the name of the encoding that the stream will be\n"
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200633 "decoded or encoded with. It defaults to locale.getpreferredencoding(False).\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000634 "\n"
635 "errors determines the strictness of encoding and decoding (see the\n"
636 "codecs.register) and defaults to \"strict\".\n"
637 "\n"
Antoine Pitrou0c1c0d42012-08-04 00:55:38 +0200638 "newline controls how line endings are handled. It can be None, '',\n"
639 "'\\n', '\\r', and '\\r\\n'. It works as follows:\n"
640 "\n"
641 "* On input, if newline is None, universal newlines mode is\n"
642 " enabled. Lines in the input can end in '\\n', '\\r', or '\\r\\n', and\n"
643 " these are translated into '\\n' before being returned to the\n"
644 " caller. If it is '', universal newline mode is enabled, but line\n"
645 " endings are returned to the caller untranslated. If it has any of\n"
646 " the other legal values, input lines are only terminated by the given\n"
647 " string, and the line ending is returned to the caller untranslated.\n"
648 "\n"
649 "* On output, if newline is None, any '\\n' characters written are\n"
650 " translated to the system default line separator, os.linesep. If\n"
Ezio Melotti16d2b472012-09-18 07:20:18 +0300651 " newline is '' or '\\n', no translation takes place. If newline is any\n"
Victor Stinner401e17d2012-08-04 01:18:56 +0200652 " of the other legal values, any '\\n' characters written are translated\n"
653 " to the given string.\n"
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000654 "\n"
655 "If line_buffering is True, a call to flush is implied when a call to\n"
656 "write contains a newline character."
657 );
658
659typedef PyObject *
660 (*encodefunc_t)(PyObject *, PyObject *);
661
662typedef struct
663{
664 PyObject_HEAD
665 int ok; /* initialized? */
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000666 int detached;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000667 Py_ssize_t chunk_size;
668 PyObject *buffer;
669 PyObject *encoding;
670 PyObject *encoder;
671 PyObject *decoder;
672 PyObject *readnl;
673 PyObject *errors;
674 const char *writenl; /* utf-8 encoded, NULL stands for \n */
675 char line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200676 char write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000677 char readuniversal;
678 char readtranslate;
679 char writetranslate;
680 char seekable;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200681 char has_read1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000682 char telling;
Antoine Pitroue033e062010-10-29 10:38:18 +0000683 char deallocating;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000684 /* Specialized encoding func (see below) */
685 encodefunc_t encodefunc;
Antoine Pitroue4501852009-05-14 18:55:55 +0000686 /* Whether or not it's the start of the stream */
687 char encoding_start_of_stream;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000688
689 /* Reads and writes are internally buffered in order to speed things up.
690 However, any read will first flush the write buffer if itsn't empty.
Antoine Pitrou24f36292009-03-28 22:16:42 +0000691
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000692 Please also note that text to be written is first encoded before being
693 buffered. This is necessary so that encoding errors are immediately
694 reported to the caller, but it unfortunately means that the
695 IncrementalEncoder (whose encode() method is always written in Python)
696 becomes a bottleneck for small writes.
697 */
698 PyObject *decoded_chars; /* buffer for text returned from decoder */
699 Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
700 PyObject *pending_bytes; /* list of bytes objects waiting to be
701 written, or NULL */
702 Py_ssize_t pending_bytes_count;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000703
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000704 /* snapshot is either None, or a tuple (dec_flags, next_input) where
705 * dec_flags is the second (integer) item of the decoder state and
706 * next_input is the chunk of input bytes that comes next after the
707 * snapshot point. We use this to reconstruct decoder states in tell().
708 */
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000709 PyObject *snapshot;
710 /* Bytes-to-characters ratio for the current chunk. Serves as input for
711 the heuristic in tell(). */
712 double b2cratio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000713
714 /* Cache raw object if it's a FileIO object */
715 PyObject *raw;
716
717 PyObject *weakreflist;
718 PyObject *dict;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000719} textio;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000720
721
722/* A couple of specialized cases in order to bypass the slow incremental
723 encoding methods for the most popular encodings. */
724
725static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000726ascii_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000727{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200728 return _PyUnicode_AsASCIIString(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000729}
730
731static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000732utf16be_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000733{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100734 return _PyUnicode_EncodeUTF16(text,
735 PyBytes_AS_STRING(self->errors), 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000736}
737
738static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000739utf16le_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000740{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100741 return _PyUnicode_EncodeUTF16(text,
742 PyBytes_AS_STRING(self->errors), -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000743}
744
745static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000746utf16_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000747{
Antoine Pitroue4501852009-05-14 18:55:55 +0000748 if (!self->encoding_start_of_stream) {
749 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200750#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000751 return utf16be_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000752#else
Antoine Pitroue4501852009-05-14 18:55:55 +0000753 return utf16le_encode(self, text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000754#endif
Antoine Pitroue4501852009-05-14 18:55:55 +0000755 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100756 return _PyUnicode_EncodeUTF16(text,
757 PyBytes_AS_STRING(self->errors), 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000758}
759
Antoine Pitroue4501852009-05-14 18:55:55 +0000760static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000761utf32be_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000762{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100763 return _PyUnicode_EncodeUTF32(text,
764 PyBytes_AS_STRING(self->errors), 1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000765}
766
767static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000768utf32le_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000769{
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100770 return _PyUnicode_EncodeUTF32(text,
771 PyBytes_AS_STRING(self->errors), -1);
Antoine Pitroue4501852009-05-14 18:55:55 +0000772}
773
774static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000775utf32_encode(textio *self, PyObject *text)
Antoine Pitroue4501852009-05-14 18:55:55 +0000776{
777 if (!self->encoding_start_of_stream) {
778 /* Skip the BOM and use native byte ordering */
Christian Heimes743e0cd2012-10-17 23:52:17 +0200779#if PY_BIG_ENDIAN
Antoine Pitroue4501852009-05-14 18:55:55 +0000780 return utf32be_encode(self, text);
781#else
782 return utf32le_encode(self, text);
783#endif
784 }
Antoine Pitrou5c398e82011-11-13 04:11:37 +0100785 return _PyUnicode_EncodeUTF32(text,
786 PyBytes_AS_STRING(self->errors), 0);
Antoine Pitroue4501852009-05-14 18:55:55 +0000787}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000788
789static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000790utf8_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000791{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200792 return _PyUnicode_AsUTF8String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000793}
794
795static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000796latin1_encode(textio *self, PyObject *text)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000797{
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200798 return _PyUnicode_AsLatin1String(text, PyBytes_AS_STRING(self->errors));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000799}
800
801/* Map normalized encoding names onto the specialized encoding funcs */
802
803typedef struct {
804 const char *name;
805 encodefunc_t encodefunc;
806} encodefuncentry;
807
Antoine Pitrou24f36292009-03-28 22:16:42 +0000808static encodefuncentry encodefuncs[] = {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000809 {"ascii", (encodefunc_t) ascii_encode},
810 {"iso8859-1", (encodefunc_t) latin1_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000811 {"utf-8", (encodefunc_t) utf8_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000812 {"utf-16-be", (encodefunc_t) utf16be_encode},
813 {"utf-16-le", (encodefunc_t) utf16le_encode},
814 {"utf-16", (encodefunc_t) utf16_encode},
Antoine Pitroue4501852009-05-14 18:55:55 +0000815 {"utf-32-be", (encodefunc_t) utf32be_encode},
816 {"utf-32-le", (encodefunc_t) utf32le_encode},
817 {"utf-32", (encodefunc_t) utf32_encode},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000818 {NULL, NULL}
819};
820
821
822static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +0000823textiowrapper_init(textio *self, PyObject *args, PyObject *kwds)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000824{
825 char *kwlist[] = {"buffer", "encoding", "errors",
Antoine Pitroue96ec682011-07-23 21:46:35 +0200826 "newline", "line_buffering", "write_through",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000827 NULL};
828 PyObject *buffer, *raw;
829 char *encoding = NULL;
830 char *errors = NULL;
831 char *newline = NULL;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200832 int line_buffering = 0, write_through = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000833 _PyIO_State *state = IO_STATE;
834
835 PyObject *res;
836 int r;
837
838 self->ok = 0;
Benjamin Petersond2e0c792009-05-01 20:40:59 +0000839 self->detached = 0;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200840 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzii:fileio",
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000841 kwlist, &buffer, &encoding, &errors,
Antoine Pitroue96ec682011-07-23 21:46:35 +0200842 &newline, &line_buffering, &write_through))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000843 return -1;
844
845 if (newline && newline[0] != '\0'
846 && !(newline[0] == '\n' && newline[1] == '\0')
847 && !(newline[0] == '\r' && newline[1] == '\0')
848 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
849 PyErr_Format(PyExc_ValueError,
850 "illegal newline value: %s", newline);
851 return -1;
852 }
853
854 Py_CLEAR(self->buffer);
855 Py_CLEAR(self->encoding);
856 Py_CLEAR(self->encoder);
857 Py_CLEAR(self->decoder);
858 Py_CLEAR(self->readnl);
859 Py_CLEAR(self->decoded_chars);
860 Py_CLEAR(self->pending_bytes);
861 Py_CLEAR(self->snapshot);
862 Py_CLEAR(self->errors);
863 Py_CLEAR(self->raw);
864 self->decoded_chars_used = 0;
865 self->pending_bytes_count = 0;
866 self->encodefunc = NULL;
Antoine Pitrou211b81d2011-02-25 20:27:33 +0000867 self->b2cratio = 0.0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000868
869 if (encoding == NULL) {
870 /* Try os.device_encoding(fileno) */
871 PyObject *fileno;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200872 fileno = _PyObject_CallMethodId(buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000873 /* Ignore only AttributeError and UnsupportedOperation */
874 if (fileno == NULL) {
875 if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
876 PyErr_ExceptionMatches(state->unsupported_operation)) {
877 PyErr_Clear();
878 }
879 else {
880 goto error;
881 }
882 }
883 else {
Brett Cannonefb00c02012-02-29 18:31:31 -0500884 int fd = (int) PyLong_AsLong(fileno);
885 Py_DECREF(fileno);
886 if (fd == -1 && PyErr_Occurred()) {
887 goto error;
888 }
889
890 self->encoding = _Py_device_encoding(fd);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000891 if (self->encoding == NULL)
892 goto error;
893 else if (!PyUnicode_Check(self->encoding))
894 Py_CLEAR(self->encoding);
895 }
896 }
897 if (encoding == NULL && self->encoding == NULL) {
898 if (state->locale_module == NULL) {
899 state->locale_module = PyImport_ImportModule("locale");
900 if (state->locale_module == NULL)
901 goto catch_ImportError;
902 else
903 goto use_locale;
904 }
905 else {
906 use_locale:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200907 self->encoding = _PyObject_CallMethodId(
Victor Stinnerf86a5e82012-06-05 13:43:22 +0200908 state->locale_module, &PyId_getpreferredencoding, "O", Py_False);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000909 if (self->encoding == NULL) {
910 catch_ImportError:
911 /*
912 Importing locale can raise a ImportError because of
913 _functools, and locale.getpreferredencoding can raise a
914 ImportError if _locale is not available. These will happen
915 during module building.
916 */
917 if (PyErr_ExceptionMatches(PyExc_ImportError)) {
918 PyErr_Clear();
919 self->encoding = PyUnicode_FromString("ascii");
920 }
921 else
922 goto error;
923 }
924 else if (!PyUnicode_Check(self->encoding))
925 Py_CLEAR(self->encoding);
926 }
927 }
Victor Stinnerf6c57832010-05-19 01:17:01 +0000928 if (self->encoding != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000929 encoding = _PyUnicode_AsString(self->encoding);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000930 if (encoding == NULL)
931 goto error;
932 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000933 else if (encoding != NULL) {
934 self->encoding = PyUnicode_FromString(encoding);
935 if (self->encoding == NULL)
936 goto error;
937 }
938 else {
939 PyErr_SetString(PyExc_IOError,
940 "could not determine default encoding");
941 }
942
943 if (errors == NULL)
944 errors = "strict";
945 self->errors = PyBytes_FromString(errors);
946 if (self->errors == NULL)
947 goto error;
948
949 self->chunk_size = 8192;
950 self->readuniversal = (newline == NULL || newline[0] == '\0');
951 self->line_buffering = line_buffering;
Antoine Pitroue96ec682011-07-23 21:46:35 +0200952 self->write_through = write_through;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000953 self->readtranslate = (newline == NULL);
954 if (newline) {
955 self->readnl = PyUnicode_FromString(newline);
956 if (self->readnl == NULL)
957 return -1;
958 }
959 self->writetranslate = (newline == NULL || newline[0] != '\0');
960 if (!self->readuniversal && self->readnl) {
961 self->writenl = _PyUnicode_AsString(self->readnl);
Victor Stinnerf6c57832010-05-19 01:17:01 +0000962 if (self->writenl == NULL)
963 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000964 if (!strcmp(self->writenl, "\n"))
965 self->writenl = NULL;
966 }
967#ifdef MS_WINDOWS
968 else
969 self->writenl = "\r\n";
970#endif
971
972 /* Build the decoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200973 res = _PyObject_CallMethodId(buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000974 if (res == NULL)
975 goto error;
976 r = PyObject_IsTrue(res);
977 Py_DECREF(res);
978 if (r == -1)
979 goto error;
980 if (r == 1) {
981 self->decoder = PyCodec_IncrementalDecoder(
982 encoding, errors);
983 if (self->decoder == NULL)
984 goto error;
985
986 if (self->readuniversal) {
987 PyObject *incrementalDecoder = PyObject_CallFunction(
988 (PyObject *)&PyIncrementalNewlineDecoder_Type,
989 "Oi", self->decoder, (int)self->readtranslate);
990 if (incrementalDecoder == NULL)
991 goto error;
992 Py_CLEAR(self->decoder);
993 self->decoder = incrementalDecoder;
994 }
995 }
996
997 /* Build the encoder object */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +0200998 res = _PyObject_CallMethodId(buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +0000999 if (res == NULL)
1000 goto error;
1001 r = PyObject_IsTrue(res);
1002 Py_DECREF(res);
1003 if (r == -1)
1004 goto error;
1005 if (r == 1) {
1006 PyObject *ci;
1007 self->encoder = PyCodec_IncrementalEncoder(
1008 encoding, errors);
1009 if (self->encoder == NULL)
1010 goto error;
1011 /* Get the normalized named of the codec */
1012 ci = _PyCodec_Lookup(encoding);
1013 if (ci == NULL)
1014 goto error;
Martin v. Löwis767046a2011-10-14 15:35:36 +02001015 res = _PyObject_GetAttrId(ci, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001016 Py_DECREF(ci);
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001017 if (res == NULL) {
1018 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1019 PyErr_Clear();
1020 else
1021 goto error;
1022 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001023 else if (PyUnicode_Check(res)) {
1024 encodefuncentry *e = encodefuncs;
1025 while (e->name != NULL) {
1026 if (!PyUnicode_CompareWithASCIIString(res, e->name)) {
1027 self->encodefunc = e->encodefunc;
1028 break;
1029 }
1030 e++;
1031 }
1032 }
1033 Py_XDECREF(res);
1034 }
1035
1036 self->buffer = buffer;
1037 Py_INCREF(buffer);
Antoine Pitrou24f36292009-03-28 22:16:42 +00001038
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001039 if (Py_TYPE(buffer) == &PyBufferedReader_Type ||
1040 Py_TYPE(buffer) == &PyBufferedWriter_Type ||
1041 Py_TYPE(buffer) == &PyBufferedRandom_Type) {
Martin v. Löwis767046a2011-10-14 15:35:36 +02001042 raw = _PyObject_GetAttrId(buffer, &PyId_raw);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001043 /* Cache the raw FileIO object to speed up 'closed' checks */
Benjamin Peterson2cfca792009-06-06 20:46:48 +00001044 if (raw == NULL) {
1045 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1046 PyErr_Clear();
1047 else
1048 goto error;
1049 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001050 else if (Py_TYPE(raw) == &PyFileIO_Type)
1051 self->raw = raw;
1052 else
1053 Py_DECREF(raw);
1054 }
1055
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001056 res = _PyObject_CallMethodId(buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001057 if (res == NULL)
1058 goto error;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001059 r = PyObject_IsTrue(res);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001060 Py_DECREF(res);
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001061 if (r < 0)
1062 goto error;
1063 self->seekable = self->telling = r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001064
Martin v. Löwis767046a2011-10-14 15:35:36 +02001065 self->has_read1 = _PyObject_HasAttrId(buffer, &PyId_read1);
Antoine Pitroue96ec682011-07-23 21:46:35 +02001066
Antoine Pitroue4501852009-05-14 18:55:55 +00001067 self->encoding_start_of_stream = 0;
1068 if (self->seekable && self->encoder) {
1069 PyObject *cookieObj;
1070 int cmp;
1071
1072 self->encoding_start_of_stream = 1;
1073
1074 cookieObj = PyObject_CallMethodObjArgs(buffer, _PyIO_str_tell, NULL);
1075 if (cookieObj == NULL)
1076 goto error;
1077
1078 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
1079 Py_DECREF(cookieObj);
1080 if (cmp < 0) {
1081 goto error;
1082 }
1083
1084 if (cmp == 0) {
1085 self->encoding_start_of_stream = 0;
1086 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
1087 _PyIO_zero, NULL);
1088 if (res == NULL)
1089 goto error;
1090 Py_DECREF(res);
1091 }
1092 }
1093
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001094 self->ok = 1;
1095 return 0;
1096
1097 error:
1098 return -1;
1099}
1100
1101static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001102_textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001103{
1104 if (self->ok && _PyIOBase_finalize((PyObject *) self) < 0)
1105 return -1;
1106 self->ok = 0;
1107 Py_CLEAR(self->buffer);
1108 Py_CLEAR(self->encoding);
1109 Py_CLEAR(self->encoder);
1110 Py_CLEAR(self->decoder);
1111 Py_CLEAR(self->readnl);
1112 Py_CLEAR(self->decoded_chars);
1113 Py_CLEAR(self->pending_bytes);
1114 Py_CLEAR(self->snapshot);
1115 Py_CLEAR(self->errors);
1116 Py_CLEAR(self->raw);
1117 return 0;
1118}
1119
1120static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001121textiowrapper_dealloc(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001122{
Antoine Pitroue033e062010-10-29 10:38:18 +00001123 self->deallocating = 1;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001124 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001125 return;
1126 _PyObject_GC_UNTRACK(self);
1127 if (self->weakreflist != NULL)
1128 PyObject_ClearWeakRefs((PyObject *)self);
1129 Py_CLEAR(self->dict);
1130 Py_TYPE(self)->tp_free((PyObject *)self);
1131}
1132
1133static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001134textiowrapper_traverse(textio *self, visitproc visit, void *arg)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001135{
1136 Py_VISIT(self->buffer);
1137 Py_VISIT(self->encoding);
1138 Py_VISIT(self->encoder);
1139 Py_VISIT(self->decoder);
1140 Py_VISIT(self->readnl);
1141 Py_VISIT(self->decoded_chars);
1142 Py_VISIT(self->pending_bytes);
1143 Py_VISIT(self->snapshot);
1144 Py_VISIT(self->errors);
1145 Py_VISIT(self->raw);
1146
1147 Py_VISIT(self->dict);
1148 return 0;
1149}
1150
1151static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001152textiowrapper_clear(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001153{
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001154 if (_textiowrapper_clear(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001155 return -1;
1156 Py_CLEAR(self->dict);
1157 return 0;
1158}
1159
1160static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001161textiowrapper_closed_get(textio *self, void *context);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001162
1163/* This macro takes some shortcuts to make the common case faster. */
1164#define CHECK_CLOSED(self) \
1165 do { \
1166 int r; \
1167 PyObject *_res; \
1168 if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
1169 if (self->raw != NULL) \
1170 r = _PyFileIO_closed(self->raw); \
1171 else { \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001172 _res = textiowrapper_closed_get(self, NULL); \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001173 if (_res == NULL) \
1174 return NULL; \
1175 r = PyObject_IsTrue(_res); \
1176 Py_DECREF(_res); \
1177 if (r < 0) \
1178 return NULL; \
1179 } \
1180 if (r > 0) { \
1181 PyErr_SetString(PyExc_ValueError, \
1182 "I/O operation on closed file."); \
1183 return NULL; \
1184 } \
1185 } \
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001186 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001187 return NULL; \
1188 } while (0)
1189
/* Return NULL from the calling function, with ValueError set, when the
   object is not usable (self->ok <= 0).  The message distinguishes a
   detached buffer from an object that was never initialized.
   Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe inside an unbraced if/else); use it with a trailing semicolon. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return NULL; \
        } \
    } while (0)
1201
/* Variant of the initialization check for functions returning int:
   returns -1 from the calling function, with ValueError set, when the
   object is not usable (self->ok <= 0).
   Wrapped in do { } while (0) so the macro behaves as a single statement
   (safe inside an unbraced if/else); use it with a trailing semicolon. */
#define CHECK_INITIALIZED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            if ((self)->detached) { \
                PyErr_SetString(PyExc_ValueError, \
                     "underlying buffer has been detached"); \
            } else { \
                PyErr_SetString(PyExc_ValueError, \
                    "I/O operation on uninitialized object"); \
            } \
            return -1; \
        } \
    } while (0)
1213
1214
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001215static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001216textiowrapper_detach(textio *self)
Benjamin Petersond2e0c792009-05-01 20:40:59 +00001217{
1218 PyObject *buffer, *res;
1219 CHECK_INITIALIZED(self);
1220 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
1221 if (res == NULL)
1222 return NULL;
1223 Py_DECREF(res);
1224 buffer = self->buffer;
1225 self->buffer = NULL;
1226 self->detached = 1;
1227 self->ok = 0;
1228 return buffer;
1229}
1230
Antoine Pitrou24f36292009-03-28 22:16:42 +00001231/* Flush the internal write buffer. This doesn't explicitly flush the
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001232 underlying buffered object, though. */
1233static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001234_textiowrapper_writeflush(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001235{
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001236 PyObject *pending, *b, *ret;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001237
1238 if (self->pending_bytes == NULL)
1239 return 0;
Amaury Forgeot d'Arcccd686a2009-08-29 23:00:38 +00001240
1241 pending = self->pending_bytes;
1242 Py_INCREF(pending);
1243 self->pending_bytes_count = 0;
1244 Py_CLEAR(self->pending_bytes);
1245
1246 b = _PyBytes_Join(_PyIO_empty_bytes, pending);
1247 Py_DECREF(pending);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001248 if (b == NULL)
1249 return -1;
1250 ret = PyObject_CallMethodObjArgs(self->buffer,
1251 _PyIO_str_write, b, NULL);
1252 Py_DECREF(b);
1253 if (ret == NULL)
1254 return -1;
1255 Py_DECREF(ret);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001256 return 0;
1257}
1258
1259static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001260textiowrapper_write(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001261{
1262 PyObject *ret;
1263 PyObject *text; /* owned reference */
1264 PyObject *b;
1265 Py_ssize_t textlen;
1266 int haslf = 0;
1267 int needflush = 0;
1268
1269 CHECK_INITIALIZED(self);
1270
1271 if (!PyArg_ParseTuple(args, "U:write", &text)) {
1272 return NULL;
1273 }
1274
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001275 if (PyUnicode_READY(text) == -1)
1276 return NULL;
1277
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001278 CHECK_CLOSED(self);
1279
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001280 if (self->encoder == NULL)
1281 return _unsupported("not writable");
Benjamin Peterson81971ea2009-05-14 22:01:31 +00001282
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001283 Py_INCREF(text);
1284
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001285 textlen = PyUnicode_GET_LENGTH(text);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001286
1287 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001288 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001289 haslf = 1;
1290
1291 if (haslf && self->writetranslate && self->writenl != NULL) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001292 PyObject *newtext = _PyObject_CallMethodId(
1293 text, &PyId_replace, "ss", "\n", self->writenl);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001294 Py_DECREF(text);
1295 if (newtext == NULL)
1296 return NULL;
1297 text = newtext;
1298 }
1299
Antoine Pitroue96ec682011-07-23 21:46:35 +02001300 if (self->write_through)
1301 needflush = 1;
1302 else if (self->line_buffering &&
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001303 (haslf ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001304 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001305 needflush = 1;
1306
1307 /* XXX What if we were just reading? */
Antoine Pitroue4501852009-05-14 18:55:55 +00001308 if (self->encodefunc != NULL) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001309 b = (*self->encodefunc)((PyObject *) self, text);
Antoine Pitroue4501852009-05-14 18:55:55 +00001310 self->encoding_start_of_stream = 0;
1311 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001312 else
1313 b = PyObject_CallMethodObjArgs(self->encoder,
1314 _PyIO_str_encode, text, NULL);
1315 Py_DECREF(text);
1316 if (b == NULL)
1317 return NULL;
1318
1319 if (self->pending_bytes == NULL) {
1320 self->pending_bytes = PyList_New(0);
1321 if (self->pending_bytes == NULL) {
1322 Py_DECREF(b);
1323 return NULL;
1324 }
1325 self->pending_bytes_count = 0;
1326 }
1327 if (PyList_Append(self->pending_bytes, b) < 0) {
1328 Py_DECREF(b);
1329 return NULL;
1330 }
1331 self->pending_bytes_count += PyBytes_GET_SIZE(b);
1332 Py_DECREF(b);
1333 if (self->pending_bytes_count > self->chunk_size || needflush) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001334 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001335 return NULL;
1336 }
Antoine Pitrou24f36292009-03-28 22:16:42 +00001337
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001338 if (needflush) {
1339 ret = PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_flush, NULL);
1340 if (ret == NULL)
1341 return NULL;
1342 Py_DECREF(ret);
1343 }
1344
1345 Py_CLEAR(self->snapshot);
1346
1347 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001348 ret = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001349 if (ret == NULL)
1350 return NULL;
1351 Py_DECREF(ret);
1352 }
1353
1354 return PyLong_FromSsize_t(textlen);
1355}
1356
1357/* Steal a reference to chars and store it in the decoded_char buffer;
1358 */
1359static void
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001360textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001361{
1362 Py_CLEAR(self->decoded_chars);
1363 self->decoded_chars = chars;
1364 self->decoded_chars_used = 0;
1365}
1366
1367static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001368textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001369{
1370 PyObject *chars;
1371 Py_ssize_t avail;
1372
1373 if (self->decoded_chars == NULL)
1374 return PyUnicode_FromStringAndSize(NULL, 0);
1375
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001376 /* decoded_chars is guaranteed to be "ready". */
1377 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001378 - self->decoded_chars_used);
1379
1380 assert(avail >= 0);
1381
1382 if (n < 0 || n > avail)
1383 n = avail;
1384
1385 if (self->decoded_chars_used > 0 || n < avail) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001386 chars = PyUnicode_Substring(self->decoded_chars,
1387 self->decoded_chars_used,
1388 self->decoded_chars_used + n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001389 if (chars == NULL)
1390 return NULL;
1391 }
1392 else {
1393 chars = self->decoded_chars;
1394 Py_INCREF(chars);
1395 }
1396
1397 self->decoded_chars_used += n;
1398 return chars;
1399}
1400
1401/* Read and decode the next chunk of data from the BufferedReader.
1402 */
1403static int
Antoine Pitroue5324562011-11-19 00:39:01 +01001404textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001405{
1406 PyObject *dec_buffer = NULL;
1407 PyObject *dec_flags = NULL;
1408 PyObject *input_chunk = NULL;
1409 PyObject *decoded_chars, *chunk_size;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001410 Py_ssize_t nbytes, nchars;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001411 int eof;
1412
1413 /* The return value is True unless EOF was reached. The decoded string is
1414 * placed in self._decoded_chars (replacing its previous value). The
1415 * entire input chunk is sent to the decoder, though some of it may remain
1416 * buffered in the decoder, yet to be converted.
1417 */
1418
1419 if (self->decoder == NULL) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001420 _unsupported("not readable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001421 return -1;
1422 }
1423
1424 if (self->telling) {
1425 /* To prepare for tell(), we need to snapshot a point in the file
1426 * where the decoder's input buffer is empty.
1427 */
1428
1429 PyObject *state = PyObject_CallMethodObjArgs(self->decoder,
1430 _PyIO_str_getstate, NULL);
1431 if (state == NULL)
1432 return -1;
1433 /* Given this, we know there was a valid snapshot point
1434 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1435 */
1436 if (PyArg_Parse(state, "(OO)", &dec_buffer, &dec_flags) < 0) {
1437 Py_DECREF(state);
1438 return -1;
1439 }
1440 Py_INCREF(dec_buffer);
1441 Py_INCREF(dec_flags);
1442 Py_DECREF(state);
1443 }
1444
1445 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
Antoine Pitroue5324562011-11-19 00:39:01 +01001446 if (size_hint > 0) {
Victor Stinnerf8facac2011-11-22 02:30:47 +01001447 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
Antoine Pitroue5324562011-11-19 00:39:01 +01001448 }
1449 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001450 if (chunk_size == NULL)
1451 goto fail;
1452 input_chunk = PyObject_CallMethodObjArgs(self->buffer,
Antoine Pitroue96ec682011-07-23 21:46:35 +02001453 (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
1454 chunk_size, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001455 Py_DECREF(chunk_size);
1456 if (input_chunk == NULL)
1457 goto fail;
1458 assert(PyBytes_Check(input_chunk));
1459
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001460 nbytes = PyBytes_Size(input_chunk);
1461 eof = (nbytes == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001462
1463 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
1464 decoded_chars = _PyIncrementalNewlineDecoder_decode(
1465 self->decoder, input_chunk, eof);
1466 }
1467 else {
1468 decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
1469 _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
1470 }
1471
1472 /* TODO sanity check: isinstance(decoded_chars, unicode) */
1473 if (decoded_chars == NULL)
1474 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001475 if (PyUnicode_READY(decoded_chars) == -1)
1476 goto fail;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001477 textiowrapper_set_decoded_chars(self, decoded_chars);
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001478 nchars = PyUnicode_GET_LENGTH(decoded_chars);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00001479 if (nchars > 0)
1480 self->b2cratio = (double) nbytes / nchars;
1481 else
1482 self->b2cratio = 0.0;
1483 if (nchars > 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001484 eof = 0;
1485
1486 if (self->telling) {
1487 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1488 * next input to be decoded is dec_buffer + input_chunk.
1489 */
1490 PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
1491 if (next_input == NULL)
1492 goto fail;
1493 assert (PyBytes_Check(next_input));
1494 Py_DECREF(dec_buffer);
1495 Py_CLEAR(self->snapshot);
1496 self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
1497 }
1498 Py_DECREF(input_chunk);
1499
1500 return (eof == 0);
1501
1502 fail:
1503 Py_XDECREF(dec_buffer);
1504 Py_XDECREF(dec_flags);
1505 Py_XDECREF(input_chunk);
1506 return -1;
1507}
1508
1509static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001510textiowrapper_read(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001511{
1512 Py_ssize_t n = -1;
1513 PyObject *result = NULL, *chunks = NULL;
1514
1515 CHECK_INITIALIZED(self);
1516
Benjamin Petersonbf5ff762009-12-13 19:25:34 +00001517 if (!PyArg_ParseTuple(args, "|O&:read", &_PyIO_ConvertSsize_t, &n))
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001518 return NULL;
1519
1520 CHECK_CLOSED(self);
1521
Antoine Pitrou0d739d72010-09-05 23:01:12 +00001522 if (self->decoder == NULL)
1523 return _unsupported("not readable");
Benjamin Petersona1b49012009-03-31 23:11:32 +00001524
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001525 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001526 return NULL;
1527
1528 if (n < 0) {
1529 /* Read everything */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001530 PyObject *bytes = _PyObject_CallMethodId(self->buffer, &PyId_read, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001531 PyObject *decoded;
1532 if (bytes == NULL)
1533 goto fail;
Victor Stinnerfd821132011-05-25 22:01:33 +02001534
1535 if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type)
1536 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1537 bytes, 1);
1538 else
1539 decoded = PyObject_CallMethodObjArgs(
1540 self->decoder, _PyIO_str_decode, bytes, Py_True, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001541 Py_DECREF(bytes);
1542 if (decoded == NULL)
1543 goto fail;
1544
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001545 result = textiowrapper_get_decoded_chars(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001546
1547 if (result == NULL) {
1548 Py_DECREF(decoded);
1549 return NULL;
1550 }
1551
1552 PyUnicode_AppendAndDel(&result, decoded);
1553 if (result == NULL)
1554 goto fail;
1555
1556 Py_CLEAR(self->snapshot);
1557 return result;
1558 }
1559 else {
1560 int res = 1;
1561 Py_ssize_t remaining = n;
1562
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001563 result = textiowrapper_get_decoded_chars(self, n);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001564 if (result == NULL)
1565 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001566 if (PyUnicode_READY(result) == -1)
1567 goto fail;
1568 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001569
1570 /* Keep reading chunks until we have n characters to return */
1571 while (remaining > 0) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001572 res = textiowrapper_read_chunk(self, remaining);
Gregory P. Smith51359922012-06-23 23:55:39 -07001573 if (res < 0) {
1574 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1575 when EINTR occurs so we needn't do it ourselves. */
1576 if (_PyIO_trap_eintr()) {
1577 continue;
1578 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001579 goto fail;
Gregory P. Smith51359922012-06-23 23:55:39 -07001580 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001581 if (res == 0) /* EOF */
1582 break;
1583 if (chunks == NULL) {
1584 chunks = PyList_New(0);
1585 if (chunks == NULL)
1586 goto fail;
1587 }
Antoine Pitroue5324562011-11-19 00:39:01 +01001588 if (PyUnicode_GET_LENGTH(result) > 0 &&
1589 PyList_Append(chunks, result) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001590 goto fail;
1591 Py_DECREF(result);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001592 result = textiowrapper_get_decoded_chars(self, remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001593 if (result == NULL)
1594 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001595 remaining -= PyUnicode_GET_LENGTH(result);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001596 }
1597 if (chunks != NULL) {
1598 if (result != NULL && PyList_Append(chunks, result) < 0)
1599 goto fail;
1600 Py_CLEAR(result);
1601 result = PyUnicode_Join(_PyIO_empty_str, chunks);
1602 if (result == NULL)
1603 goto fail;
1604 Py_CLEAR(chunks);
1605 }
1606 return result;
1607 }
1608 fail:
1609 Py_XDECREF(result);
1610 Py_XDECREF(chunks);
1611 return NULL;
1612}
1613
1614
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001615/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001616 that is to the NUL character. Otherwise the function will produce
1617 incorrect results. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001618static char *
1619find_control_char(int kind, char *s, char *end, Py_UCS4 ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001620{
Antoine Pitrouc28e2e52011-11-13 03:53:42 +01001621 if (kind == PyUnicode_1BYTE_KIND) {
1622 assert(ch < 256);
1623 return (char *) memchr((void *) s, (char) ch, end - s);
1624 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001625 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001626 while (PyUnicode_READ(kind, s, 0) > ch)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001627 s += kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001628 if (PyUnicode_READ(kind, s, 0) == ch)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001629 return s;
1630 if (s == end)
1631 return NULL;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001632 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001633 }
1634}
1635
1636Py_ssize_t
1637_PyIO_find_line_ending(
1638 int translated, int universal, PyObject *readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001639 int kind, char *start, char *end, Py_ssize_t *consumed)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001640{
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001641 Py_ssize_t len = ((char*)end - (char*)start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001642
1643 if (translated) {
1644 /* Newlines are already translated, only search for \n */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001645 char *pos = find_control_char(kind, start, end, '\n');
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001646 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001647 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001648 else {
1649 *consumed = len;
1650 return -1;
1651 }
1652 }
1653 else if (universal) {
1654 /* Universal newline search. Find any of \r, \r\n, \n
1655 * The decoder ensures that \r\n are not split in two pieces
1656 */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001657 char *s = start;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001658 for (;;) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001659 Py_UCS4 ch;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001660 /* Fast path for non-control chars. The loop always ends
Victor Stinnerf7b8cb62011-09-29 03:28:17 +02001661 since the Unicode string is NUL-terminated. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001662 while (PyUnicode_READ(kind, s, 0) > '\r')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001663 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001664 if (s >= end) {
1665 *consumed = len;
1666 return -1;
1667 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001668 ch = PyUnicode_READ(kind, s, 0);
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001669 s += kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001670 if (ch == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001671 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001672 if (ch == '\r') {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001673 if (PyUnicode_READ(kind, s, 0) == '\n')
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001674 return (s - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001675 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001676 return (s - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001677 }
1678 }
1679 }
1680 else {
1681 /* Non-universal mode. */
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001682 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
1683 char *nl = PyUnicode_DATA(readnl);
1684 /* Assume that readnl is an ASCII character. */
1685 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001686 if (readnl_len == 1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001687 char *pos = find_control_char(kind, start, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001688 if (pos != NULL)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001689 return (pos - start)/kind + 1;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001690 *consumed = len;
1691 return -1;
1692 }
1693 else {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001694 char *s = start;
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001695 char *e = end - (readnl_len - 1)*kind;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001696 char *pos;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001697 if (e < s)
1698 e = s;
1699 while (s < e) {
1700 Py_ssize_t i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001701 char *pos = find_control_char(kind, s, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001702 if (pos == NULL || pos >= e)
1703 break;
1704 for (i = 1; i < readnl_len; i++) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001705 if (PyUnicode_READ(kind, pos, i) != nl[i])
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001706 break;
1707 }
1708 if (i == readnl_len)
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001709 return (pos - start)/kind + readnl_len;
1710 s = pos + kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001711 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001712 pos = find_control_char(kind, e, end, nl[0]);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001713 if (pos == NULL)
1714 *consumed = len;
1715 else
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001716 *consumed = (pos - start)/kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001717 return -1;
1718 }
1719 }
1720}
1721
1722static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001723_textiowrapper_readline(textio *self, Py_ssize_t limit)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001724{
1725 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
1726 Py_ssize_t start, endpos, chunked, offset_to_buffer;
1727 int res;
1728
1729 CHECK_CLOSED(self);
1730
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001731 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001732 return NULL;
1733
1734 chunked = 0;
1735
1736 while (1) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001737 char *ptr;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001738 Py_ssize_t line_len;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001739 int kind;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001740 Py_ssize_t consumed = 0;
1741
1742 /* First, get some data if necessary */
1743 res = 1;
1744 while (!self->decoded_chars ||
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001745 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
Antoine Pitroue5324562011-11-19 00:39:01 +01001746 res = textiowrapper_read_chunk(self, 0);
Gregory P. Smith51359922012-06-23 23:55:39 -07001747 if (res < 0) {
1748 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1749 when EINTR occurs so we needn't do it ourselves. */
1750 if (_PyIO_trap_eintr()) {
1751 continue;
1752 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001753 goto error;
Gregory P. Smith51359922012-06-23 23:55:39 -07001754 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001755 if (res == 0)
1756 break;
1757 }
1758 if (res == 0) {
1759 /* end of file */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001760 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001761 Py_CLEAR(self->snapshot);
1762 start = endpos = offset_to_buffer = 0;
1763 break;
1764 }
1765
1766 if (remaining == NULL) {
1767 line = self->decoded_chars;
1768 start = self->decoded_chars_used;
1769 offset_to_buffer = 0;
1770 Py_INCREF(line);
1771 }
1772 else {
1773 assert(self->decoded_chars_used == 0);
1774 line = PyUnicode_Concat(remaining, self->decoded_chars);
1775 start = 0;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001776 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001777 Py_CLEAR(remaining);
1778 if (line == NULL)
1779 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001780 if (PyUnicode_READY(line) == -1)
1781 goto error;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001782 }
1783
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001784 ptr = PyUnicode_DATA(line);
1785 line_len = PyUnicode_GET_LENGTH(line);
1786 kind = PyUnicode_KIND(line);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001787
1788 endpos = _PyIO_find_line_ending(
1789 self->readtranslate, self->readuniversal, self->readnl,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001790 kind,
Martin v. Löwisc47adb02011-10-07 20:55:35 +02001791 ptr + kind * start,
1792 ptr + kind * line_len,
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001793 &consumed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001794 if (endpos >= 0) {
1795 endpos += start;
1796 if (limit >= 0 && (endpos - start) + chunked >= limit)
1797 endpos = start + limit - chunked;
1798 break;
1799 }
1800
1801 /* We can put aside up to `endpos` */
1802 endpos = consumed + start;
1803 if (limit >= 0 && (endpos - start) + chunked >= limit) {
1804 /* Didn't find line ending, but reached length limit */
1805 endpos = start + limit - chunked;
1806 break;
1807 }
1808
1809 if (endpos > start) {
1810 /* No line ending seen yet - put aside current data */
1811 PyObject *s;
1812 if (chunks == NULL) {
1813 chunks = PyList_New(0);
1814 if (chunks == NULL)
1815 goto error;
1816 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001817 s = PyUnicode_Substring(line, start, endpos);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001818 if (s == NULL)
1819 goto error;
1820 if (PyList_Append(chunks, s) < 0) {
1821 Py_DECREF(s);
1822 goto error;
1823 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001824 chunked += PyUnicode_GET_LENGTH(s);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001825 Py_DECREF(s);
1826 }
1827 /* There may be some remaining bytes we'll have to prepend to the
1828 next chunk of data */
1829 if (endpos < line_len) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001830 remaining = PyUnicode_Substring(line, endpos, line_len);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001831 if (remaining == NULL)
1832 goto error;
1833 }
1834 Py_CLEAR(line);
1835 /* We have consumed the buffer */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001836 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001837 }
1838
1839 if (line != NULL) {
1840 /* Our line ends in the current buffer */
1841 self->decoded_chars_used = endpos - offset_to_buffer;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001842 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
1843 PyObject *s = PyUnicode_Substring(line, start, endpos);
1844 Py_CLEAR(line);
1845 if (s == NULL)
1846 goto error;
1847 line = s;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001848 }
1849 }
1850 if (remaining != NULL) {
1851 if (chunks == NULL) {
1852 chunks = PyList_New(0);
1853 if (chunks == NULL)
1854 goto error;
1855 }
1856 if (PyList_Append(chunks, remaining) < 0)
1857 goto error;
1858 Py_CLEAR(remaining);
1859 }
1860 if (chunks != NULL) {
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001861 if (line != NULL) {
1862 if (PyList_Append(chunks, line) < 0)
1863 goto error;
1864 Py_DECREF(line);
1865 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001866 line = PyUnicode_Join(_PyIO_empty_str, chunks);
1867 if (line == NULL)
1868 goto error;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001869 Py_CLEAR(chunks);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001870 }
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001871 if (line == NULL) {
1872 Py_INCREF(_PyIO_empty_str);
1873 line = _PyIO_empty_str;
1874 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001875
1876 return line;
1877
1878 error:
1879 Py_XDECREF(chunks);
1880 Py_XDECREF(remaining);
1881 Py_XDECREF(line);
1882 return NULL;
1883}
1884
1885static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001886textiowrapper_readline(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001887{
1888 Py_ssize_t limit = -1;
1889
1890 CHECK_INITIALIZED(self);
1891 if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
1892 return NULL;
1893 }
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001894 return _textiowrapper_readline(self, limit);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001895}
1896
1897/* Seek and Tell */
1898
/* Unpacked form of the integer cookie that tell() builds and seek() parses. */
typedef struct {
    /* Position in the underlying buffer that seek() seeks back to. */
    Py_off_t start_pos;
    /* Decoder flags handed to decoder.setstate() from that position. */
    int dec_flags;
    /* Number of bytes seek() reads from the buffer and feeds the decoder. */
    int bytes_to_feed;
    /* Number of decoded characters seek() marks as already used. */
    int chars_to_skip;
    /* Passed by seek() as the second argument of decoder.decode(). */
    char need_eof;
} cookie_type;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001906
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte buffer and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (resp.).  The following macros define at which offsets in that
   intermediary buffer the various cookie_type fields are stored.
 */

/* Total size of the intermediary buffer: one slot per cookie_type field. */
#define COOKIE_BUF_LEN (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP (sizeof(char))
# define OFF_NEED_EOF 0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS 0
# define OFF_DEC_FLAGS (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
1938
1939static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001940textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001941{
1942 unsigned char buffer[COOKIE_BUF_LEN];
1943 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
1944 if (cookieLong == NULL)
1945 return -1;
1946
1947 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
Christian Heimes743e0cd2012-10-17 23:52:17 +02001948 PY_LITTLE_ENDIAN, 0) < 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001949 Py_DECREF(cookieLong);
1950 return -1;
1951 }
1952 Py_DECREF(cookieLong);
1953
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001954 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
1955 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
1956 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
1957 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
1958 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001959
1960 return 0;
1961}
1962
1963static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001964textiowrapper_build_cookie(cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001965{
1966 unsigned char buffer[COOKIE_BUF_LEN];
1967
Antoine Pitrou2db74c22009-03-06 21:49:02 +00001968 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
1969 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
1970 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
1971 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
1972 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001973
Christian Heimes743e0cd2012-10-17 23:52:17 +02001974 return _PyLong_FromByteArray(buffer, sizeof(buffer),
1975 PY_LITTLE_ENDIAN, 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001976}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001977
1978static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00001979_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001980{
1981 PyObject *res;
1982 /* When seeking to the start of the stream, we call decoder.reset()
1983 rather than decoder.getstate().
1984 This is for a few decoders such as utf-16 for which the state value
1985 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
1986 utf-16, that we are expecting a BOM).
1987 */
1988 if (cookie->start_pos == 0 && cookie->dec_flags == 0)
1989 res = PyObject_CallMethodObjArgs(self->decoder, _PyIO_str_reset, NULL);
1990 else
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02001991 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate,
1992 "((yi))", "", cookie->dec_flags);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00001993 if (res == NULL)
1994 return -1;
1995 Py_DECREF(res);
1996 return 0;
1997}
1998
Antoine Pitroue4501852009-05-14 18:55:55 +00001999static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002000_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
Antoine Pitroue4501852009-05-14 18:55:55 +00002001{
2002 PyObject *res;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002003 /* Same as _textiowrapper_decoder_setstate() above. */
Antoine Pitroue4501852009-05-14 18:55:55 +00002004 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2005 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_reset, NULL);
2006 self->encoding_start_of_stream = 1;
2007 }
2008 else {
2009 res = PyObject_CallMethodObjArgs(self->encoder, _PyIO_str_setstate,
2010 _PyIO_zero, NULL);
2011 self->encoding_start_of_stream = 0;
2012 }
2013 if (res == NULL)
2014 return -1;
2015 Py_DECREF(res);
2016 return 0;
2017}
2018
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002019static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002020textiowrapper_seek(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002021{
2022 PyObject *cookieObj, *posobj;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002023 cookie_type cookie;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002024 int whence = 0;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002025 PyObject *res;
2026 int cmp;
2027
2028 CHECK_INITIALIZED(self);
2029
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002030 if (!PyArg_ParseTuple(args, "O|i:seek", &cookieObj, &whence))
2031 return NULL;
2032 CHECK_CLOSED(self);
2033
2034 Py_INCREF(cookieObj);
2035
2036 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002037 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002038 goto fail;
2039 }
2040
2041 if (whence == 1) {
2042 /* seek relative to current position */
Antoine Pitroue4501852009-05-14 18:55:55 +00002043 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002044 if (cmp < 0)
2045 goto fail;
2046
2047 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002048 _unsupported("can't do nonzero cur-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002049 goto fail;
2050 }
2051
2052 /* Seeking to the current position should attempt to
2053 * sync the underlying buffer with the current position.
2054 */
2055 Py_DECREF(cookieObj);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002056 cookieObj = _PyObject_CallMethodId((PyObject *)self, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002057 if (cookieObj == NULL)
2058 goto fail;
2059 }
2060 else if (whence == 2) {
2061 /* seek relative to end of file */
Antoine Pitroue4501852009-05-14 18:55:55 +00002062 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_EQ);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002063 if (cmp < 0)
2064 goto fail;
2065
2066 if (cmp == 0) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002067 _unsupported("can't do nonzero end-relative seeks");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002068 goto fail;
2069 }
2070
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002071 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002072 if (res == NULL)
2073 goto fail;
2074 Py_DECREF(res);
2075
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002076 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002077 Py_CLEAR(self->snapshot);
2078 if (self->decoder) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002079 res = _PyObject_CallMethodId(self->decoder, &PyId_reset, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002080 if (res == NULL)
2081 goto fail;
2082 Py_DECREF(res);
2083 }
2084
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002085 res = _PyObject_CallMethodId(self->buffer, &PyId_seek, "ii", 0, 2);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002086 Py_XDECREF(cookieObj);
2087 return res;
2088 }
2089 else if (whence != 0) {
2090 PyErr_Format(PyExc_ValueError,
2091 "invalid whence (%d, should be 0, 1 or 2)", whence);
2092 goto fail;
2093 }
2094
Antoine Pitroue4501852009-05-14 18:55:55 +00002095 cmp = PyObject_RichCompareBool(cookieObj, _PyIO_zero, Py_LT);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002096 if (cmp < 0)
2097 goto fail;
2098
2099 if (cmp == 1) {
2100 PyErr_Format(PyExc_ValueError,
2101 "negative seek position %R", cookieObj);
2102 goto fail;
2103 }
2104
2105 res = PyObject_CallMethodObjArgs((PyObject *)self, _PyIO_str_flush, NULL);
2106 if (res == NULL)
2107 goto fail;
2108 Py_DECREF(res);
2109
2110 /* The strategy of seek() is to go back to the safe start point
2111 * and replay the effect of read(chars_to_skip) from there.
2112 */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002113 if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002114 goto fail;
2115
2116 /* Seek back to the safe start point. */
2117 posobj = PyLong_FromOff_t(cookie.start_pos);
2118 if (posobj == NULL)
2119 goto fail;
2120 res = PyObject_CallMethodObjArgs(self->buffer,
2121 _PyIO_str_seek, posobj, NULL);
2122 Py_DECREF(posobj);
2123 if (res == NULL)
2124 goto fail;
2125 Py_DECREF(res);
2126
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002127 textiowrapper_set_decoded_chars(self, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002128 Py_CLEAR(self->snapshot);
2129
2130 /* Restore the decoder to its state from the safe start point. */
2131 if (self->decoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002132 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002133 goto fail;
2134 }
2135
2136 if (cookie.chars_to_skip) {
2137 /* Just like _read_chunk, feed the decoder and save a snapshot. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002138 PyObject *input_chunk = _PyObject_CallMethodId(
2139 self->buffer, &PyId_read, "i", cookie.bytes_to_feed);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002140 PyObject *decoded;
2141
2142 if (input_chunk == NULL)
2143 goto fail;
2144
2145 assert (PyBytes_Check(input_chunk));
2146
2147 self->snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2148 if (self->snapshot == NULL) {
2149 Py_DECREF(input_chunk);
2150 goto fail;
2151 }
2152
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002153 decoded = _PyObject_CallMethodId(self->decoder, &PyId_decode,
2154 "Oi", input_chunk, (int)cookie.need_eof);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002155
2156 if (decoded == NULL)
2157 goto fail;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002158 if (PyUnicode_READY(decoded) == -1) {
2159 Py_DECREF(decoded);
2160 goto fail;
2161 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002162
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002163 textiowrapper_set_decoded_chars(self, decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002164
2165 /* Skip chars_to_skip of the decoded characters. */
Victor Stinner9e30aa52011-11-21 02:49:52 +01002166 if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002167 PyErr_SetString(PyExc_IOError, "can't restore logical file position");
2168 goto fail;
2169 }
2170 self->decoded_chars_used = cookie.chars_to_skip;
2171 }
2172 else {
2173 self->snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2174 if (self->snapshot == NULL)
2175 goto fail;
2176 }
2177
Antoine Pitroue4501852009-05-14 18:55:55 +00002178 /* Finally, reset the encoder (merely useful for proper BOM handling) */
2179 if (self->encoder) {
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002180 if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
Antoine Pitroue4501852009-05-14 18:55:55 +00002181 goto fail;
2182 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002183 return cookieObj;
2184 fail:
2185 Py_XDECREF(cookieObj);
2186 return NULL;
2187
2188}
2189
2190static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002191textiowrapper_tell(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002192{
2193 PyObject *res;
2194 PyObject *posobj = NULL;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002195 cookie_type cookie = {0,0,0,0,0};
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002196 PyObject *next_input;
2197 Py_ssize_t chars_to_skip, chars_decoded;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002198 Py_ssize_t skip_bytes, skip_back;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002199 PyObject *saved_state = NULL;
2200 char *input, *input_end;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002201 char *dec_buffer;
2202 Py_ssize_t dec_buffer_len;
2203 int dec_flags;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002204
2205 CHECK_INITIALIZED(self);
2206 CHECK_CLOSED(self);
2207
2208 if (!self->seekable) {
Antoine Pitrou0d739d72010-09-05 23:01:12 +00002209 _unsupported("underlying stream is not seekable");
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002210 goto fail;
2211 }
2212 if (!self->telling) {
2213 PyErr_SetString(PyExc_IOError,
2214 "telling position disabled by next() call");
2215 goto fail;
2216 }
2217
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002218 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002219 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002220 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002221 if (res == NULL)
2222 goto fail;
2223 Py_DECREF(res);
2224
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002225 posobj = _PyObject_CallMethodId(self->buffer, &PyId_tell, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002226 if (posobj == NULL)
2227 goto fail;
2228
2229 if (self->decoder == NULL || self->snapshot == NULL) {
Victor Stinner9e30aa52011-11-21 02:49:52 +01002230 assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002231 return posobj;
2232 }
2233
2234#if defined(HAVE_LARGEFILE_SUPPORT)
2235 cookie.start_pos = PyLong_AsLongLong(posobj);
2236#else
2237 cookie.start_pos = PyLong_AsLong(posobj);
2238#endif
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002239 Py_DECREF(posobj);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002240 if (PyErr_Occurred())
2241 goto fail;
2242
2243 /* Skip backward to the snapshot point (see _read_chunk). */
2244 if (!PyArg_Parse(self->snapshot, "(iO)", &cookie.dec_flags, &next_input))
2245 goto fail;
2246
2247 assert (PyBytes_Check(next_input));
2248
2249 cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2250
2251 /* How many decoded characters have been used up since the snapshot? */
2252 if (self->decoded_chars_used == 0) {
2253 /* We haven't moved from the snapshot point. */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002254 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002255 }
2256
2257 chars_to_skip = self->decoded_chars_used;
2258
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002259 /* Decoder state will be restored at the end */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002260 saved_state = PyObject_CallMethodObjArgs(self->decoder,
2261 _PyIO_str_getstate, NULL);
2262 if (saved_state == NULL)
2263 goto fail;
2264
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002265#define DECODER_GETSTATE() do { \
2266 PyObject *_state = PyObject_CallMethodObjArgs(self->decoder, \
2267 _PyIO_str_getstate, NULL); \
2268 if (_state == NULL) \
2269 goto fail; \
2270 if (!PyArg_Parse(_state, "(y#i)", &dec_buffer, &dec_buffer_len, &dec_flags)) { \
2271 Py_DECREF(_state); \
2272 goto fail; \
2273 } \
2274 Py_DECREF(_state); \
2275 } while (0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002276
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002277 /* TODO: replace assert with exception */
2278#define DECODER_DECODE(start, len, res) do { \
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002279 PyObject *_decoded = _PyObject_CallMethodId( \
2280 self->decoder, &PyId_decode, "y#", start, len); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002281 if (_decoded == NULL) \
2282 goto fail; \
2283 assert (PyUnicode_Check(_decoded)); \
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002284 res = PyUnicode_GET_LENGTH(_decoded); \
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002285 Py_DECREF(_decoded); \
2286 } while (0)
2287
2288 /* Fast search for an acceptable start point, close to our
2289 current pos */
2290 skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2291 skip_back = 1;
2292 assert(skip_back <= PyBytes_GET_SIZE(next_input));
2293 input = PyBytes_AS_STRING(next_input);
2294 while (skip_bytes > 0) {
2295 /* Decode up to temptative start point */
2296 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2297 goto fail;
2298 DECODER_DECODE(input, skip_bytes, chars_decoded);
2299 if (chars_decoded <= chars_to_skip) {
2300 DECODER_GETSTATE();
2301 if (dec_buffer_len == 0) {
2302 /* Before pos and no bytes buffered in decoder => OK */
2303 cookie.dec_flags = dec_flags;
2304 chars_to_skip -= chars_decoded;
2305 break;
2306 }
2307 /* Skip back by buffered amount and reset heuristic */
2308 skip_bytes -= dec_buffer_len;
2309 skip_back = 1;
2310 }
2311 else {
2312 /* We're too far ahead, skip back a bit */
2313 skip_bytes -= skip_back;
2314 skip_back *= 2;
2315 }
2316 }
2317 if (skip_bytes <= 0) {
2318 skip_bytes = 0;
2319 if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2320 goto fail;
2321 }
2322
2323 /* Note our initial start point. */
2324 cookie.start_pos += skip_bytes;
2325 cookie.chars_to_skip = chars_to_skip;
2326 if (chars_to_skip == 0)
2327 goto finally;
2328
2329 /* We should be close to the desired position. Now feed the decoder one
2330 * byte at a time until we reach the `chars_to_skip` target.
2331 * As we go, note the nearest "safe start point" before the current
2332 * location (a point where the decoder has nothing buffered, so seek()
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002333 * can safely start from there and advance to this location).
2334 */
2335 chars_decoded = 0;
2336 input = PyBytes_AS_STRING(next_input);
2337 input_end = input + PyBytes_GET_SIZE(next_input);
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002338 input += skip_bytes;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002339 while (input < input_end) {
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002340 Py_ssize_t n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002341
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002342 DECODER_DECODE(input, 1, n);
2343 /* We got n chars for 1 byte */
2344 chars_decoded += n;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002345 cookie.bytes_to_feed += 1;
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002346 DECODER_GETSTATE();
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002347
2348 if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2349 /* Decoder buffer is empty, so this is a safe start point. */
2350 cookie.start_pos += cookie.bytes_to_feed;
2351 chars_to_skip -= chars_decoded;
2352 cookie.dec_flags = dec_flags;
2353 cookie.bytes_to_feed = 0;
2354 chars_decoded = 0;
2355 }
2356 if (chars_decoded >= chars_to_skip)
2357 break;
2358 input++;
2359 }
2360 if (input == input_end) {
2361 /* We didn't get enough decoded data; signal EOF to get more. */
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002362 PyObject *decoded = _PyObject_CallMethodId(
2363 self->decoder, &PyId_decode, "yi", "", /* final = */ 1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002364 if (decoded == NULL)
2365 goto fail;
2366 assert (PyUnicode_Check(decoded));
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002367 chars_decoded += PyUnicode_GET_LENGTH(decoded);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002368 Py_DECREF(decoded);
2369 cookie.need_eof = 1;
2370
2371 if (chars_decoded < chars_to_skip) {
2372 PyErr_SetString(PyExc_IOError,
2373 "can't reconstruct logical file position");
2374 goto fail;
2375 }
2376 }
2377
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002378finally:
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002379 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002380 Py_DECREF(saved_state);
2381 if (res == NULL)
2382 return NULL;
2383 Py_DECREF(res);
2384
2385 /* The returned cookie corresponds to the last safe start point. */
2386 cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002387 return textiowrapper_build_cookie(&cookie);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002388
Antoine Pitrou211b81d2011-02-25 20:27:33 +00002389fail:
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002390 if (saved_state) {
2391 PyObject *type, *value, *traceback;
2392 PyErr_Fetch(&type, &value, &traceback);
2393
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002394 res = _PyObject_CallMethodId(self->decoder, &PyId_setstate, "(O)", saved_state);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002395 Py_DECREF(saved_state);
2396 if (res == NULL)
2397 return NULL;
2398 Py_DECREF(res);
2399
2400 PyErr_Restore(type, value, traceback);
2401 }
2402 return NULL;
2403}
2404
2405static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002406textiowrapper_truncate(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002407{
2408 PyObject *pos = Py_None;
2409 PyObject *res;
2410
2411 CHECK_INITIALIZED(self)
2412 if (!PyArg_ParseTuple(args, "|O:truncate", &pos)) {
2413 return NULL;
2414 }
2415
2416 res = PyObject_CallMethodObjArgs((PyObject *) self, _PyIO_str_flush, NULL);
2417 if (res == NULL)
2418 return NULL;
2419 Py_DECREF(res);
2420
Antoine Pitrou905a2ff2010-01-31 22:47:27 +00002421 return PyObject_CallMethodObjArgs(self->buffer, _PyIO_str_truncate, pos, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002422}
2423
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002424static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002425textiowrapper_repr(textio *self)
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002426{
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002427 PyObject *nameobj, *modeobj, *res, *s;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002428
2429 CHECK_INITIALIZED(self);
2430
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002431 res = PyUnicode_FromString("<_io.TextIOWrapper");
2432 if (res == NULL)
2433 return NULL;
Martin v. Löwis767046a2011-10-14 15:35:36 +02002434 nameobj = _PyObject_GetAttrId((PyObject *) self, &PyId_name);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002435 if (nameobj == NULL) {
2436 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2437 PyErr_Clear();
2438 else
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002439 goto error;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002440 }
2441 else {
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002442 s = PyUnicode_FromFormat(" name=%R", nameobj);
Antoine Pitrou716c4442009-05-23 19:04:03 +00002443 Py_DECREF(nameobj);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002444 if (s == NULL)
2445 goto error;
2446 PyUnicode_AppendAndDel(&res, s);
2447 if (res == NULL)
2448 return NULL;
Antoine Pitrou716c4442009-05-23 19:04:03 +00002449 }
Martin v. Löwis767046a2011-10-14 15:35:36 +02002450 modeobj = _PyObject_GetAttrId((PyObject *) self, &PyId_mode);
Antoine Pitroua4815ca2011-01-09 20:38:15 +00002451 if (modeobj == NULL) {
2452 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2453 PyErr_Clear();
2454 else
2455 goto error;
2456 }
2457 else {
2458 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2459 Py_DECREF(modeobj);
2460 if (s == NULL)
2461 goto error;
2462 PyUnicode_AppendAndDel(&res, s);
2463 if (res == NULL)
2464 return NULL;
2465 }
2466 s = PyUnicode_FromFormat("%U encoding=%R>",
2467 res, self->encoding);
2468 Py_DECREF(res);
2469 return s;
2470error:
2471 Py_XDECREF(res);
2472 return NULL;
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002473}
2474
2475
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002476/* Inquiries */
2477
2478static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002479textiowrapper_fileno(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002480{
2481 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002482 return _PyObject_CallMethodId(self->buffer, &PyId_fileno, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002483}
2484
2485static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002486textiowrapper_seekable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002487{
2488 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002489 return _PyObject_CallMethodId(self->buffer, &PyId_seekable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002490}
2491
2492static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002493textiowrapper_readable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002494{
2495 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002496 return _PyObject_CallMethodId(self->buffer, &PyId_readable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002497}
2498
2499static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002500textiowrapper_writable(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002501{
2502 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002503 return _PyObject_CallMethodId(self->buffer, &PyId_writable, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002504}
2505
2506static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002507textiowrapper_isatty(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002508{
2509 CHECK_INITIALIZED(self);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002510 return _PyObject_CallMethodId(self->buffer, &PyId_isatty, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002511}
2512
2513static PyObject *
Antoine Pitrou243757e2010-11-05 21:15:39 +00002514textiowrapper_getstate(textio *self, PyObject *args)
2515{
2516 PyErr_Format(PyExc_TypeError,
2517 "cannot serialize '%s' object", Py_TYPE(self)->tp_name);
2518 return NULL;
2519}
2520
2521static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002522textiowrapper_flush(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002523{
2524 CHECK_INITIALIZED(self);
2525 CHECK_CLOSED(self);
2526 self->telling = self->seekable;
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002527 if (_textiowrapper_writeflush(self) < 0)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002528 return NULL;
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002529 return _PyObject_CallMethodId(self->buffer, &PyId_flush, NULL);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002530}
2531
2532static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002533textiowrapper_close(textio *self, PyObject *args)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002534{
2535 PyObject *res;
Antoine Pitrou6be88762010-05-03 16:48:20 +00002536 int r;
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002537 CHECK_INITIALIZED(self);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002538
Antoine Pitrou6be88762010-05-03 16:48:20 +00002539 res = textiowrapper_closed_get(self, NULL);
2540 if (res == NULL)
2541 return NULL;
2542 r = PyObject_IsTrue(res);
2543 Py_DECREF(res);
2544 if (r < 0)
2545 return NULL;
Victor Stinnerf6c57832010-05-19 01:17:01 +00002546
Antoine Pitrou6be88762010-05-03 16:48:20 +00002547 if (r > 0) {
2548 Py_RETURN_NONE; /* stream already closed */
2549 }
2550 else {
Antoine Pitroue033e062010-10-29 10:38:18 +00002551 if (self->deallocating) {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002552 res = _PyObject_CallMethodId(self->buffer, &PyId__dealloc_warn, "O", self);
Antoine Pitroue033e062010-10-29 10:38:18 +00002553 if (res)
2554 Py_DECREF(res);
2555 else
2556 PyErr_Clear();
2557 }
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002558 res = _PyObject_CallMethodId((PyObject *)self, &PyId_flush, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002559 if (res == NULL) {
2560 return NULL;
2561 }
2562 else
2563 Py_DECREF(res);
2564
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002565 return _PyObject_CallMethodId(self->buffer, &PyId_close, NULL);
Antoine Pitrou6be88762010-05-03 16:48:20 +00002566 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002567}
2568
2569static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002570textiowrapper_iternext(textio *self)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002571{
2572 PyObject *line;
2573
2574 CHECK_INITIALIZED(self);
2575
2576 self->telling = 0;
2577 if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
2578 /* Skip method call overhead for speed */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002579 line = _textiowrapper_readline(self, -1);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002580 }
2581 else {
2582 line = PyObject_CallMethodObjArgs((PyObject *)self,
2583 _PyIO_str_readline, NULL);
2584 if (line && !PyUnicode_Check(line)) {
2585 PyErr_Format(PyExc_IOError,
2586 "readline() should have returned an str object, "
2587 "not '%.200s'", Py_TYPE(line)->tp_name);
2588 Py_DECREF(line);
2589 return NULL;
2590 }
2591 }
2592
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002593 if (line == NULL || PyUnicode_READY(line) == -1)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002594 return NULL;
2595
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02002596 if (PyUnicode_GET_LENGTH(line) == 0) {
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002597 /* Reached EOF or would have blocked */
2598 Py_DECREF(line);
2599 Py_CLEAR(self->snapshot);
2600 self->telling = self->seekable;
2601 return NULL;
2602 }
2603
2604 return line;
2605}
2606
2607static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002608textiowrapper_name_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002609{
2610 CHECK_INITIALIZED(self);
Martin v. Löwis767046a2011-10-14 15:35:36 +02002611 return _PyObject_GetAttrId(self->buffer, &PyId_name);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002612}
2613
2614static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002615textiowrapper_closed_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002616{
2617 CHECK_INITIALIZED(self);
2618 return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
2619}
2620
2621static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002622textiowrapper_newlines_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002623{
2624 PyObject *res;
2625 CHECK_INITIALIZED(self);
2626 if (self->decoder == NULL)
2627 Py_RETURN_NONE;
2628 res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
2629 if (res == NULL) {
Benjamin Peterson2cfca792009-06-06 20:46:48 +00002630 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2631 PyErr_Clear();
2632 Py_RETURN_NONE;
2633 }
2634 else {
2635 return NULL;
2636 }
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002637 }
2638 return res;
2639}
2640
2641static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002642textiowrapper_errors_get(textio *self, void *context)
Benjamin Peterson0926ad12009-06-06 18:02:12 +00002643{
2644 CHECK_INITIALIZED(self);
2645 return PyUnicode_FromString(PyBytes_AS_STRING(self->errors));
2646}
2647
2648static PyObject *
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002649textiowrapper_chunk_size_get(textio *self, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002650{
2651 CHECK_INITIALIZED(self);
2652 return PyLong_FromSsize_t(self->chunk_size);
2653}
2654
2655static int
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002656textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002657{
2658 Py_ssize_t n;
2659 CHECK_INITIALIZED_INT(self);
Antoine Pitroucb4ae812011-07-13 21:07:49 +02002660 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002661 if (n == -1 && PyErr_Occurred())
2662 return -1;
2663 if (n <= 0) {
2664 PyErr_SetString(PyExc_ValueError,
2665 "a strictly positive integer is required");
2666 return -1;
2667 }
2668 self->chunk_size = n;
2669 return 0;
2670}
2671
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002672static PyMethodDef textiowrapper_methods[] = {
2673 {"detach", (PyCFunction)textiowrapper_detach, METH_NOARGS},
2674 {"write", (PyCFunction)textiowrapper_write, METH_VARARGS},
2675 {"read", (PyCFunction)textiowrapper_read, METH_VARARGS},
2676 {"readline", (PyCFunction)textiowrapper_readline, METH_VARARGS},
2677 {"flush", (PyCFunction)textiowrapper_flush, METH_NOARGS},
2678 {"close", (PyCFunction)textiowrapper_close, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002679
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002680 {"fileno", (PyCFunction)textiowrapper_fileno, METH_NOARGS},
2681 {"seekable", (PyCFunction)textiowrapper_seekable, METH_NOARGS},
2682 {"readable", (PyCFunction)textiowrapper_readable, METH_NOARGS},
2683 {"writable", (PyCFunction)textiowrapper_writable, METH_NOARGS},
2684 {"isatty", (PyCFunction)textiowrapper_isatty, METH_NOARGS},
Antoine Pitrou243757e2010-11-05 21:15:39 +00002685 {"__getstate__", (PyCFunction)textiowrapper_getstate, METH_NOARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002686
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002687 {"seek", (PyCFunction)textiowrapper_seek, METH_VARARGS},
2688 {"tell", (PyCFunction)textiowrapper_tell, METH_NOARGS},
2689 {"truncate", (PyCFunction)textiowrapper_truncate, METH_VARARGS},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002690 {NULL, NULL}
2691};
2692
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002693static PyMemberDef textiowrapper_members[] = {
2694 {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
2695 {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
2696 {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002697 {NULL}
2698};
2699
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002700static PyGetSetDef textiowrapper_getset[] = {
2701 {"name", (getter)textiowrapper_name_get, NULL, NULL},
2702 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002703/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
2704*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002705 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
2706 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
2707 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
2708 (setter)textiowrapper_chunk_size_set, NULL},
Benjamin Peterson1fea3212009-04-19 03:15:20 +00002709 {NULL}
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002710};
2711
2712PyTypeObject PyTextIOWrapper_Type = {
2713 PyVarObject_HEAD_INIT(NULL, 0)
2714 "_io.TextIOWrapper", /*tp_name*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002715 sizeof(textio), /*tp_basicsize*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002716 0, /*tp_itemsize*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002717 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002718 0, /*tp_print*/
2719 0, /*tp_getattr*/
Benjamin Petersonc4c0eae2009-03-09 00:07:03 +00002720 0, /*tps_etattr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002721 0, /*tp_compare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002722 (reprfunc)textiowrapper_repr,/*tp_repr*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002723 0, /*tp_as_number*/
2724 0, /*tp_as_sequence*/
2725 0, /*tp_as_mapping*/
2726 0, /*tp_hash */
2727 0, /*tp_call*/
2728 0, /*tp_str*/
2729 0, /*tp_getattro*/
2730 0, /*tp_setattro*/
2731 0, /*tp_as_buffer*/
2732 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
2733 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002734 textiowrapper_doc, /* tp_doc */
2735 (traverseproc)textiowrapper_traverse, /* tp_traverse */
2736 (inquiry)textiowrapper_clear, /* tp_clear */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002737 0, /* tp_richcompare */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002738 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002739 0, /* tp_iter */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002740 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
2741 textiowrapper_methods, /* tp_methods */
2742 textiowrapper_members, /* tp_members */
2743 textiowrapper_getset, /* tp_getset */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002744 0, /* tp_base */
2745 0, /* tp_dict */
2746 0, /* tp_descr_get */
2747 0, /* tp_descr_set */
Benjamin Peterson680bf1a2009-06-12 02:07:12 +00002748 offsetof(textio, dict), /*tp_dictoffset*/
2749 (initproc)textiowrapper_init, /* tp_init */
Benjamin Peterson4fa88fa2009-03-04 00:14:51 +00002750 0, /* tp_alloc */
2751 PyType_GenericNew, /* tp_new */
2752};